From 7f9ad39d3499826bd610b5efe9a6c10e89d61c5a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 11 Sep 2019 13:18:59 +0200 Subject: [PATCH 001/155] First version for the over-approximation of POMDP reachability --- src/storm-pomdp-cli/storm-pomdp.cpp | 18 +- .../ApproximatePOMDPModelchecker.cpp | 419 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 118 +++++ src/storm/models/sparse/Pomdp.cpp | 13 +- src/storm/models/sparse/Pomdp.h | 2 +- 5 files changed, 566 insertions(+), 4 deletions(-) create mode 100644 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp create mode 100644 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ec888bd02..730ce3ec8 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -38,6 +38,7 @@ #include "storm-pomdp/transformer/BinaryPomdpTransformer.h" #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" #include "storm/api/storm.h" /*! @@ -89,6 +90,18 @@ int main(const int argc, const char** argv) { auto const& coreSettings = storm::settings::getModule(); auto const& pomdpSettings = storm::settings::getModule(); + auto const &general = storm::settings::getModule(); + auto const &debug = storm::settings::getModule(); + + if (general.isVerboseSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::INFO); + } + if (debug.isDebugSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::DEBUG); + } + if (debug.isTraceSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::TRACE); + } // For several engines, no model building step is performed, but the verification is started right away. 
storm::settings::modules::CoreSettings::Engine engine = coreSettings.getEngine(); @@ -98,7 +111,10 @@ int main(const int argc, const char** argv) { auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); std::shared_ptr> pomdp = model->template as>(); - + + // For ease of testing + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + std::shared_ptr formula; if (!symbolicInput.properties.empty()) { formula = symbolicInput.properties.front().getRawFormula(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp new file mode 100644 index 000000000..52694180f --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -0,0 +1,419 @@ +#include +#include "ApproximatePOMDPModelchecker.h" +#include "storm/utility/vector.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { + //Intentionally left empty + } + + template + /*std::unique_ptr*/ void + ApproximatePOMDPModelchecker::computeReachabilityProbability( + storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, bool min, uint64_t gridResolution) { + //TODO add timing + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + + + std::vector> beliefGrid; + std::vector beliefIsKnown; + constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown); + std::map result; + std::map result_backup; + + std::vector>> observationProbabilities; + std::vector>>> nextBelieves; + + uint64_t nextId = beliefGrid.size(); + for (size_t i = 0; i < beliefGrid.size(); ++i) { + auto currentBelief = beliefGrid[i]; + 
bool isTarget = beliefIsKnown[i]; + if (isTarget) { + result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + } else { + result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + + std::vector> observationProbabilitiesInAction; + std::vector>> nextBelievesInAction; + + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, currentBelief, action); + std::map> actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + currentBelief, + action, + observation, + nextId); + ++nextId; + } + observationProbabilitiesInAction.push_back(actionObservationProbabilities); + nextBelievesInAction.push_back(actionObservationBelieves); + } + observationProbabilities.push_back(observationProbabilitiesInAction); + nextBelieves.push_back(nextBelievesInAction); + } + } + + // Value Iteration + while (!finished && iteration < maxIterations) { + STORM_LOG_DEBUG("Iteration " << std::to_string(iteration)); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + bool isTarget = beliefIsKnown[i]; + if (!isTarget) { + Belief currentBelief = beliefGrid[i]; + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + + // Initialize the values for the value iteration + ValueType bestValue = min ? 
storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = storm::utility::zero(); // simply change this for rewards? + + for (auto iter = observationProbabilities[i][action].begin(); + iter != observationProbabilities[i][action].end(); ++iter) { + uint32_t observation = iter->first; + Belief nextBelief = nextBelieves[i][action][observation]; + + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + currentBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); + } + } + + currentValue += iter->second * sum; + } + + // Update the selected actions + auto cc = storm::utility::ConstantsComparator(); + if ((min && cc.isLess(storm::utility::zero(), bestValue - currentValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - bestValue))) { + improvement = true; + bestValue = currentValue; + chosenActionIndex = action; + } + // TODO tie breaker? 
+ } + result[currentBelief.id] = bestValue; + } + } + finished = !improvement; + // back up + for (auto iter = result.begin(); iter != result.end(); ++iter) { + result_backup[iter->first] = result[iter->first]; + } + ++iteration; + } + + // maybe change this so the initial Belief always has ID 0 + Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType overApprox = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + overApprox += lambdas[j] * + result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, + subSimplex[j])]; + } + } + + STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox); + } + + template + uint64_t ApproximatePOMDPModelchecker::getBeliefIdInGrid( + std::vector> &grid, uint32_t observation, std::vector probabilities) { + for (auto const &belief : grid) { + if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) { + if (belief.probabilities == probabilities) { + STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); + return belief.id; + } + } + } + STORM_LOG_DEBUG("Did not find the belief in the grid"); + return -1; + } + + template + Belief ApproximatePOMDPModelchecker::getInitialBelief( + storm::models::sparse::Pomdp const &pomdp, uint64_t id) { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1, + "POMDP contains more than one initial state"); + std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); + uint32_t observation = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (pomdp.getInitialStates()[state] == 1) { + distribution[state] = storm::utility::one(); + observation = pomdp.getObservation(state); + } + } + return 
Belief{id, observation, distribution}; + } + + template + void ApproximatePOMDPModelchecker::constructBeliefGrid( + storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution, + std::vector> &grid, std::vector &beliefIsKnown) { + bool isTarget; + uint64_t newId = 0; + + for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { + std::vector statesWithObservation = pomdp.getStatesWithObservation(observation); + isTarget = target_observations.find(observation) != target_observations.end(); + + // TODO this can probably be condensed + if (statesWithObservation.size() == 1) { + // If there is only one state with the observation, we can directly add the corresponding belief + std::vector distribution(pomdp.getNumberOfStates(), + storm::utility::zero()); + distribution[statesWithObservation.front()] = storm::utility::one(); + Belief belief = {newId, observation, distribution}; + STORM_LOG_TRACE( + "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," + << distribution << "]"); + grid.push_back(belief); + beliefIsKnown.push_back(isTarget); + ++newId; + } else { + // Otherwise we have to enumerate all possible distributions with regards to the grid + // helper is used to derive the distribution of the belief + std::vector helper(statesWithObservation.size(), 0); + helper[0] = gridResolution; + bool done = false; + uint64_t index = 0; + + while (!done) { + std::vector distribution(pomdp.getNumberOfStates(), + storm::utility::zero()); + for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { + distribution[statesWithObservation[i]] = ValueType( + double(helper[i] - helper[i + 1]) / gridResolution); + } + distribution[statesWithObservation.back()] = ValueType( + double(helper[statesWithObservation.size() - 1]) / gridResolution); + + Belief belief = {newId, observation, distribution}; + STORM_LOG_TRACE( + "Add Belief " << std::to_string(newId) << " [(" << 
std::to_string(observation) + << ")," << distribution << "]"); + grid.push_back(belief); + beliefIsKnown.push_back(isTarget); + if (helper[statesWithObservation.size() - 1] == gridResolution) { + // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions + done = true; + } else { + // Update helper by finding the index to increment + index = statesWithObservation.size() - 1; + while (helper[index] == helper[index - 1]) { + --index; + } + STORM_LOG_ASSERT(index > 0, "Error in BeliefGrid generation - index wrong"); + // Increment the value at the index + ++helper[index]; + // Reset all indices greater than the changed one to 0 + ++index; + while (index < statesWithObservation.size()) { + helper[index] = 0; + ++index; + } + } + ++newId; + } + } + } + } + + template + std::pair>, std::vector> + ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( + std::vector probabilities, uint64_t resolution) { + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are based on the paper + + std::vector x(probabilities.size(), storm::utility::zero()); + std::vector v(probabilities.size(), storm::utility::zero()); + std::vector d(probabilities.size(), storm::utility::zero()); + + for (size_t i = 0; i < probabilities.size(); ++i) { + for (size_t j = i; j < probabilities.size(); ++j) { + x[i] += storm::utility::convertNumber(resolution) * probabilities[j]; + } + v[i] = storm::utility::floor(x[i]); + d[i] = x[i] - v[i]; + } + + auto p = storm::utility::vector::getSortedIndices(d); + + std::vector> qs; + for (size_t i = 0; i < probabilities.size(); ++i) { + std::vector q; + if (i == 0) { + for (size_t j = 0; j < probabilities.size(); ++j) { + q[i] = v[i]; + } + qs.push_back(q); + } else { + for (size_t j = 0; j < probabilities.size(); ++j) { + if (j == p[i - 1]) { + q[j] = qs[i - 1][j] + storm::utility::one(); + } else { + q[j] = qs[i - 1][j]; + } + } + qs.push_back(q); + } + } + + 
std::vector> subSimplex; + for (auto q : qs) { + std::vector node; + for (size_t i = 0; i < probabilities.size(); ++i) { + if (i != probabilities.size() - 1) { + node.push_back((q[i] - q[i + 1]) / storm::utility::convertNumber(resolution)); + } else { + node.push_back(q[i] / storm::utility::convertNumber(resolution)); + } + } + subSimplex.push_back(node); + } + + std::vector lambdas(probabilities.size(), storm::utility::zero()); + auto sum = storm::utility::zero(); + for (size_t i = 1; i < probabilities.size(); ++i) { + lambdas[i] = d[p[i - 1]] - d[p[i]]; + sum += d[p[i - 1]] - d[p[i]]; + } + lambdas[0] = storm::utility::one() - sum; + + //TODO add assertion that we are close enough + return std::make_pair(subSimplex, lambdas); + } + + + template + std::map + ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex) { + std::map res; + // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) + std::vector postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, + 0).probabilities; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + uint32_t observation = pomdp.getObservation(state); + if (postProbabilities[state] != storm::utility::zero()) { + if (res.count(observation) == 0) { + res[observation] = postProbabilities[state]; + } else { + res[observation] += postProbabilities[state]; + } + } + } + return res; + } + + template + Belief ApproximatePOMDPModelchecker::getBeliefAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex, uint64_t id) { + std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + uint32_t observation = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (belief.probabilities[state] != storm::utility::zero()) { + auto row = pomdp.getTransitionMatrix().getRow( + 
pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + observation = pomdp.getObservation(entry.getColumn()); + distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); + } + } + } + /* Should not be necessary + // We have to normalize the distribution + auto sum = storm::utility::zero(); + for(ValueType const& entry : distributionAfter){ + sum += entry; + } + for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){ + distributionAfter[i] /= sum; + }*/ + return Belief{id, observation, distributionAfter}; + } + + template + Belief + ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex, uint32_t observation, uint64_t id) { + std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (belief.probabilities[state] != storm::utility::zero()) { + auto row = pomdp.getTransitionMatrix().getRow( + pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + if (pomdp.getObservation(entry.getColumn()) == observation) { + distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); + } + } + } + } + // We have to normalize the distribution + auto sum = storm::utility::zero(); + for (ValueType const &entry : distributionAfter) { + sum += entry; + } + for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { + distributionAfter[i] /= sum; + } + return Belief{id, observation, distributionAfter}; + } + + + template + class ApproximatePOMDPModelchecker; + +#ifdef STORM_HAVE_CARL + + //template class ApproximatePOMDPModelchecker; + template + class ApproximatePOMDPModelchecker; + +#endif + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h new file mode 100644 index 000000000..4a4028288 --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -0,0 +1,118 @@ +#include +#include "storm/modelchecker/CheckTask.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/logging.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + class POMDPCheckResult; + + // Structure used to represent a belief + template + struct Belief { + uint64_t id; + uint32_t observation; + //TODO make this sparse? + std::vector probabilities; + }; + + template> + class ApproximatePOMDPModelchecker { + public: + explicit ApproximatePOMDPModelchecker(); + + /*std::unique_ptr*/ void + computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, bool min, + uint64_t gridResolution); + + std::unique_ptr + computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution); + + private: + /** + * + * @param pomdp + * @param id + * @return + */ + Belief + getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); + + + /** + * + * @param probabilities + * @param gridResolution + * @return + */ + std::pair>, std::vector> + computeSubSimplexAndLambdas(std::vector probabilities, uint64_t gridResolution); + + + /** + * Helper method to construct the grid of Belief states to approximate the POMDP + * + * @param pomdp + * @param gridResolution + * + */ + void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution, + std::vector> &grid, std::vector &beliefIsKnown); + + + /** + * Helper method to get the probabilities of each observation after performing an action + * + * @param pomdp + * @param belief + * @param actionIndex + * @return + */ + std::map computeObservationProbabilitiesAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief 
belief, + uint64_t actionIndex); + + /** + * Helper method to get the next belief that results from a belief by performing an action and observing an observation + * + * @param pomdp the POMDP on which the evaluation should be performed + * @param belief the starting belief + * @param actionIndex the index of the action to be performed + * @param observation the observation after the action was performed + * @return the resulting belief (observation and distribution) + */ + Belief + getBeliefAfterActionAndObservation(const models::sparse::Pomdp &pomdp, + Belief belief, + uint64_t actionIndex, uint32_t observation, uint64_t id); + + /** + * Helper method to get the next belief that results from a belief by performing an action + * + * @param pomdp + * @param belief + * @param actionIndex + * @return + */ + Belief + getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, + Belief belief, uint64_t actionIndex, uint64_t id); + + /** + * Helper to get the id of a Belief in the grid + * + * @param observation + * @param probabilities + * @return + */ + uint64_t getBeliefIdInGrid(std::vector> &grid, uint32_t observation, + std::vector probabilities); + }; + + } + } +} \ No newline at end of file diff --git a/src/storm/models/sparse/Pomdp.cpp b/src/storm/models/sparse/Pomdp.cpp index b3930822c..babc81e6b 100644 --- a/src/storm/models/sparse/Pomdp.cpp +++ b/src/storm/models/sparse/Pomdp.cpp @@ -59,8 +59,17 @@ namespace storm { return observations; } - - + template + std::vector + Pomdp::getStatesWithObservation(uint32_t observation) const { + std::vector result; + for (uint64_t state = 0; state < this->getNumberOfStates(); ++state) { + if (this->getObservation(state) == observation) { + result.push_back(state); + } + } + return result; + } template class Pomdp; template class Pomdp; diff --git a/src/storm/models/sparse/Pomdp.h b/src/storm/models/sparse/Pomdp.h index de77ef6f0..d480deebc 100644 --- a/src/storm/models/sparse/Pomdp.h +++ 
b/src/storm/models/sparse/Pomdp.h @@ -61,7 +61,7 @@ namespace storm { std::vector const& getObservations() const; - + std::vector getStatesWithObservation(uint32_t observation) const; protected: // TODO: consider a bitvector based presentation (depending on our needs). From 74cfecd0113302044b2acca09918b516f81b9260 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 11 Sep 2019 17:46:13 +0200 Subject: [PATCH 002/155] Working version of over-approximation --- src/storm-pomdp-cli/storm-pomdp.cpp | 1 + .../ApproximatePOMDPModelchecker.cpp | 73 +++++++++++++------ 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 730ce3ec8..880dc29ad 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -114,6 +114,7 @@ int main(const int argc, const char** argv) { // For ease of testing storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + checker.computeReachabilityProbability(*pomdp, std::set({7}), false, 10); std::shared_ptr formula; if (!symbolicInput.properties.empty()) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 52694180f..0688e1cf2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -68,27 +68,31 @@ namespace storm { } } + STORM_LOG_DEBUG("End of Section 1"); // Value Iteration + auto cc = storm::utility::ConstantsComparator(); while (!finished && iteration < maxIterations) { - STORM_LOG_DEBUG("Iteration " << std::to_string(iteration)); + STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { bool isTarget = beliefIsKnown[i]; if (!isTarget) { Belief currentBelief = beliefGrid[i]; + 
STORM_LOG_DEBUG( + "Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|" + << currentBelief.probabilities << "||"); // we can take any state with the observation as they have the same number of choices uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); - + STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices)); // Initialize the values for the value iteration - ValueType bestValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); uint64_t chosenActionIndex = std::numeric_limits::infinity(); ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { currentValue = storm::utility::zero(); // simply change this for rewards? - for (auto iter = observationProbabilities[i][action].begin(); iter != observationProbabilities[i][action].end(); ++iter) { uint32_t observation = iter->first; @@ -96,7 +100,7 @@ namespace storm { // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - currentBelief.probabilities, gridResolution); + nextBelief.probabilities, gridResolution); std::vector> subSimplex = temp.first; std::vector lambdas = temp.second; @@ -107,21 +111,37 @@ namespace storm { getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); } } - currentValue += iter->second * sum; } // Update the selected actions - auto cc = storm::utility::ConstantsComparator(); - if ((min && cc.isLess(storm::utility::zero(), bestValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - bestValue))) { - improvement = true; - bestValue = currentValue; + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { + chosenValue = 
currentValue; chosenActionIndex = action; } // TODO tie breaker? } - result[currentBelief.id] = bestValue; + result[currentBelief.id] = chosenValue; + // Check if the iteration brought an improvement + if ((min && cc.isLess(storm::utility::zero(), + result_backup[currentBelief.id] - result[currentBelief.id])) || + (!min && cc.isLess(storm::utility::zero(), + result[currentBelief.id] - result_backup[currentBelief.id]))) { + if (min) { + STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " + << result[currentBelief.id] << std::endl << "Delta: " + << result_backup[currentBelief.id] - result[currentBelief.id] + << std::endl); + } else { + STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " + << result[currentBelief.id] << std::endl << "Delta: " + << result_backup[currentBelief.id] - result[currentBelief.id] + << std::endl); + } + improvement = true; + } } } finished = !improvement; @@ -150,7 +170,7 @@ namespace storm { } } - STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); } template @@ -171,8 +191,10 @@ namespace storm { template Belief ApproximatePOMDPModelchecker::getInitialBelief( storm::models::sparse::Pomdp const &pomdp, uint64_t id) { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1, + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -212,8 +234,8 @@ namespace storm { } else { // Otherwise we have to enumerate all possible distributions with regards to the grid // helper is used to derive the distribution of the belief - std::vector helper(statesWithObservation.size(), 0); - 
helper[0] = gridResolution; + std::vector helper(statesWithObservation.size(), ValueType(0)); + helper[0] = storm::utility::convertNumber(gridResolution); bool done = false; uint64_t index = 0; @@ -221,11 +243,13 @@ namespace storm { std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { - distribution[statesWithObservation[i]] = ValueType( - double(helper[i] - helper[i + 1]) / gridResolution); + distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) / + storm::utility::convertNumber( + gridResolution); } - distribution[statesWithObservation.back()] = ValueType( - double(helper[statesWithObservation.size() - 1]) / gridResolution); + distribution[statesWithObservation.back()] = + helper[statesWithObservation.size() - 1] / + storm::utility::convertNumber(gridResolution); Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( @@ -233,7 +257,8 @@ namespace storm { << ")," << distribution << "]"); grid.push_back(belief); beliefIsKnown.push_back(isTarget); - if (helper[statesWithObservation.size() - 1] == gridResolution) { + if (helper[statesWithObservation.size() - 1] == + storm::utility::convertNumber(gridResolution)) { // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions done = true; } else { @@ -281,10 +306,10 @@ namespace storm { std::vector> qs; for (size_t i = 0; i < probabilities.size(); ++i) { - std::vector q; + std::vector q(probabilities.size(), storm::utility::zero()); if (i == 0) { for (size_t j = 0; j < probabilities.size(); ++j) { - q[i] = v[i]; + q[j] = v[j]; } qs.push_back(q); } else { From 2bc79e6e0714e2911d1795ba81f4a86b4403e2c0 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Sep 2019 17:13:39 +0200 Subject: [PATCH 003/155] Refactoring to include a list of all generated beliefs --- .../ApproximatePOMDPModelchecker.cpp | 218 ++++++++++++------ .../ApproximatePOMDPModelchecker.h | 37 
++- src/storm-pomdp/storage/Belief.h | 12 + 3 files changed, 180 insertions(+), 87 deletions(-) create mode 100644 src/storm-pomdp/storage/Belief.h diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 0688e1cf2..e44102656 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -20,17 +20,26 @@ namespace storm { bool finished = false; uint64_t iteration = 0; + std::vector> beliefList; + uint64_t nextId = 0; + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); - std::vector> beliefGrid; + std::vector> beliefGrid; std::vector beliefIsKnown; - constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown); + constructBeliefGrid(pomdp, target_observations, gridResolution, beliefList, beliefGrid, beliefIsKnown, + nextId); + nextId = beliefList.size(); + std::map result; std::map result_backup; + std::map chosenActions; std::vector>> observationProbabilities; - std::vector>>> nextBelieves; + std::vector>> nextBelieves; - uint64_t nextId = beliefGrid.size(); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsKnown[i]; @@ -42,23 +51,24 @@ namespace storm { result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); std::vector> observationProbabilitiesInAction; - std::vector>> nextBelievesInAction; + std::vector> nextBelievesInAction; uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); - std::map> actionObservationBelieves; + std::map 
actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, currentBelief, action, observation, nextId); - ++nextId; + nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); nextBelievesInAction.push_back(actionObservationBelieves); @@ -67,24 +77,18 @@ namespace storm { nextBelieves.push_back(nextBelievesInAction); } } - - STORM_LOG_DEBUG("End of Section 1"); // Value Iteration auto cc = storm::utility::ConstantsComparator(); while (!finished && iteration < maxIterations) { - STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl); + STORM_LOG_DEBUG("Iteration " << iteration + 1); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { bool isTarget = beliefIsKnown[i]; if (!isTarget) { - Belief currentBelief = beliefGrid[i]; - STORM_LOG_DEBUG( - "Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|" - << currentBelief.probabilities << "||"); + storm::pomdp::Belief currentBelief = beliefGrid[i]; // we can take any state with the observation as they have the same number of choices uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); - STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices)); // Initialize the values for the value iteration ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); @@ -96,8 +100,7 @@ namespace storm { for (auto iter = observationProbabilities[i][action].begin(); iter != observationProbabilities[i][action].end(); ++iter) { uint32_t observation = iter->first; - Belief nextBelief = nextBelieves[i][action][observation]; - + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[i][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief std::pair>, std::vector> temp = computeSubSimplexAndLambdas( nextBelief.probabilities, gridResolution); @@ -108,7 +111,7 @@ namespace storm { for (size_t j = 0; j < lambdas.size(); ++j) { if (lambdas[j] != storm::utility::zero()) { sum += lambdas[j] * result_backup.at( - getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -124,22 +127,12 @@ namespace storm { // TODO tie breaker? } result[currentBelief.id] = chosenValue; + chosenActions[currentBelief.id] = chosenActionIndex; // Check if the iteration brought an improvement if ((min && cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) || (!min && cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id]))) { - if (min) { - STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " - << result[currentBelief.id] << std::endl << "Delta: " - << result_backup[currentBelief.id] - result[currentBelief.id] - << std::endl); - } else { - STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " - << result[currentBelief.id] << std::endl << "Delta: " - << result_backup[currentBelief.id] - result[currentBelief.id] - << std::endl); - } improvement = true; } } @@ -152,32 +145,120 @@ namespace storm { ++iteration; } - // maybe change this so the initial Belief always has ID 0 - Belief initialBelief = getInitialBelief(pomdp, nextId); 
- ++nextId; + STORM_PRINT("Grid approximation took " << iteration << " iterations" << std::endl); + + beliefGrid.push_back(initialBelief); + beliefIsKnown.push_back( + target_observations.find(initialBelief.observation) != target_observations.end()); std::pair>, std::vector> temp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); - std::vector> subSimplex = temp.first; - std::vector lambdas = temp.second; - - ValueType overApprox = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (lambdas[j] != storm::utility::zero()) { - overApprox += lambdas[j] * - result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, - subSimplex[j])]; + std::vector> initSubSimplex = temp.first; + std::vector initLambdas = temp.second; + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; } } + // Now onto the under-approximation + + + /* All this has to be put into a separate function as we have to repeat it for other believes not in the grid + // first compute some things for the initial belief + std::vector> observationProbabilitiesInAction; + std::vector> nextBelievesInAction; + + uint64_t initialNumChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(initialBelief.observation).front()); + for (uint64_t action = 0; action < initialNumChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, initialBelief, action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, + initialBelief, + action, 
+ observation, + nextId); + } + observationProbabilitiesInAction.push_back(actionObservationProbabilities); + nextBelievesInAction.push_back(actionObservationBelieves); + } + observationProbabilities.push_back(observationProbabilitiesInAction); + nextBelieves.push_back(nextBelievesInAction); + + // do one step here to get the best action in the initial state + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < initialNumChoices; ++action) { + currentValue = storm::utility::zero(); // simply change this for rewards? + for (auto iter = observationProbabilities[initialBelief.id][action].begin(); + iter != observationProbabilities[initialBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[initialBelief.id][action][observation]]; + + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + sum += lambdas[j] * result.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + currentValue += iter->second * sum; + } + + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { + chosenValue = currentValue; + chosenActionIndex = action; + } + } + chosenActions[initialBelief.id] = chosenActionIndex;*/ + + std::set exploredBelieves; + std::deque believesToBeExplored; + + exploredBelieves.insert(initialBelief.id); + 
believesToBeExplored.push_back(initialBelief.id); + while (!believesToBeExplored.empty()) { + auto currentBeliefId = believesToBeExplored.front(); + if (chosenActions.find(currentBeliefId) != chosenActions.end()) { + + } else { + + } + believesToBeExplored.pop_front(); + } + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); } template - uint64_t ApproximatePOMDPModelchecker::getBeliefIdInGrid( - std::vector> &grid, uint32_t observation, std::vector probabilities) { + uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( + std::vector> &grid, uint32_t observation, + std::vector probabilities) { for (auto const &belief : grid) { - if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) { + if (belief.observation == observation) { if (belief.probabilities == probabilities) { STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); return belief.id; @@ -189,7 +270,7 @@ namespace storm { } template - Belief ApproximatePOMDPModelchecker::getInitialBelief( + storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief( storm::models::sparse::Pomdp const &pomdp, uint64_t id) { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); @@ -203,16 +284,18 @@ namespace storm { observation = pomdp.getObservation(state); } } - return Belief{id, observation, distribution}; + return storm::pomdp::Belief{id, observation, distribution}; } template void ApproximatePOMDPModelchecker::constructBeliefGrid( storm::models::sparse::Pomdp const &pomdp, std::set target_observations, uint64_t gridResolution, - std::vector> &grid, std::vector &beliefIsKnown) { + std::vector> &beliefList, + std::vector> &grid, std::vector &beliefIsKnown, + uint64_t nextId) { bool isTarget; - uint64_t newId = 0; + uint64_t newId = nextId; for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { std::vector statesWithObservation = 
pomdp.getStatesWithObservation(observation); @@ -224,10 +307,11 @@ namespace storm { std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); distribution[statesWithObservation.front()] = storm::utility::one(); - Belief belief = {newId, observation, distribution}; + storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); + beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); ++newId; @@ -251,10 +335,11 @@ namespace storm { helper[statesWithObservation.size() - 1] / storm::utility::convertNumber(gridResolution); - Belief belief = {newId, observation, distribution}; + storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); + beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); if (helper[statesWithObservation.size() - 1] == @@ -353,7 +438,8 @@ namespace storm { template std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) @@ -373,8 +459,10 @@ namespace storm { } template - Belief ApproximatePOMDPModelchecker::getBeliefAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::pomdp::Belief + ApproximatePOMDPModelchecker::getBeliefAfterAction( + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; @@ -388,22 +476,13 
@@ namespace storm { } } } - /* Should not be necessary - // We have to normalize the distribution - auto sum = storm::utility::zero(); - for(ValueType const& entry : distributionAfter){ - sum += entry; - } - for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){ - distributionAfter[i] /= sum; - }*/ - return Belief{id, observation, distributionAfter}; + return storm::pomdp::Belief{id, observation, distributionAfter}; } template - Belief - ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -425,7 +504,12 @@ namespace storm { for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { distributionAfter[i] /= sum; } - return Belief{id, observation, distributionAfter}; + if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { + return getBeliefIdInVector(beliefList, observation, distributionAfter); + } else { + beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); + return id; + } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 4a4028288..e5636d5a9 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -2,21 +2,13 @@ #include "storm/modelchecker/CheckTask.h" #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" +#include "storm-pomdp/storage/Belief.h" namespace storm { namespace pomdp { namespace modelchecker { class POMDPCheckResult; - // Structure used to 
represent a belief - template - struct Belief { - uint64_t id; - uint32_t observation; - //TODO make this sparse? - std::vector probabilities; - }; - template> class ApproximatePOMDPModelchecker { public: @@ -38,7 +30,7 @@ namespace storm { * @param id * @return */ - Belief + storm::pomdp::Belief getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); @@ -61,7 +53,9 @@ namespace storm { */ void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, std::set target_observations, uint64_t gridResolution, - std::vector> &grid, std::vector &beliefIsKnown); + std::vector> &beliefList, + std::vector> &grid, + std::vector &beliefIsKnown, uint64_t nextId); /** @@ -73,11 +67,13 @@ namespace storm { * @return */ std::map computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex); /** - * Helper method to get the next belief that results from a belief by performing an action and observing an observation + * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. 
+ * If the belief does not exist yet, it is created and added to the list of all beliefs * * @param pomdp the POMDP on which the evaluation should be performed * @param belief the starting belief @@ -85,9 +81,10 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - Belief + uint64_t getBeliefAfterActionAndObservation(const models::sparse::Pomdp &pomdp, - Belief belief, + std::vector> &beliefList, + storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id); /** @@ -98,19 +95,19 @@ namespace storm { * @param actionIndex * @return */ - Belief + storm::pomdp::Belief getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - Belief belief, uint64_t actionIndex, uint64_t id); + storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id); /** - * Helper to get the id of a Belief in the grid + * Helper to get the id of a Belief stored in a given vector structure * * @param observation * @param probabilities * @return */ - uint64_t getBeliefIdInGrid(std::vector> &grid, uint32_t observation, - std::vector probabilities); + uint64_t getBeliefIdInVector(std::vector> &grid, uint32_t observation, + std::vector probabilities); }; } diff --git a/src/storm-pomdp/storage/Belief.h b/src/storm-pomdp/storage/Belief.h new file mode 100644 index 000000000..bf4df4c9e --- /dev/null +++ b/src/storm-pomdp/storage/Belief.h @@ -0,0 +1,12 @@ +namespace storm { + namespace pomdp { + // Structure used to represent a belief + template + struct Belief { + uint64_t id; + uint32_t observation; + //TODO make this sparse? 
+ std::vector probabilities; + }; + } +} \ No newline at end of file From d814942997cb5c9c30ca9e8c6af122a0cdcfde7c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 25 Sep 2019 14:17:39 +0200 Subject: [PATCH 004/155] Working version of under-approximation --- src/storm-pomdp-cli/storm-pomdp.cpp | 3 +- src/storm-pomdp/CMakeLists.txt | 2 +- .../ApproximatePOMDPModelchecker.cpp | 249 ++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 45 +++- 4 files changed, 232 insertions(+), 67 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 880dc29ad..8d33fc2d2 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -23,6 +23,7 @@ #include "storm/settings/modules/AbstractionSettings.h" #include "storm/settings/modules/BuildSettings.h" #include "storm/settings/modules/JitBuilderSettings.h" +#include "storm/settings/modules/TopologicalEquationSolverSettings.h" #include "storm/settings/modules/MultiObjectiveSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" @@ -63,7 +64,7 @@ void initializeSettings() { storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); - + storm::settings::addModule(); storm::settings::addModule(); diff --git a/src/storm-pomdp/CMakeLists.txt b/src/storm-pomdp/CMakeLists.txt index 53643b738..37da8b794 100644 --- a/src/storm-pomdp/CMakeLists.txt +++ b/src/storm-pomdp/CMakeLists.txt @@ -17,7 +17,7 @@ set_target_properties(storm-pomdp PROPERTIES DEFINE_SYMBOL "") list(APPEND STORM_TARGETS storm-pomdp) set(STORM_TARGETS ${STORM_TARGETS} PARENT_SCOPE) -target_link_libraries(storm-pomdp PUBLIC storm ${STORM_POMDP_LINK_LIBRARIES}) +target_link_libraries(storm-pomdp PUBLIC storm storm-parsers ${STORM_POMDP_LINK_LIBRARIES}) # Install storm headers to include directory. 
foreach(HEADER ${STORM_POMDP_HEADERS}) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index e44102656..24d312c7f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1,6 +1,17 @@ -#include #include "ApproximatePOMDPModelchecker.h" + +#include + + +#include "storm/utility/ConstantsComparator.h" +#include "storm/models/sparse/Dtmc.h" +#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" +#include "storm/modelchecker/results/CheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/api/properties.h" +#include "storm-parsers/api/storm-parsers.h" namespace storm { namespace pomdp { @@ -14,35 +25,42 @@ namespace storm { /*std::unique_ptr*/ void ApproximatePOMDPModelchecker::computeReachabilityProbability( storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, bool min, uint64_t gridResolution) { + std::set targetObservations, bool min, uint64_t gridResolution) { //TODO add timing uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; std::vector> beliefList; + std::vector beliefIsTarget; uint64_t nextId = 0; // Initial belief always has ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + std::vector> beliefGrid; - std::vector beliefIsKnown; - constructBeliefGrid(pomdp, target_observations, gridResolution, beliefList, beliefGrid, beliefIsKnown, + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); + // ID -> Value 
std::map result; std::map result_backup; + // ID -> ActionIndex std::map chosenActions; - std::vector>> observationProbabilities; - std::vector>> nextBelieves; + // ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsKnown[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); @@ -64,6 +82,8 @@ namespace storm { uint32_t observation = iter->first; actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, + beliefIsTarget, + targetObservations, currentBelief, action, observation, @@ -73,8 +93,9 @@ namespace storm { observationProbabilitiesInAction.push_back(actionObservationProbabilities); nextBelievesInAction.push_back(actionObservationBelieves); } - observationProbabilities.push_back(observationProbabilitiesInAction); - nextBelieves.push_back(nextBelievesInAction); + observationProbabilities.emplace( + std::make_pair(currentBelief.id, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); } } // Value Iteration @@ -83,9 +104,9 @@ namespace storm { STORM_LOG_DEBUG("Iteration " << iteration + 1); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { - bool isTarget = beliefIsKnown[i]; + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; // we can take any state with the observation as they have the same number of choices uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); @@ -97,17 +118,17 @@ namespace 
storm { for (uint64_t action = 0; action < numChoices; ++action) { currentValue = storm::utility::zero(); // simply change this for rewards? - for (auto iter = observationProbabilities[i][action].begin(); - iter != observationProbabilities[i][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[i][action][observation]]; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief std::pair>, std::vector> temp = computeSubSimplexAndLambdas( nextBelief.probabilities, gridResolution); std::vector> subSimplex = temp.first; std::vector lambdas = temp.second; - ValueType sum = storm::utility::zero(); + auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (lambdas[j] != storm::utility::zero()) { sum += lambdas[j] * result_backup.at( @@ -117,12 +138,19 @@ namespace storm { currentValue += iter->second * sum; } - // Update the selected actions + // Update the selected actions TODO make this nicer if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { chosenValue = currentValue; chosenActionIndex = action; + } else if ((min && cc.isEqual(storm::utility::zero(), + chosenValue - currentValue)) || + (!min && + cc.isEqual(storm::utility::zero(), + currentValue - chosenValue))) { + chosenValue = currentValue; + chosenActionIndex = action; } // TODO tie breaker? 
} @@ -148,8 +176,8 @@ namespace storm { STORM_PRINT("Grid approximation took " << iteration << " iterations" << std::endl); beliefGrid.push_back(initialBelief); - beliefIsKnown.push_back( - target_observations.find(initialBelief.observation) != target_observations.end()); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); std::pair>, std::vector> temp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); @@ -167,92 +195,197 @@ namespace storm { // Now onto the under-approximation + std::set visitedBelieves; + std::deque believesToBeExpanded; + std::map beliefStateMap; + std::vector> transitions; + std::vector targetStates; + + uint64_t stateId = 0; + beliefStateMap[initialBelief.id] = stateId; + ++stateId; + + // Expand the believes TODO capsuling + visitedBelieves.insert(initialBelief.id); + believesToBeExpanded.push_back(initialBelief.id); + while (!believesToBeExpanded.empty()) { + auto currentBeliefId = believesToBeExpanded.front(); + std::map transitionsInState; + STORM_LOG_DEBUG("Exploring Belief " << beliefList[currentBeliefId].observation << "||" + << beliefList[currentBeliefId].probabilities); + if (beliefIsTarget[currentBeliefId]) { + // add a self-loop to target states and save them + transitionsInState[beliefStateMap[currentBeliefId]] = storm::utility::one(); + targetStates.push_back(beliefStateMap[currentBeliefId]); + } else { + if (chosenActions.find(currentBeliefId) == chosenActions.end()) { + // If the current Belief is not part of the grid, we have not computed the action to choose yet + chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, + targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min); + } + for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin(); + iter != + 
observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) { + uint32_t observation = iter->first; + uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation]; + if (visitedBelieves.insert(nextBeliefId).second) { + beliefStateMap[nextBeliefId] = stateId; + ++stateId; + believesToBeExpanded.push_back(nextBeliefId); + } + transitionsInState[beliefStateMap[nextBeliefId]] = iter->second; + } + } + transitions.push_back(transitionsInState); + believesToBeExpanded.pop_front(); + } + + for (size_t i = 0; i < transitions.size(); ++i) { + for (auto const &transition : transitions[i]) { + STORM_LOG_DEBUG( + "Transition: " << i << " -- " << transition.second << "--> " << transition.first); + } + } + storm::models::sparse::StateLabeling labeling(transitions.size()); + labeling.addLabel("init"); + labeling.addLabel("target"); + labeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + labeling.addLabelToState("target", targetState); + } + storm::storage::sparse::ModelComponents modelComponents( + buildTransitionMatrix(transitions), labeling); + + storm::models::sparse::Dtmc underApproxDtmc(modelComponents); + auto model = std::make_shared>(underApproxDtmc); + model->printModelInformationToStream(std::cout); + + std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties( + propertyVector).front(); + + std::unique_ptr res( + storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, + true))); + STORM_LOG_ASSERT(res, "Result does not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + ValueType resultValue = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << resultValue << std::endl); + } + + template + storm::storage::SparseMatrix + ApproximatePOMDPModelchecker::buildTransitionMatrix( + std::vector> transitions) { + uint_fast64_t currentRow = 0; + uint64_t nrEntries = 0; + for (auto const &map : transitions) { + nrEntries += map.size(); + } + storm::storage::SparseMatrixBuilder smb(transitions.size(), transitions.size(), nrEntries); + for (auto const &map : transitions) { + for (auto const &transition : map) { + smb.addNextValue(currentRow, transition.first, transition.second); + } + ++currentRow; + } + return smb.build(); + } - /* All this has to be put into a separate function as we have to repeat it for other believes not in the grid - // first compute some things for the initial belief + template + uint64_t ApproximatePOMDPModelchecker::extractBestAction( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { + auto cc = storm::utility::ConstantsComparator(); + storm::pomdp::Belief currentBelief = beliefList[currentBeliefId]; std::vector> observationProbabilitiesInAction; std::vector> nextBelievesInAction; 
- uint64_t initialNumChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(initialBelief.observation).front()); - for (uint64_t action = 0; action < initialNumChoices; ++action) { + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, initialBelief, action); + pomdp, currentBelief, action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, - initialBelief, + beliefIsTarget, + targetObservations, + currentBelief, action, observation, nextId); + nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); nextBelievesInAction.push_back(actionObservationBelieves); } - observationProbabilities.push_back(observationProbabilitiesInAction); - nextBelieves.push_back(nextBelievesInAction); + //STORM_LOG_DEBUG("ID " << currentBeliefId << " add " << observationProbabilitiesInAction); + observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); - // do one step here to get the best action in the initial state + // choose the action which results in the value computed by the over-approximation ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); uint64_t chosenActionIndex = std::numeric_limits::infinity(); ValueType currentValue; - for (uint64_t action = 0; action < initialNumChoices; ++action) { + for (uint64_t action = 0; action < numChoices; ++action) { currentValue = storm::utility::zero(); // simply change this for rewards? - for (auto iter = observationProbabilities[initialBelief.id][action].begin(); - iter != observationProbabilities[initialBelief.id][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[initialBelief.id][action][observation]]; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - temp = computeSubSimplexAndLambdas( + auto temp = computeSubSimplexAndLambdas( nextBelief.probabilities, gridResolution); std::vector> subSimplex = temp.first; std::vector lambdas = temp.second; - ValueType sum = storm::utility::zero(); + auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (lambdas[j] != storm::utility::zero()) { sum += lambdas[j] * result.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + getBeliefIdInVector(beliefList, observation, subSimplex[j])); } } currentValue += iter->second * sum; } - // Update the selected actions + // Update the selected actions TODO make this nicer if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { chosenValue = currentValue; chosenActionIndex = action; + } else if ((min && cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isEqual(storm::utility::zero(), 
currentValue - chosenValue))) { + chosenValue = currentValue; + chosenActionIndex = action; } } - chosenActions[initialBelief.id] = chosenActionIndex;*/ - - std::set exploredBelieves; - std::deque believesToBeExplored; - - exploredBelieves.insert(initialBelief.id); - believesToBeExplored.push_back(initialBelief.id); - while (!believesToBeExplored.empty()) { - auto currentBeliefId = believesToBeExplored.front(); - if (chosenActions.find(currentBeliefId) != chosenActions.end()) { - - } else { - - } - believesToBeExplored.pop_front(); - } - - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + return chosenActionIndex; } + template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> &grid, uint32_t observation, @@ -260,12 +393,10 @@ namespace storm { for (auto const &belief : grid) { if (belief.observation == observation) { if (belief.probabilities == probabilities) { - STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); return belief.id; } } } - STORM_LOG_DEBUG("Did not find the belief in the grid"); return -1; } @@ -482,7 +613,8 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, storm::pomdp::Belief belief, + std::vector> &beliefList, std::vector &beliefIsTarget, + std::set &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -508,6 +640,7 @@ namespace storm { return getBeliefIdInVector(beliefList, observation, distributionAfter); } else { beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); + beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); return id; } } diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index e5636d5a9..46b5ee1e4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -1,22 +1,24 @@ #include -#include "storm/modelchecker/CheckTask.h" +#include "storm/api/storm.h" #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" +#include "storm/storage/jani/Property.h" + namespace storm { namespace pomdp { namespace modelchecker { class POMDPCheckResult; - template> + template> class ApproximatePOMDPModelchecker { public: explicit ApproximatePOMDPModelchecker(); /*std::unique_ptr*/ void computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, bool min, + std::set targetObservations, bool min, uint64_t gridResolution); std::unique_ptr @@ -24,6 +26,29 @@ namespace storm { std::set target_observations, uint64_t gridResolution); private: + /** + * TODO + * @param pomdp + * @param beliefList + * @param observationProbabilities + * @param nextBelieves + * @param result + * @param gridResolution + * @param currentBeliefId + * @param nextId + * @param min + * @return + */ + uint64_t extractBestAction(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &target_observations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, + bool min); + /** * * @param pomdp @@ -82,10 +107,13 @@ namespace storm { * @return the resulting belief (observation and distribution) */ uint64_t - getBeliefAfterActionAndObservation(const models::sparse::Pomdp &pomdp, - std::vector> &beliefList, - storm::pomdp::Belief belief, - uint64_t actionIndex, uint32_t observation, uint64_t id); + 
getBeliefAfterActionAndObservation( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + storm::pomdp::Belief belief, + uint64_t actionIndex, uint32_t observation, uint64_t id); /** * Helper method to get the next belief that results from a belief by performing an action @@ -108,6 +136,9 @@ namespace storm { */ uint64_t getBeliefIdInVector(std::vector> &grid, uint32_t observation, std::vector probabilities); + + storm::storage::SparseMatrix + buildTransitionMatrix(std::vector> transitions); }; } From 3bd910f42b3c7979d7664b8782ebdf678d91f5be Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 1 Oct 2019 17:11:49 +0200 Subject: [PATCH 005/155] Added timing and caching of subsimplex computation results --- .../settings/modules/POMDPSettings.cpp | 17 ++++ .../settings/modules/POMDPSettings.h | 4 + src/storm-pomdp-cli/storm-pomdp.cpp | 42 ++++++++- .../ApproximatePOMDPModelchecker.cpp | 85 +++++++++++-------- .../ApproximatePOMDPModelchecker.h | 10 ++- 5 files changed, 117 insertions(+), 41 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 2eb418dab..617cebf36 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -14,6 +14,7 @@ namespace storm { const std::string POMDPSettings::moduleName = "pomdp"; const std::string exportAsParametricModelOption = "parametric-drn"; + const std::string gridApproximationOption = "gridapproximation"; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string mecReductionOption = "mecreduction"; @@ -37,6 +38,13 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is 
obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false, + "Analyze the POMDP using grid approximation.").addArgument( + storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution", + "the resolution of the grid").setDefaultValueUnsignedInteger( + 10).addValidatorUnsignedInteger( + storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator( + 0)).build()).build()); } bool POMDPSettings::isExportToParametricSet() const { @@ -62,6 +70,15 @@ namespace storm { bool POMDPSettings::isSelfloopReductionSet() const { return this->getOption(selfloopReductionOption).getHasOptionBeenSet(); } + + bool POMDPSettings::isGridApproximationSet() const { + return this->getOption(gridApproximationOption).getHasOptionBeenSet(); + } + + uint64_t POMDPSettings::getGridResolution() const { + return this->getOption(gridApproximationOption).getArgumentByName( + "resolution").getValueAsUnsignedInteger(); + } uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 5b1d6cd9b..1e68871e8 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -25,6 +25,8 @@ namespace storm { std::string getExportToParametricFilename() 
const; bool isQualitativeReductionSet() const; + + bool isGridApproximationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; @@ -32,6 +34,8 @@ namespace storm { bool isTransformBinarySet() const; std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; + + uint64_t getGridResolution() const; storm::storage::PomdpMemoryPattern getMemoryPattern() const; bool check() const override; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 8d33fc2d2..18f446d20 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -42,6 +42,8 @@ #include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" #include "storm/api/storm.h" +#include + /*! * Initialize the settings manager. */ @@ -113,10 +115,6 @@ int main(const int argc, const char** argv) { STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); std::shared_ptr> pomdp = model->template as>(); - // For ease of testing - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - checker.computeReachabilityProbability(*pomdp, std::set({7}), false, 10); - std::shared_ptr formula; if (!symbolicInput.properties.empty()) { formula = symbolicInput.properties.front().getRawFormula(); @@ -149,6 +147,42 @@ int main(const int argc, const char** argv) { STORM_PRINT_AND_LOG(" done." << std::endl); std::cout << "actual reduction not yet implemented..." 
<< std::endl; } + if (pomdpSettings.isGridApproximationSet()) { + storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); + storm::logic::Formula const &subformula1 = probFormula.getSubformula(); + + std::set targetObservationSet; + //TODO refactor + bool validFormula = false; + if (subformula1.isEventuallyFormula()) { + storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); + storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } + } + STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, + "The formula is not supported by the grid approximation"); + + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + storm::RationalNumber overRes = storm::utility::one(); + storm::RationalNumber underRes = storm::utility::zero(); + std::unique_ptr> result; + result = checker.computeReachabilityProbability(*pomdp, targetObservationSet, + probFormula.getOptimalityType() == + storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution() + gridIncrease); + overRes = result->OverapproximationValue; + underRes = result->UnderapproximationValue; + } } else if (formula->isRewardOperatorFormula()) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 24d312c7f..f8c9709aa 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,11 +22,11 @@ namespace storm { } template - /*std::unique_ptr*/ void + std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability( storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { - //TODO add timing + storm::utility::Stopwatch beliefGridTimer(true); uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; @@ -46,7 +46,9 @@ namespace storm { constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); + beliefGridTimer.stop(); + storm::utility::Stopwatch overApproxTimer(true); // ID -> Value std::map result; std::map result_backup; @@ -58,6 +60,7 @@ namespace storm { // current ID -> action -> next ID std::map>> nextBelieves; + storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsTarget[currentBelief.id]; @@ -68,6 +71,7 @@ namespace storm { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + //TODO put this in extra function std::vector> observationProbabilitiesInAction; std::vector> nextBelievesInAction; @@ -98,9 +102,17 @@ namespace storm { nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); } } + nextBeliefGeneration.stop(); + + //Use chaching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + + STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // 
Value Iteration auto cc = storm::utility::ConstantsComparator(); while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); STORM_LOG_DEBUG("Iteration " << iteration + 1); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { @@ -123,10 +135,20 @@ namespace storm { uint32_t observation = iter->first; storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - std::vector> subSimplex = temp.first; - std::vector lambdas = temp.second; + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { @@ -138,17 +160,11 @@ namespace storm { currentValue += iter->second * sum; } - // Update the selected actions TODO make this nicer + // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { - chosenValue = currentValue; - chosenActionIndex = action; - } else if ((min && cc.isEqual(storm::utility::zero(), - chosenValue - currentValue)) || - (!min && - cc.isEqual(storm::utility::zero(), - currentValue - chosenValue))) { + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - 
currentValue)) { chosenValue = currentValue; chosenActionIndex = action; } @@ -171,9 +187,11 @@ namespace storm { result_backup[iter->first] = result[iter->first]; } ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); } - STORM_PRINT("Grid approximation took " << iteration << " iterations" << std::endl); + STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); beliefGrid.push_back(initialBelief); beliefIsTarget.push_back( @@ -192,9 +210,10 @@ namespace storm { initSubSimplex[j])]; } } + overApproxTimer.stop(); // Now onto the under-approximation - + storm::utility::Stopwatch underApproxTimer(true); std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -243,12 +262,6 @@ namespace storm { believesToBeExpanded.pop_front(); } - for (size_t i = 0; i < transitions.size(); ++i) { - for (auto const &transition : transitions[i]) { - STORM_LOG_DEBUG( - "Transition: " << i << " -- " << transition.second << "--> " << transition.first); - } - } storm::models::sparse::StateLabeling labeling(transitions.size()); labeling.addLabel("init"); labeling.addLabel("target"); @@ -276,8 +289,17 @@ namespace storm { ValueType resultValue = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl + << "Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << resultValue << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, resultValue}); } template @@ -311,9 +333,10 @@ namespace storm { uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { auto cc = storm::utility::ConstantsComparator(); storm::pomdp::Belief currentBelief = 
beliefList[currentBeliefId]; + + //TODO put this in extra function std::vector> observationProbabilitiesInAction; std::vector> nextBelievesInAction; - uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); for (uint64_t action = 0; action < numChoices; ++action) { @@ -354,8 +377,7 @@ namespace storm { storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - auto temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution); std::vector> subSimplex = temp.first; std::vector lambdas = temp.second; @@ -369,15 +391,11 @@ namespace storm { currentValue += iter->second * sum; } - // Update the selected actions TODO make this nicer + // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { - chosenValue = currentValue; - chosenActionIndex = action; - } else if ((min && cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isEqual(storm::utility::zero(), currentValue - chosenValue))) { + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; chosenActionIndex = action; } @@ -561,7 +579,6 @@ namespace storm { } lambdas[0] = storm::utility::one() - sum; - //TODO add assertion that we are close enough return std::make_pair(subSimplex, lambdas); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 46b5ee1e4..896435bce 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -9,19 +9,23 @@ namespace storm { namespace pomdp { namespace modelchecker { - class POMDPCheckResult; + template + struct POMDPCheckResult { + ValueType OverapproximationValue; + ValueType UnderapproximationValue; + }; template> class ApproximatePOMDPModelchecker { public: explicit ApproximatePOMDPModelchecker(); - /*std::unique_ptr*/ void + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution); - std::unique_ptr + std::unique_ptr> computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set target_observations, uint64_t gridResolution); From 959a2c240020aac8ee83de2d29ffbacb4959a2ed Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 11 Oct 2019 10:57:30 +0200 Subject: [PATCH 006/155] Added ability to use an MDP for the underapproximation --- .../ApproximatePOMDPModelchecker.cpp | 228 +++++++++++++++--- .../ApproximatePOMDPModelchecker.h | 38 +++ 2 files changed, 239 insertions(+), 27 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index f8c9709aa..351694c27 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -49,13 +49,13 @@ namespace storm { beliefGridTimer.stop(); storm::utility::Stopwatch overApproxTimer(true); - // ID -> Value + // Belief ID -> Value std::map result; std::map result_backup; - // ID -> ActionIndex + // Belief ID -> ActionIndex std::map chosenActions; - // ID -> Observation -> Probability + // Belief ID -> Observation -> Probability std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; @@ -108,9 +108,10 @@ namespace storm { std::map>> subSimplexCache; std::map> lambdaCache; + STORM_PRINT("Nr Believes " << beliefList.size() 
<< std::endl) STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - auto cc = storm::utility::ConstantsComparator(); + storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); while (!finished && iteration < maxIterations) { storm::utility::Stopwatch iterationTimer(true); STORM_LOG_DEBUG("Iteration " << iteration + 1); @@ -149,17 +150,15 @@ namespace storm { subSimplexCache[nextBelief.id] = subSimplex; lambdaCache[nextBelief.id] = lambdas; } - auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { - if (lambdas[j] != storm::utility::zero()) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { sum += lambdas[j] * result_backup.at( getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } currentValue += iter->second * sum; } - // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && @@ -168,7 +167,6 @@ namespace storm { chosenValue = currentValue; chosenActionIndex = action; } - // TODO tie breaker? } result[currentBelief.id] = chosenValue; chosenActions[currentBelief.id] = chosenActionIndex; @@ -182,10 +180,13 @@ namespace storm { } } finished = !improvement; + storm::utility::Stopwatch backupTimer(true); // back up for (auto iter = result.begin(); iter != result.end(); ++iter) { result_backup[iter->first] = result[iter->first]; } + backupTimer.stop(); + STORM_PRINT("Time Backup " << backupTimer << std::endl); ++iteration; iterationTimer.stop(); STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); @@ -213,7 +214,38 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation + bool useMdp = false;//true; storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, + chosenActions, gridResolution, initialBelief.id, min); + underApproxTimer.stop(); + + STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl + << "Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, underApprox}); + } + + template + ValueType + ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -221,12 +253,12 @@ namespace storm { std::vector targetStates; uint64_t stateId = 0; - beliefStateMap[initialBelief.id] = stateId; + beliefStateMap[initialBeliefId] = stateId; ++stateId; - // Expand the believes TODO capsuling - visitedBelieves.insert(initialBelief.id); - believesToBeExpanded.push_back(initialBelief.id); + // Expand the believes + visitedBelieves.insert(initialBeliefId); + believesToBeExpanded.push_back(initialBeliefId); while (!believesToBeExpanded.empty()) { auto currentBeliefId = believesToBeExpanded.front(); std::map transitionsInState; @@ -286,20 +318,154 @@ namespace storm { true))); STORM_LOG_ASSERT(res, "Result 
does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - ValueType resultValue = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + } + template + ValueType + ApproximatePOMDPModelchecker::computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min) { + std::set visitedBelieves; + std::deque believesToBeExpanded; + std::map beliefStateMap; + std::vector>> transitions; + std::vector targetStates; - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); + uint64_t stateId = 0; + beliefStateMap[initialBeliefId] = stateId; + ++stateId; - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << resultValue << std::endl); + // Expand the believes + visitedBelieves.insert(initialBeliefId); + believesToBeExpanded.push_back(initialBeliefId); + while (!believesToBeExpanded.empty()) { + auto currentBeliefId = believesToBeExpanded.front(); + std::vector> actionTransitionStorage; + // for targets, we only consider one action with one transition + if (beliefIsTarget[currentBeliefId]) { + // add a self-loop to target states and save them + std::map transitionsInStateWithAction; + transitionsInStateWithAction[beliefStateMap[currentBeliefId]] = storm::utility::one(); + targetStates.push_back(beliefStateMap[currentBeliefId]); + actionTransitionStorage.push_back(transitionsInStateWithAction); + } else { + uint64_t numChoices = 
pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); + if (chosenActions.find(currentBeliefId) == chosenActions.end()) { + // If the current Belief is not part of the grid, the next states have not been computed yet. + // For now, this is a very dirty workaround because I am currently to lazy to refactor everything to be able to do this without the extractBestAction method + std::vector> observationProbabilitiesInAction; + std::vector> nextBelievesInAction; + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, beliefList[currentBeliefId], action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, + beliefIsTarget, + targetObservations, + beliefList[currentBeliefId], + action, + observation, + beliefList.size()); + } + observationProbabilitiesInAction.push_back(actionObservationProbabilities); + nextBelievesInAction.push_back(actionObservationBelieves); + } + observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); + } + // Iterate over all actions and add the corresponding transitions + for (uint64_t action = 0; action < numChoices; ++action) { + std::map transitionsInStateWithAction; - return std::make_unique>( - POMDPCheckResult{overApprox, resultValue}); + for (auto iter = observationProbabilities[currentBeliefId][action].begin(); + iter != + observationProbabilities[currentBeliefId][action].end(); ++iter) { + uint32_t observation = iter->first; + uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; + if 
(visitedBelieves.insert(nextBeliefId).second) { + beliefStateMap[nextBeliefId] = stateId; + ++stateId; + believesToBeExpanded.push_back(nextBeliefId); + } + transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; + //STORM_PRINT("Transition with action " << action << " from state " << beliefStateMap[currentBeliefId] << " to state " << beliefStateMap[nextBeliefId] << " with prob " << iter->second << std::endl) + } + actionTransitionStorage.push_back(transitionsInStateWithAction); + } + } + transitions.push_back(actionTransitionStorage); + believesToBeExpanded.pop_front(); + } + + storm::models::sparse::StateLabeling labeling(transitions.size()); + labeling.addLabel("init"); + labeling.addLabel("target"); + labeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + labeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents modelComponents( + buildTransitionMatrix(transitions), labeling); + + storm::models::sparse::Mdp underApproxMdp(modelComponents); + auto model = std::make_shared>(underApproxMdp); + model->printModelInformationToStream(std::cout); + + std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties( + propertyVector).front(); + + std::unique_ptr res( + storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, + true))); + STORM_LOG_ASSERT(res, "Result does not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + } + + + template + storm::storage::SparseMatrix + ApproximatePOMDPModelchecker::buildTransitionMatrix( + std::vector>> transitions) { + uint_fast64_t currentRow = 0; + uint_fast64_t currentRowGroup = 0; + uint64_t nrColumns = transitions.size(); + uint64_t nrRows = 0; + uint64_t nrEntries = 0; + for (auto const &actionTransitions : transitions) { + for (auto const &map : actionTransitions) { + nrEntries += map.size(); + ++nrRows; + } + } + storm::storage::SparseMatrixBuilder smb(nrRows, nrColumns, nrEntries, true, true); + for (auto const &actionTransitions : transitions) { + smb.newRowGroup(currentRow); + for (auto const &map : actionTransitions) { + for (auto const &transition : map) { + //STORM_PRINT(" Add transition from state " << currentRowGroup << " to state " << transition.first << " with prob " << transition.second << std::endl) + smb.addNextValue(currentRow, transition.first, transition.second); + } + ++currentRow; + } + ++currentRowGroup; + } + return smb.build(); } template @@ -331,7 +497,7 @@ namespace storm { std::map>> &nextBelieves, std::map &result, uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { - auto cc = storm::utility::ConstantsComparator(); + storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); storm::pomdp::Belief currentBelief = beliefList[currentBeliefId]; //TODO put this in extra function @@ -344,7 +510,7 @@ namespace storm { pomdp, 
currentBelief, action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, @@ -359,7 +525,6 @@ namespace storm { observationProbabilitiesInAction.push_back(actionObservationProbabilities); nextBelievesInAction.push_back(actionObservationBelieves); } - //STORM_LOG_DEBUG("ID " << currentBeliefId << " add " << observationProbabilitiesInAction); observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); @@ -383,7 +548,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { - if (lambdas[j] != storm::utility::zero()) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { sum += lambdas[j] * result.at( getBeliefIdInVector(beliefList, observation, subSimplex[j])); } @@ -408,13 +573,22 @@ namespace storm { uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> &grid, uint32_t observation, std::vector probabilities) { + storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); for (auto const &belief : grid) { if (belief.observation == observation) { - if (belief.probabilities == probabilities) { + bool same = true; + for (size_t i = 0; i < belief.probabilities.size(); ++i) { + if (!cc.isEqual(belief.probabilities[i], probabilities[i])) { + same = false; + break; + } + } + if (same) { return belief.id; } } } + return -1; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 896435bce..cbde135dc 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -53,6 +53,41 @@ namespace storm { uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min); + /** + * TODO + * @param pomdp + * @param beliefList + * @param beliefIsTarget + * @param targetObservations + * @param observationProbabilities + * @param nextBelieves + * @param result + * @param chosenActions + * @param gridResolution + * @param initialBeliefId + * @param min + * @return + */ + ValueType computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min); + + ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min); + /** * * @param pomdp @@ -143,6 +178,9 @@ namespace storm { storm::storage::SparseMatrix buildTransitionMatrix(std::vector> transitions); + + storm::storage::SparseMatrix + buildTransitionMatrix(std::vector>> transitions); }; } From 5de96cc170d894baaded19106f268747b5006c29 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 11 Oct 2019 10:59:45 +0200 Subject: [PATCH 007/155] Modified implementation to speed up the subsimplex computation --- .../ApproximatePOMDPModelchecker.cpp | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 351694c27..69e9bea80 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp 
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -697,14 +697,14 @@ namespace storm { std::vector probabilities, uint64_t resolution) { // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper - - std::vector x(probabilities.size(), storm::utility::zero()); - std::vector v(probabilities.size(), storm::utility::zero()); - std::vector d(probabilities.size(), storm::utility::zero()); + std::vector x(probabilities.size()); + std::vector v(probabilities.size()); + std::vector d(probabilities.size()); + auto convResolution = storm::utility::convertNumber(resolution); for (size_t i = 0; i < probabilities.size(); ++i) { for (size_t j = i; j < probabilities.size(); ++j) { - x[i] += storm::utility::convertNumber(resolution) * probabilities[j]; + x[i] += convResolution * probabilities[j]; } v[i] = storm::utility::floor(x[i]); d[i] = x[i] - v[i]; @@ -712,37 +712,29 @@ namespace storm { auto p = storm::utility::vector::getSortedIndices(d); - std::vector> qs; + std::vector> qs(probabilities.size(), std::vector(probabilities.size())); for (size_t i = 0; i < probabilities.size(); ++i) { - std::vector q(probabilities.size(), storm::utility::zero()); if (i == 0) { for (size_t j = 0; j < probabilities.size(); ++j) { - q[j] = v[j]; + qs[i][j] = v[j]; } - qs.push_back(q); } else { for (size_t j = 0; j < probabilities.size(); ++j) { if (j == p[i - 1]) { - q[j] = qs[i - 1][j] + storm::utility::one(); + qs[i][j] = qs[i - 1][j] + storm::utility::one(); } else { - q[j] = qs[i - 1][j]; + qs[i][j] = qs[i - 1][j]; } } - qs.push_back(q); } } - - std::vector> subSimplex; - for (auto q : qs) { - std::vector node; - for (size_t i = 0; i < probabilities.size(); ++i) { - if (i != probabilities.size() - 1) { - node.push_back((q[i] - q[i + 1]) / storm::utility::convertNumber(resolution)); - } else { - node.push_back(q[i] / storm::utility::convertNumber(resolution)); - } + std::vector> subSimplex(qs.size(), 
std::vector(probabilities.size())); + for (size_t j = 0; j < qs.size(); ++j) { + for (size_t i = 0; i < probabilities.size() - 1; ++i) { + subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; } - subSimplex.push_back(node); + + subSimplex[j][probabilities.size() - 1] = qs[j][probabilities.size() - 1] / convResolution; } std::vector lambdas(probabilities.size(), storm::utility::zero()); From f6d9a6ac021ca55004aaf5627b699445a5e9f19a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 11 Oct 2019 11:02:35 +0200 Subject: [PATCH 008/155] Changed datatype used in POMDP analysis from RationalNumber to double for better comparision of approximation speeds with PRISM --- src/storm-pomdp-cli/storm-pomdp.cpp | 56 +++++++++++-------- .../analysis/QualitativeAnalysis.cpp | 4 +- .../analysis/UniqueObservationStates.cpp | 4 +- .../ApplyFiniteSchedulerToPomdp.cpp | 3 + .../transformer/BinaryPomdpTransformer.cpp | 2 + .../GlobalPOMDPSelfLoopEliminator.cpp | 3 + .../GlobalPomdpMecChoiceEliminator.cpp | 3 + .../transformer/PomdpMemoryUnfolder.cpp | 3 + 8 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 18f446d20..e6be182a3 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -111,9 +111,9 @@ int main(const int argc, const char** argv) { storm::cli::SymbolicInput symbolicInput = storm::cli::parseAndPreprocessSymbolicInput(); // We should not export here if we are going to do some processing first. 
- auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); + auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); - std::shared_ptr> pomdp = model->template as>(); + std::shared_ptr> pomdp = model->template as>(); std::shared_ptr formula; if (!symbolicInput.properties.empty()) { @@ -121,24 +121,24 @@ int main(const int argc, const char** argv) { STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); } - + if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); - storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); + storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); std::cout << uniqueAnalysis.analyse() << std::endl; } - + if (formula) { if (formula->isProbabilityOperatorFormula()) { if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); pomdp = selfLoopEliminator.transform(); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." 
<< std::endl); } if (pomdpSettings.isQualitativeReductionSet()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl; STORM_PRINT_AND_LOG(" done." << std::endl); @@ -167,19 +167,31 @@ int main(const int argc, const char** argv) { targetObservationSet.insert(pomdp->getObservation(state)); } } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } } } STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, "The formula is not supported by the grid approximation"); - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - storm::RationalNumber overRes = storm::utility::one(); - storm::RationalNumber underRes = storm::utility::zero(); - std::unique_ptr> result; + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + double overRes = storm::utility::one(); + double underRes = storm::utility::zero(); + std::unique_ptr> result; result = checker.computeReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution() + gridIncrease); + pomdpSettings.getGridResolution()); 
overRes = result->OverapproximationValue; underRes = result->UnderapproximationValue; } @@ -187,7 +199,7 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); pomdp = selfLoopEliminator.transform(); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); } @@ -196,43 +208,43 @@ int main(const int argc, const char** argv) { STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); std::cout << memory.toString() << std::endl; - storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); pomdp = memoryUnfolder.transform(); STORM_PRINT_AND_LOG(" done." << std::endl); pomdp->printModelInformationToStream(std::cout); } else { STORM_PRINT_AND_LOG("Assumming memoryless schedulers." << std::endl;) } - + // From now on the pomdp is considered memoryless - + if (pomdpSettings.isMecReductionSet()) { STORM_PRINT_AND_LOG("Eliminating mec choices ..."); // Note: Elimination of mec choices only preserves memoryless schedulers. 
uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); pomdp = mecChoiceEliminator.transform(*formula); STORM_PRINT_AND_LOG(" done." << std::endl); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); pomdp->printModelInformationToStream(std::cout); } - + if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { if (pomdpSettings.isTransformSimpleSet()) { STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); } else { STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); } pomdp->printModelInformationToStream(std::cout); STORM_PRINT_AND_LOG(" done." << std::endl); } - + if (pomdpSettings.isExportToParametricSet()) { STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); std::string transformMode = pomdpSettings.getFscApplicationTypeString(); auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); STORM_PRINT_AND_LOG(" done." 
<< std::endl); diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp index 09e2cf496..9bcec71a5 100644 --- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp +++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp @@ -160,6 +160,8 @@ namespace storm { template class QualitativeAnalysis; - + + template + class QualitativeAnalysis; } } \ No newline at end of file diff --git a/src/storm-pomdp/analysis/UniqueObservationStates.cpp b/src/storm-pomdp/analysis/UniqueObservationStates.cpp index 5fc8e1426..e71ef162d 100644 --- a/src/storm-pomdp/analysis/UniqueObservationStates.cpp +++ b/src/storm-pomdp/analysis/UniqueObservationStates.cpp @@ -30,6 +30,8 @@ namespace storm { } template class UniqueObservationStates; - + + template + class UniqueObservationStates; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp b/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp index 8f432bb2e..8f276c1fb 100644 --- a/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp +++ b/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp @@ -127,5 +127,8 @@ namespace storm { } template class ApplyFiniteSchedulerToPomdp; + + template + class ApplyFiniteSchedulerToPomdp; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp b/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp index 170f6e863..b076ca36d 100644 --- a/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp +++ b/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp @@ -162,5 +162,7 @@ namespace storm { template class BinaryPomdpTransformer; + template + class BinaryPomdpTransformer; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp b/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp index c61ce5b0f..54ad9631c 100644 --- a/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp +++ 
b/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp @@ -77,5 +77,8 @@ namespace storm { } template class GlobalPOMDPSelfLoopEliminator; + + template + class GlobalPOMDPSelfLoopEliminator; } } diff --git a/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp b/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp index d01342a7c..853b70952 100644 --- a/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp +++ b/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp @@ -225,5 +225,8 @@ namespace storm { template class GlobalPomdpMecChoiceEliminator; + + template + class GlobalPomdpMecChoiceEliminator; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp index 827493efc..c23c828ab 100644 --- a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp +++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp @@ -185,5 +185,8 @@ namespace storm { } template class PomdpMemoryUnfolder; + + template + class PomdpMemoryUnfolder; } } \ No newline at end of file From 4c8395c3b1e4db3de56974b96f425443f05a2ddd Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 18 Oct 2019 11:25:03 +0200 Subject: [PATCH 009/155] Speedup of probability approximation --- .../ApproximatePOMDPModelchecker.cpp | 67 ++++++++++--------- .../ApproximatePOMDPModelchecker.h | 9 ++- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 69e9bea80..e0b12229a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -18,14 +18,13 @@ namespace storm { namespace modelchecker { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - //Intentionally left empty + cc = 
storm::utility::ConstantsComparator(storm::utility::convertNumber(0.00000000001), false); } template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability( - storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { storm::utility::Stopwatch beliefGridTimer(true); uint64_t maxIterations = 100; bool finished = false; @@ -77,6 +76,9 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); @@ -94,8 +96,8 @@ namespace storm { nextId); nextId = beliefList.size(); } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; } observationProbabilities.emplace( std::make_pair(currentBelief.id, observationProbabilitiesInAction)); @@ -111,7 +113,6 @@ namespace storm { STORM_PRINT("Nr Believes " << beliefList.size() << std::endl) STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); while (!finished && iteration < maxIterations) { storm::utility::Stopwatch iterationTimer(true); STORM_LOG_DEBUG("Iteration " << iteration + 1); @@ -171,10 +172,8 @@ namespace storm { result[currentBelief.id] = chosenValue; 
chosenActions[currentBelief.id] = chosenActionIndex; // Check if the iteration brought an improvement - if ((min && cc.isLess(storm::utility::zero(), - result_backup[currentBelief.id] - result[currentBelief.id])) || - (!min && cc.isLess(storm::utility::zero(), - result[currentBelief.id] - result_backup[currentBelief.id]))) { + if ((min && cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) || + (!min && cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id]))) { improvement = true; } } @@ -182,11 +181,8 @@ namespace storm { finished = !improvement; storm::utility::Stopwatch backupTimer(true); // back up - for (auto iter = result.begin(); iter != result.end(); ++iter) { - result_backup[iter->first] = result[iter->first]; - } - backupTimer.stop(); - STORM_PRINT("Time Backup " << backupTimer << std::endl); + result_backup = result; + ++iteration; iterationTimer.stop(); STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); @@ -214,7 +210,7 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = false;//true; + bool useMdp = /*false;*/true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min) : @@ -497,7 +493,6 @@ namespace storm { std::map>> &nextBelieves, std::map &result, uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { - storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); storm::pomdp::Belief currentBelief = beliefList[currentBeliefId]; //TODO put this in extra function @@ -571,9 +566,9 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( - std::vector> &grid, uint32_t observation, + std::vector> const &grid, uint32_t observation, std::vector probabilities) { - storm::utility::ConstantsComparator cc(storm::utility::convertNumber(0.00000000001), false); + // TODO This one is quite slow for (auto const &belief : grid) { if (belief.observation == observation) { bool same = true; @@ -757,8 +752,7 @@ namespace storm { uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - std::vector postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, - 0).probabilities; + std::vector postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { uint32_t observation = pomdp.getObservation(state); if (postProbabilities[state] != storm::utility::zero()) { @@ -782,8 +776,7 @@ namespace storm { uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { if (belief.probabilities[state] != storm::utility::zero()) { - auto row = pomdp.getTransitionMatrix().getRow( - pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, 
actionIndex))); for (auto const &entry : row) { observation = pomdp.getObservation(entry.getColumn()); distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); @@ -795,15 +788,14 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, storm::pomdp::Belief belief, - uint64_t actionIndex, uint32_t observation, uint64_t id) { - std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + std::vector &beliefIsTarget, std::set &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, + uint64_t id) { + storm::utility::Stopwatch distrWatch(true); + std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { if (belief.probabilities[state] != storm::utility::zero()) { - auto row = pomdp.getTransitionMatrix().getRow( - pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); for (auto const &entry : row) { if (pomdp.getObservation(entry.getColumn()) == observation) { distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); @@ -811,19 +803,30 @@ namespace storm { } } } + distrWatch.stop(); // We have to normalize the distribution + storm::utility::Stopwatch normalizationWatch(true); auto sum = storm::utility::zero(); for (ValueType const &entry : distributionAfter) { sum += entry; } + for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { distributionAfter[i] /= sum; } + normalizationWatch.stop(); if (getBeliefIdInVector(beliefList, observation, distributionAfter) != 
uint64_t(-1)) { - return getBeliefIdInVector(beliefList, observation, distributionAfter); + storm::utility::Stopwatch getWatch(true); + auto res = getBeliefIdInVector(beliefList, observation, distributionAfter); + getWatch.stop(); + //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / getId: " << getWatch.getTimeInNanoseconds() << std::endl) + return res; } else { + storm::utility::Stopwatch pushWatch(true); beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); + pushWatch.stop(); + //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / generateBelief: " << pushWatch.getTimeInNanoseconds() << std::endl) return id; } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index cbde135dc..b8ad0fa66 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -27,7 +27,8 @@ namespace storm { std::unique_ptr> computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, uint64_t gridResolution); + std::set targetObservations, bool min, + uint64_t gridResolution); private: /** @@ -173,7 +174,7 @@ namespace storm { * @param probabilities * @return */ - uint64_t getBeliefIdInVector(std::vector> &grid, uint32_t observation, + uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, std::vector probabilities); storm::storage::SparseMatrix @@ -181,6 +182,10 @@ namespace storm { storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> transitions); + + ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, 
storm::pomdp::Belief belief); + + storm::utility::ConstantsComparator cc; }; } From f119e3d4c751f99276b301280675c7a6ecaa38a7 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 18 Oct 2019 11:32:36 +0200 Subject: [PATCH 010/155] Added reward over-approximation --- src/storm-pomdp-cli/storm-pomdp.cpp | 55 ++++ .../ApproximatePOMDPModelchecker.cpp | 254 +++++++++++++++++- 2 files changed, 308 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index e6be182a3..b34ed65b7 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -183,6 +183,7 @@ int main(const int argc, const char** argv) { } STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, "The formula is not supported by the grid approximation"); + STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); double overRes = storm::utility::one(); @@ -203,6 +204,60 @@ int main(const int argc, const char** argv) { pomdp = selfLoopEliminator.transform(); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." 
<< std::endl); } + if (pomdpSettings.isGridApproximationSet()) { + std::string rewardModelName; + storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula(); + if (rewFormula.hasRewardModelName()) { + rewardModelName = rewFormula.getRewardModelName(); + } + storm::logic::Formula const &subformula1 = rewFormula.getSubformula(); + + std::set targetObservationSet; + //TODO refactor + bool validFormula = false; + if (subformula1.isEventuallyFormula()) { + storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); + storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } + } + STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, + "The formula is not supported by the grid approximation"); + STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); + + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = 
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + double overRes = storm::utility::one(); + double underRes = storm::utility::zero(); + std::unique_ptr> result; + result = checker.computeReachabilityReward(*pomdp, targetObservationSet, + rewFormula.getOptimalityType() == + storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution()); + overRes = result->OverapproximationValue; + underRes = result->UnderapproximationValue; + } + } if (pomdpSettings.getMemoryBound() > 1) { STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index e0b12229a..43d2a0225 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -179,7 +179,6 @@ namespace storm { } } finished = !improvement; - storm::utility::Stopwatch backupTimer(true); // back up result_backup = result; @@ -231,6 +230,249 @@ namespace storm { POMDPCheckResult{overApprox, underApprox}); } + //TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication! 
+ template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + storm::utility::Stopwatch beliefGridTimer(true); + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + + std::vector> beliefList; + std::vector beliefIsTarget; + uint64_t nextId = 0; + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + + std::vector> beliefGrid; + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, + nextId); + nextId = beliefList.size(); + beliefGridTimer.stop(); + + storm::utility::Stopwatch overApproxTimer(true); + // Belief ID -> Value + std::map result; + std::map result_backup; + // Belief ID -> ActionIndex + std::map chosenActions; + + // Belief ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; + + storm::utility::Stopwatch nextBeliefGeneration(true); + for (size_t i = 0; i < beliefGrid.size(); ++i) { + auto currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + if (!isTarget) { + //TODO put this in extra function + storm::utility::Stopwatch beliefWatch(true); + // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative + uint64_t representativeState = 
pomdp.getStatesWithObservation(currentBelief.observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + + std::vector actionRewardsInState(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + storm::utility::Stopwatch aopWatch(true); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, currentBelief, action); + aopWatch.stop(); + //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl) + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + storm::utility::Stopwatch callWatch(true); + // THIS CALL IS SLOW + // TODO speed this up + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, + beliefIsTarget, + targetObservations, + currentBelief, + action, + observation, + nextId); + nextId = beliefList.size(); + callWatch.stop(); + //STORM_PRINT("Overall: " << callWatch << std::endl) + } + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; + + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief); + } + observationProbabilities.emplace( + std::make_pair(currentBelief.id, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); + beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + beliefWatch.stop(); + //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl) + } + + } + nextBeliefGeneration.stop(); + + //Use chaching to 
avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + + STORM_PRINT("Nr Grid Believes " << beliefGrid.size() << std::endl) + STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) + // Value Iteration + while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); + STORM_LOG_DEBUG("Iteration " << iteration + 1); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + if (!isTarget) { + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + // Initialize the values for the value iteration + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + storm::utility::Stopwatch actionWatch(true); + + currentValue = beliefActionRewards[currentBelief.id][action]; + storm::utility::Stopwatch loopTimer(true); + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { + storm::utility::Stopwatch subsimplexTime(true); + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } 
else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } + subsimplexTime.stop(); + //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl) + storm::utility::Stopwatch sumTime(true); + auto sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + + currentValue += iter->second * sum; + sumTime.stop(); + //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl) + } + loopTimer.stop(); + //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl) + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + + chosenValue = currentValue; + chosenActionIndex = action; + } + actionWatch.stop(); + //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl) + } + + result[currentBelief.id] = chosenValue; + + chosenActions[currentBelief.id] = chosenActionIndex; + // Check if the iteration brought an improvement + if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || + cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + improvement = true; + } + } + } + finished = !improvement; + // back up + result_backup = result; + + ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + } + + STORM_PRINT("Overapproximation took " << 
iteration << " iterations" << std::endl); + + beliefGrid.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> initSubSimplex = temp.first; + std::vector initLambdas = temp.second; + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; + } + } + overApproxTimer.stop(); + /* + // Now onto the under-approximation + bool useMdp = false;true; + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, + chosenActions, gridResolution, initialBelief.id, min); + underApproxTimer.stop(); + + STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl + << "Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); +*/ + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, storm::utility::zero()}); + } + template ValueType ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, @@ -831,6 +1073,16 @@ namespace storm { } } + template + ValueType 
ApproximatePOMDPModelchecker::getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, + uint64_t action, storm::pomdp::Belief belief) { + auto result = storm::utility::zero(); + for (size_t i = 0; i < belief.probabilities.size(); ++i) { + result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix()); + } + return result; + } + template class ApproximatePOMDPModelchecker; From 4b8664c521bb0d21ffcc9691cad58a28c48f9468 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 22 Oct 2019 10:30:50 +0200 Subject: [PATCH 011/155] Added reward under-approximation --- src/storm-pomdp-cli/storm-pomdp.cpp | 4 + .../ApproximatePOMDPModelchecker.cpp | 110 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 4 +- 3 files changed, 66 insertions(+), 52 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index b34ed65b7..dd1776149 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -24,6 +24,8 @@ #include "storm/settings/modules/BuildSettings.h" #include "storm/settings/modules/JitBuilderSettings.h" #include "storm/settings/modules/TopologicalEquationSolverSettings.h" +#include "storm/settings/modules/ModelCheckerSettings.h" +#include "storm/settings/modules/MultiplierSettings.h" #include "storm/settings/modules/MultiObjectiveSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" @@ -67,6 +69,8 @@ void initializeSettings() { storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); storm::settings::addModule(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 43d2a0225..ad2fd4973 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -71,9 +71,6 @@ namespace storm { result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function - std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); std::vector> observationProbabilitiesInAction(numChoices); @@ -86,6 +83,8 @@ namespace storm { for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, @@ -110,7 +109,6 @@ namespace storm { std::map>> subSimplexCache; std::map> lambdaCache; - STORM_PRINT("Nr Believes " << beliefList.size() << std::endl) STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration while (!finished && iteration < maxIterations) { @@ -131,7 +129,7 @@ namespace storm { ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); // simply change this for rewards? 
+ currentValue = storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -172,8 +170,7 @@ namespace storm { result[currentBelief.id] = chosenValue; chosenActions[currentBelief.id] = chosenActionIndex; // Check if the iteration brought an improvement - if ((min && cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) || - (!min && cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id]))) { + if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { improvement = true; } } @@ -209,12 +206,12 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = /*false;*/true; + bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min) : + result, chosenActions, gridResolution, initialBelief.id, min, false) : computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min); + chosenActions, gridResolution, initialBelief.id, min, false); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -281,7 +278,6 @@ namespace storm { result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); if (!isTarget) { //TODO put this in extra function - storm::utility::Stopwatch beliefWatch(true); // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative uint64_t 
representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); @@ -291,16 +287,12 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - storm::utility::Stopwatch aopWatch(true); std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); - aopWatch.stop(); - //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl) std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - storm::utility::Stopwatch callWatch(true); // THIS CALL IS SLOW // TODO speed this up actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, @@ -312,8 +304,6 @@ namespace storm { observation, nextId); nextId = beliefList.size(); - callWatch.stop(); - //STORM_PRINT("Overall: " << callWatch << std::endl) } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; @@ -325,8 +315,6 @@ namespace storm { std::make_pair(currentBelief.id, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - beliefWatch.stop(); - //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl) } } @@ -336,7 +324,6 @@ namespace storm { std::map>> subSimplexCache; std::map> lambdaCache; - STORM_PRINT("Nr Grid Believes " << beliefGrid.size() << std::endl) STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration while (!finished && iteration < maxIterations) { @@ -357,13 +344,9 @@ namespace storm { ValueType 
currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - storm::utility::Stopwatch actionWatch(true); - currentValue = beliefActionRewards[currentBelief.id][action]; - storm::utility::Stopwatch loopTimer(true); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - storm::utility::Stopwatch subsimplexTime(true); uint32_t observation = iter->first; storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief @@ -381,9 +364,6 @@ namespace storm { subSimplexCache[nextBelief.id] = subSimplex; lambdaCache[nextBelief.id] = lambdas; } - subsimplexTime.stop(); - //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl) - storm::utility::Stopwatch sumTime(true); auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { @@ -393,11 +373,7 @@ namespace storm { } currentValue += iter->second * sum; - sumTime.stop(); - //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl) } - loopTimer.stop(); - //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl) // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || (!min && @@ -407,8 +383,6 @@ namespace storm { chosenValue = currentValue; chosenActionIndex = action; } - actionWatch.stop(); - //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl) } result[currentBelief.id] = chosenValue; @@ -450,14 +424,14 @@ namespace storm { } } overApproxTimer.stop(); - /* + // Now onto the under-approximation - bool useMdp = false;true; + bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min); + result, chosenActions, gridResolution, initialBelief.id, min, true) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, true); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -465,12 +439,11 @@ namespace storm { << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); -*/ STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); return std::make_unique>( - POMDPCheckResult{overApprox, storm::utility::zero()}); + POMDPCheckResult{overApprox, underApprox}); } template @@ -483,7 +456,8 @@ namespace storm { std::map>> &nextBelieves, std::map &result, std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min) { + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeReward) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -539,14 +513,30 @@ namespace storm { for (auto targetState : targetStates) { labeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents( - buildTransitionMatrix(transitions), labeling); + + storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = 
beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])), + currentBelief)); + } + + std::unordered_map rewardModels = {{"std", rewardModel}}; + + storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels); storm::models::sparse::Dtmc underApproxDtmc(modelComponents); auto model = std::make_shared>(underApproxDtmc); model->printModelInformationToStream(std::cout); - std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; + std::string propertyString; + if (computeReward) { + propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; + } else { + propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; + } std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties( propertyVector).front(); @@ -569,7 +559,8 @@ namespace storm { std::map>> &nextBelieves, std::map &result, std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min) { + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeRewards) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -598,7 +589,6 @@ namespace storm { pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); if (chosenActions.find(currentBeliefId) == chosenActions.end()) { // If the current Belief is not part of the grid, the next states have not been computed yet. 
- // For now, this is a very dirty workaround because I am currently to lazy to refactor everything to be able to do this without the extractBestAction method std::vector> observationProbabilitiesInAction; std::vector> nextBelievesInAction; for (uint64_t action = 0; action < numChoices; ++action) { @@ -638,7 +628,6 @@ namespace storm { believesToBeExpanded.push_back(nextBeliefId); } transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; - //STORM_PRINT("Transition with action " << action << " from state " << beliefStateMap[currentBeliefId] << " to state " << beliefStateMap[nextBeliefId] << " with prob " << iter->second << std::endl) } actionTransitionStorage.push_back(transitionsInStateWithAction); } @@ -659,10 +648,32 @@ namespace storm { buildTransitionMatrix(transitions), labeling); storm::models::sparse::Mdp underApproxMdp(modelComponents); + + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } + } + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); + } + auto model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); - std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; + std::string propertyString; + if (computeRewards) { + propertyString = min ? "Rmin=? 
[F \"target\"]" : "Rmax=? [F \"target\"]"; + } else { + propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; + } std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties( propertyVector).front(); @@ -696,7 +707,6 @@ namespace storm { smb.newRowGroup(currentRow); for (auto const &map : actionTransitions) { for (auto const &transition : map) { - //STORM_PRINT(" Add transition from state " << currentRowGroup << " to state " << transition.first << " with prob " << transition.second << std::endl) smb.addNextValue(currentRow, transition.first, transition.second); } ++currentRow; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index b8ad0fa66..f3947dcdc 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -77,7 +77,7 @@ namespace storm { std::map>> &nextBelieves, std::map &result, std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min); + uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward); ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, @@ -87,7 +87,7 @@ namespace storm { std::map>> &nextBelieves, std::map &result, std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min); + uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); /** * From 11f89de9e824c4fb74fd38f1251fc671439077c4 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 29 Oct 2019 10:30:40 +0100 Subject: [PATCH 012/155] Added preprocessing to reduce the POMDP state space before analysis --- src/storm-pomdp-cli/storm-pomdp.cpp | 13 +- .../KnownProbabilityTransformer.cpp | 121 ++++++++++++++++++ .../transformer/KnownProbabilityTransformer.h | 17 +++ 3 
files changed, 148 insertions(+), 3 deletions(-) create mode 100644 src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp create mode 100644 src/storm-pomdp/transformer/KnownProbabilityTransformer.h diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index dd1776149..96be00668 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -34,6 +34,7 @@ #include "storm-cli-utilities/cli.h" #include "storm-cli-utilities/model-handling.h" +#include "storm-pomdp/transformer/KnownProbabilityTransformer.h" #include "storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.h" #include "storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.h" #include "storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.h" @@ -134,6 +135,8 @@ int main(const int argc, const char** argv) { if (formula) { if (formula->isProbabilityOperatorFormula()) { + boost::optional prob1States; + boost::optional prob0States; if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); @@ -144,12 +147,16 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isQualitativeReductionSet()) { storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); - std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl; + prob0States = qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()); + std::cout << *prob0States << std::endl; STORM_PRINT_AND_LOG(" done." 
<< std::endl); STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); - std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl; + prob1States = qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()); + std::cout << *prob1States << std::endl; STORM_PRINT_AND_LOG(" done." << std::endl); - std::cout << "actual reduction not yet implemented..." << std::endl; + //std::cout << "actual reduction not yet implemented..." << std::endl; + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); } if (pomdpSettings.isGridApproximationSet()) { storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp new file mode 100644 index 000000000..c5d225d02 --- /dev/null +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp @@ -0,0 +1,121 @@ +#include "KnownProbabilityTransformer.h" + +namespace storm { + namespace pomdp { + namespace transformer { + + template + KnownProbabilityTransformer::KnownProbabilityTransformer() { + // Intentionally left empty + } + + template + std::shared_ptr> + KnownProbabilityTransformer::transform(storm::models::sparse::Pomdp const &pomdp, storm::storage::BitVector &prob0States, + storm::storage::BitVector &prob1States) { + std::map stateMap; + std::map observationMap; + + storm::models::sparse::StateLabeling newLabeling(pomdp.getNumberOfStates() - prob0States.getNumberOfSetBits() - prob1States.getNumberOfSetBits() + 2); + + std::vector newObservations; + + // New state 0 represents all states with probability 1 + for (auto const &iter : prob1States) { + stateMap[iter] = 0; + + std::set labelSet = pomdp.getStateLabeling().getLabelsOfState(iter); + for (auto const &label : 
labelSet) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, 0); + } + } + // New state 1 represents all states with probability 0 + for (auto const &iter : prob0States) { + stateMap[iter] = 1; + for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, 1); + } + } + + storm::storage::BitVector unknownStates = ~(prob1States | prob0States); + //If there are no states with probability 0 we set the next new state id to be 1, otherwise 2 + uint64_t newId = prob0States.empty() ? 1 : 2; + uint64_t nextObservation = prob0States.empty() ? 1 : 2; + for (auto const &iter : unknownStates) { + stateMap[iter] = newId; + if (observationMap.count(pomdp.getObservation(iter)) == 0) { + observationMap[pomdp.getObservation(iter)] = nextObservation; + ++nextObservation; + } + for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, newId); + } + ++newId; + } + + uint64_t newNrOfStates = pomdp.getNumberOfStates() - (prob1States.getNumberOfSetBits() + prob0States.getNumberOfSetBits()); + + uint64_t currentRow = 0; + uint64_t currentRowGroup = 0; + storm::storage::SparseMatrixBuilder smb(0, 0, 0, false, true); + //new row for prob 1 state + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 0, storm::utility::one()); + newObservations.push_back(0); + ++currentRowGroup; + ++currentRow; + if (!prob0States.empty()) { + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 1, storm::utility::one()); + ++currentRowGroup; + ++currentRow; + newObservations.push_back(1); + } + + auto transitionMatrix = pomdp.getTransitionMatrix(); + + for (auto const &iter : unknownStates) { + smb.newRowGroup(currentRow); + // First collect all transitions + //auto 
rowGroup = transitionMatrix.getRowGroup(iter); + for (uint64_t row = 0; row < transitionMatrix.getRowGroupSize(iter); ++row) { + std::map transitionsInAction; + for (auto const &entry : transitionMatrix.getRow(iter, row)) { + // here we use the state mapping to collect all probabilities to get to a state with prob 0/1 + transitionsInAction[stateMap[entry.getColumn()]] += entry.getValue(); + } + for (auto const &transition : transitionsInAction) { + smb.addNextValue(currentRow, transition.first, transition.second); + } + ++currentRow; + } + ++currentRowGroup; + newObservations.push_back(observationMap[pomdp.getObservation(iter)]); + } + + auto newTransitionMatrix = smb.build(currentRow, newNrOfStates, currentRowGroup); + //STORM_PRINT(newTransitionMatrix) + storm::storage::sparse::ModelComponents components(newTransitionMatrix, newLabeling); + components.observabilityClasses = newObservations; + + auto newPomdp = storm::models::sparse::Pomdp(components); + + newPomdp.printModelInformationToStream(std::cout); + + return std::make_shared>(newPomdp); + } + + template + class KnownProbabilityTransformer; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.h b/src/storm-pomdp/transformer/KnownProbabilityTransformer.h new file mode 100644 index 000000000..e043c3f99 --- /dev/null +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.h @@ -0,0 +1,17 @@ +#include "storm/api/storm.h" +#include "storm/models/sparse/Pomdp.h" + +namespace storm { + namespace pomdp { + namespace transformer { + template + class KnownProbabilityTransformer { + public: + KnownProbabilityTransformer(); + + std::shared_ptr> + transform(storm::models::sparse::Pomdp const &pomdp, storm::storage::BitVector &prob0States, storm::storage::BitVector &prob1States); + }; + } + } +} From 7afc47f354246fd47cfa164f4309c8ec88ab285a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 10:14:43 +0100 Subject: [PATCH 013/155] Fixed wrong 
size of stateLabeling if no probability 0 states were found --- src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp index c5d225d02..45016e515 100644 --- a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp @@ -16,7 +16,9 @@ namespace storm { std::map stateMap; std::map observationMap; - storm::models::sparse::StateLabeling newLabeling(pomdp.getNumberOfStates() - prob0States.getNumberOfSetBits() - prob1States.getNumberOfSetBits() + 2); + uint64_t nrNewStates = prob0States.empty() ? 1 : 2; + + storm::models::sparse::StateLabeling newLabeling(pomdp.getNumberOfStates() - prob0States.getNumberOfSetBits() - prob1States.getNumberOfSetBits() + nrNewStates); std::vector newObservations; From a65c445243d28796272ce3913111b1d6958eb843 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 10:20:49 +0100 Subject: [PATCH 014/155] Avoid multiple computation of size in subsimplex computation --- .../ApproximatePOMDPModelchecker.cpp | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index ad2fd4973..063f80146 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -944,13 +944,14 @@ namespace storm { std::vector probabilities, uint64_t resolution) { // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper - std::vector x(probabilities.size()); - std::vector v(probabilities.size()); - std::vector d(probabilities.size()); + uint64_t probSize = probabilities.size(); + std::vector 
x(probSize); + std::vector v(probSize); + std::vector d(probSize); auto convResolution = storm::utility::convertNumber(resolution); - for (size_t i = 0; i < probabilities.size(); ++i) { - for (size_t j = i; j < probabilities.size(); ++j) { + for (size_t i = 0; i < probSize; ++i) { + for (size_t j = i; j < probSize; ++j) { x[i] += convResolution * probabilities[j]; } v[i] = storm::utility::floor(x[i]); @@ -959,14 +960,14 @@ namespace storm { auto p = storm::utility::vector::getSortedIndices(d); - std::vector> qs(probabilities.size(), std::vector(probabilities.size())); - for (size_t i = 0; i < probabilities.size(); ++i) { + std::vector> qs(probSize, std::vector(probSize)); + for (size_t i = 0; i < probSize; ++i) { if (i == 0) { - for (size_t j = 0; j < probabilities.size(); ++j) { + for (size_t j = 0; j < probSize; ++j) { qs[i][j] = v[j]; } } else { - for (size_t j = 0; j < probabilities.size(); ++j) { + for (size_t j = 0; j < probSize; ++j) { if (j == p[i - 1]) { qs[i][j] = qs[i - 1][j] + storm::utility::one(); } else { @@ -975,18 +976,18 @@ namespace storm { } } } - std::vector> subSimplex(qs.size(), std::vector(probabilities.size())); - for (size_t j = 0; j < qs.size(); ++j) { - for (size_t i = 0; i < probabilities.size() - 1; ++i) { + std::vector> subSimplex(probSize, std::vector(probSize)); + for (size_t j = 0; j < probSize; ++j) { + for (size_t i = 0; i < probSize - 1; ++i) { subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; } - subSimplex[j][probabilities.size() - 1] = qs[j][probabilities.size() - 1] / convResolution; + subSimplex[j][probSize - 1] = qs[j][probSize - 1] / convResolution; } - std::vector lambdas(probabilities.size(), storm::utility::zero()); + std::vector lambdas(probSize, storm::utility::zero()); auto sum = storm::utility::zero(); - for (size_t i = 1; i < probabilities.size(); ++i) { + for (size_t i = 1; i < probSize; ++i) { lambdas[i] = d[p[i - 1]] - d[p[i]]; sum += d[p[i - 1]] - d[p[i]]; } From 
bc52aa86cae75c8e2d4b8e9b0e2296e4d701579a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 10:48:55 +0100 Subject: [PATCH 015/155] Added procedure to repeat probability computation with higher resolution --- .../ApproximatePOMDPModelchecker.cpp | 21 +++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 063f80146..14428c5bf 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -21,6 +21,27 @@ namespace storm { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(0.00000000001), false); } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements) { + uint64_t currentResolution = startingResolution; + uint64_t currentRefinement = 0; + std::unique_ptr> res = std::make_unique>( + POMDPCheckResult{storm::utility::one(), storm::utility::zero()}); + while (currentRefinement < maxNrOfRefinements && !cc.isEqual(storm::utility::zero(), res->OverapproximationValue - res->UnderapproximationValue)) { + STORM_PRINT("--------------------------------------------------------------" << std::endl) + STORM_PRINT("Refinement Step " << currentRefinement + 1 << " - Resolution " << currentResolution << std::endl) + STORM_PRINT("--------------------------------------------------------------" << std::endl) + res = computeReachabilityProbability(pomdp, targetObservations, min, currentResolution); + currentResolution += stepSize; + ++currentRefinement; + } + STORM_PRINT("Procedure took " << currentRefinement << " refinement steps" << std::endl) + return res; + } + template 
std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f3947dcdc..4792f9127 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -20,6 +20,11 @@ namespace storm { public: explicit ApproximatePOMDPModelchecker(); + std::unique_ptr> + refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements); + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, From 21e417bdaca04eae0186a1282f4e17cb83fa6e53 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 10:55:13 +0100 Subject: [PATCH 016/155] Added on-the-fly belief grid generation to avoid computations for unreachable beliefs --- src/storm-pomdp-cli/storm-pomdp.cpp | 14 +- .../ApproximatePOMDPModelchecker.cpp | 251 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 5 + 3 files changed, 266 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 96be00668..0af965ddd 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -200,12 +200,18 @@ int main(const int argc, const char** argv) { double overRes = storm::utility::one(); double underRes = storm::utility::zero(); std::unique_ptr> result; - result = checker.computeReachabilityProbability(*pomdp, targetObservationSet, - probFormula.getOptimalityType() == - storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); + + //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == 
storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(),1,10); + result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution()); overRes = result->OverapproximationValue; underRes = result->UnderapproximationValue; + if (overRes != underRes) { + STORM_PRINT("Overapproximation Result: " << overRes << std::endl) + STORM_PRINT("Underapproximation Result: " << underRes << std::endl) + } else { + STORM_PRINT("Result: " << overRes << std::endl) + } } } else if (formula->isRewardOperatorFormula()) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 14428c5bf..8d0650c54 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -42,6 +42,257 @@ namespace storm { return res; } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) + + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + std::vector> beliefList; + std::vector beliefIsTarget; + std::vector> beliefGrid; + std::map result; + std::map result_backup; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + std::map chosenActions; + + std::deque beliefsToBeExpanded; + + // Belief ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; + + uint64_t nextId = 0; + 
storm::utility::Stopwatch expansionTimer(true); + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // for the initial belief, add the triangulated initial states + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> initSubSimplex = initTemp.first; + std::vector initLambdas = initTemp.second; + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + bool initInserted = false; + + + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { + uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); + if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { + if (searchResult == 0) { + // the initial belief is on the grid itself + initInserted = true; + beliefGrid.push_back(initialBelief); + beliefsToBeExpanded.push_back(0); + } else { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + + + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting + + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = beliefIsTarget[currId]; + if (isTarget) { + result.emplace(std::make_pair(currId, storm::utility::one())); + result_backup.emplace(std::make_pair(currId, storm::utility::one())); + } else { + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(beliefList[currId].observation).front()); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, beliefList[currId], action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + observation, nextId); + nextId = beliefList.size(); + actionObservationBelieves[observation] = idNextBelief; + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(idNextBelief) > 0) { + // TODO is this necessary here? 
Think later + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[idNextBelief].probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; + } + observationProbabilities.emplace( + std::make_pair(currId, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + } + } + expansionTimer.stop(); + STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) + STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) + STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + + storm::utility::Stopwatch overApproxTimer(true); + // Value Iteration + while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); + STORM_LOG_DEBUG("Iteration " << iteration + 1); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + if (!isTarget) { + // we can take any state with 
the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + // Initialize the values for the value iteration + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = storm::utility::zero(); + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } else { + // TODO is this necessary here? 
Everything should have already been computed + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } + auto sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + currentValue += iter->second * sum; + } + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + chosenValue = currentValue; + chosenActionIndex = action; + } + } + result[currentBelief.id] = chosenValue; + chosenActions[currentBelief.id] = chosenActionIndex; + // Check if the iteration brought an improvement + if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + improvement = true; + } + } + } + finished = !improvement; + // back up + result_backup = result; + + ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + } + + STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; + } + } + overApproxTimer.stop(); + + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, 
nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, false); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, underApprox}); + + } + template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 4792f9127..5399f079d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -25,6 +25,11 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements); + std::unique_ptr> + computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution); + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, From b9c0af6628af7120634f3744e18dead0a40a7f8c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 11:43:09 +0100 Subject: [PATCH 017/155] Added on-the-fly belief grid generation for rewards --- .../ApproximatePOMDPModelchecker.cpp | 262 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 4 + 2 files changed, 266 insertions(+) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8d0650c54..a32d7dcc0 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -293,6 +293,268 @@ namespace storm { } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) + + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + std::vector> beliefList; + std::vector beliefIsTarget; + std::vector> beliefGrid; + std::map result; + std::map result_backup; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + std::map chosenActions; + + std::deque beliefsToBeExpanded; + + // Belief ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; + + uint64_t nextId = 0; + storm::utility::Stopwatch expansionTimer(true); + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // for the initial belief, add the triangulated initial states + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> initSubSimplex = initTemp.first; + std::vector initLambdas = initTemp.second; + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + bool initInserted = false; + + + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { + uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, 
initSubSimplex[j]); + if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { + if (searchResult == 0) { + // the initial belief is on the grid itself + initInserted = true; + beliefGrid.push_back(initialBelief); + beliefsToBeExpanded.push_back(0); + } else { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + + + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = beliefIsTarget[currId]; + + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + if (!isTarget) { + + uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + std::vector actionRewardsInState(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); iter != 
actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + observation, nextId); + nextId = beliefList.size(); + actionObservationBelieves[observation] = idNextBelief; + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(idNextBelief) > 0) { + // TODO is this necessary here? Think later + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[idNextBelief].probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + } + observationProbabilities.emplace( + std::make_pair(currId, observationProbabilitiesInAction)); + 
nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + + } + } + expansionTimer.stop(); + STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) + STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) + STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + + storm::utility::Stopwatch overApproxTimer(true); + // Value Iteration + while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); + STORM_LOG_DEBUG("Iteration " << iteration + 1); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + if (!isTarget) { + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + // Initialize the values for the value iteration + ValueType chosenValue = min ? 
storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = beliefActionRewards[currentBelief.id][action]; + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } else { + //TODO This should not ne reachable + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } + auto sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + + currentValue += iter->second * sum; + } + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + + chosenValue = currentValue; + chosenActionIndex = action; + } + } + + result[currentBelief.id] = chosenValue; + + chosenActions[currentBelief.id] = chosenActionIndex; + // Check if the iteration brought an improvement + if 
(cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || + cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + improvement = true; + } + } + } + finished = !improvement; + // back up + result_backup = result; + + ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + } + + STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; + } + } + overApproxTimer.stop(); + + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, true); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, underApprox}); + + } + + template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5399f079d..1222d0b1f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -30,6 +30,10 @@ namespace storm { std::set 
targetObservations, bool min, uint64_t gridResolution); + std::unique_ptr> + computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, + uint64_t gridResolution); + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, From bbd3ec7287184d4f6d79588fe8ed483e273f283e Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 22 Nov 2019 16:21:54 +0100 Subject: [PATCH 018/155] Fix of wrong MDP underapproximation --- .../ApproximatePOMDPModelchecker.cpp | 141 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 31 +++- 2 files changed, 108 insertions(+), 64 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index a32d7dcc0..ff9bb522b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -11,6 +11,7 @@ #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/api/properties.h" +#include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" namespace storm { @@ -18,7 +19,9 @@ namespace storm { namespace modelchecker { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(0.00000000001), false); + precision = 0.000000001; + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); + useMdp = false; } template @@ -59,7 +62,8 @@ namespace storm { //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map chosenActions; + std::map> chosenActions; + std::deque beliefsToBeExpanded; @@ -101,8 +105,7 @@ namespace storm { storm::pomdp::Belief gridBelief 
= {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; } @@ -125,14 +128,12 @@ namespace storm { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currId].observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currId], action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { @@ -166,8 +167,7 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; } @@ -201,9 +201,8 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value 
iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -231,8 +230,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -243,11 +241,14 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { improvement = true; @@ -312,7 +313,7 @@ namespace storm { //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map chosenActions; + std::map> chosenActions; std::deque beliefsToBeExpanded; @@ -459,7 +460,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -499,15 +500,17 @@ namespace storm { (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { @@ -586,7 +589,7 @@ namespace storm { std::map result; std::map result_backup; // Belief ID -> ActionIndex - std::map chosenActions; + std::map> chosenActions; // Belief ID -> Observation -> Probability std::map>> observationProbabilities; @@ -611,11 +614,9 @@ namespace storm { std::vector> nextBelievesInAction(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // 
TODO speed this up @@ -659,7 +660,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -698,11 +699,14 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { improvement = true; @@ -721,8 +725,7 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::pair>, std::vector> temp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); @@ -740,7 +743,6 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, false) : @@ -795,7 +797,7 @@ namespace storm { std::map result; std::map result_backup; // Belief ID -> ActionIndex - std::map chosenActions; + std::map> chosenActions; // Belief ID -> Observation -> Probability std::map>> observationProbabilities; @@ -874,7 +876,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -913,15 +915,17 @@ namespace storm { (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { @@ -960,7 +964,6 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, true) : @@ -989,7 +992,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward) { std::set visitedBelieves; @@ -1017,17 +1020,15 @@ namespace storm { } else { if (chosenActions.find(currentBeliefId) == chosenActions.end()) { // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, - targetObservations, + chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId, beliefList.size(), min); } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin(); - iter != - observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin(); + iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation]; + uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation]; if (visitedBelieves.insert(nextBeliefId).second) { beliefStateMap[nextBeliefId] = stateId; ++stateId; @@ -1049,12 +1050,14 @@ namespace storm { } storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = 
beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])), - currentBelief)); + if (computeReward) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } } std::unordered_map rewardModels = {{"std", rewardModel}}; @@ -1092,7 +1095,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards) { std::set visitedBelieves; @@ -1152,8 +1155,7 @@ namespace storm { std::map transitionsInStateWithAction; for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != - observationProbabilities[currentBeliefId][action].end(); ++iter) { + iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if (visitedBelieves.insert(nextBeliefId).second) { @@ -1270,7 +1272,7 @@ namespace storm { } template - uint64_t ApproximatePOMDPModelchecker::extractBestAction( + std::vector ApproximatePOMDPModelchecker::extractBestActions( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, @@ -1312,7 +1314,7 @@ namespace storm { // choose the action which results in the value computed by the over-approximation ValueType 
chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -1330,8 +1332,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result.at( - getBeliefIdInVector(beliefList, observation, subSimplex[j])); + sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -1343,10 +1344,28 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } - return chosenActionIndex; + return chosenActionIndices; + } + + template + std::vector ApproximatePOMDPModelchecker::extractBestAction( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { + return std::vector{ + extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId, + nextId, min).front()}; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 1222d0b1f..f5e7ff805 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -58,7 +58,30 @@ namespace storm { * @param min * @return */ - uint64_t extractBestAction(storm::models::sparse::Pomdp const &pomdp, + std::vector extractBestActions(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &target_observations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, + bool min); + + /** + * TODO + * @param pomdp + * @param beliefList + * @param observationProbabilities + * @param nextBelieves + * @param result + * @param gridResolution + * @param currentBeliefId + * @param nextId + * @param min + * @return + */ + std::vector extractBestAction(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set &target_observations, @@ -90,7 +113,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward); ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, @@ -100,7 +123,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); /** @@ -200,6 +223,8 @@ namespace storm { ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); storm::utility::ConstantsComparator cc; + double precision; + bool useMdp; }; } From c663edbd853f96f05191f5d83204af6e2723b079 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 22 Nov 2019 16:29:29 +0100 Subject: [PATCH 019/155] Added generation of an MDP for the over-approximation in the on-the-fly 
state exploration --- .../ApproximatePOMDPModelchecker.cpp | 96 ++++++++++++++++--- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index ff9bb522b..c1ce9449e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -81,6 +81,15 @@ namespace storm { beliefIsTarget.push_back( targetObservations.find(initialBelief.observation) != targetObservations.end()); + // These are the components to build the MDP from the grid + std::map beliefStateMap; + std::vector>> mdpTransitions; + std::vector targetStates; + uint64_t mdpStateId = 0; + + beliefStateMap[initialBelief.id] = mdpStateId; + ++mdpStateId; + // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); @@ -90,6 +99,8 @@ namespace storm { lambdaCache[0] = initLambdas; bool initInserted = false; + std::vector> initTransitionsInBelief; + std::map initTransitionInActionBelief; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { @@ -107,12 +118,21 @@ namespace storm { beliefGrid.push_back(gridBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; + ++mdpStateId; } } } } + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + if (!initTransitionInActionBelief.empty()) { + initTransitionsInBelief.push_back(initTransitionInActionBelief); + mdpTransitions.push_back(initTransitionsInBelief); + } //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm 
curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting @@ -124,6 +144,14 @@ namespace storm { if (isTarget) { result.emplace(std::make_pair(currId, storm::utility::one())); result_backup.emplace(std::make_pair(currId, storm::utility::one())); + + // MDP stuff + std::vector> transitionsInBelief; + targetStates.push_back(beliefStateMap[currId]); + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + mdpTransitions.push_back(transitionsInBelief); } else { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); @@ -131,12 +159,14 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); + std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + std::map transitionInActionBelief; + + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up @@ -159,7 +189,6 @@ namespace storm { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } - for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { @@ -169,17 +198,31 @@ 
namespace storm { beliefGrid.push_back(gridBelief); beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; + ++mdpStateId; + } else { + transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; } } } } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; + if (!transitionInActionBelief.empty()) { + transitionsInBelief.push_back(transitionInActionBelief); + } } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief); } } expansionTimer.stop(); @@ -187,6 +230,18 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + overApproxMdp.printModelInformationToStream(std::cout); + storm::utility::Stopwatch 
overApproxTimer(true); // Value Iteration while (!finished && iteration < maxIterations) { @@ -276,21 +331,34 @@ namespace storm { } overApproxTimer.stop(); - storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = storm::utility::zero(); + /*storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop(); + underApproxTimer.stop();*/ - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); + // STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + + + std::vector parameterNames; + storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); + + std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } From c6902e0ca73fe084ed68074115e561931c711503 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Mon, 25 Nov 2019 11:08:07 +0100 Subject: [PATCH 020/155] Added reward MDP generation for the overapproximation --- .../ApproximatePOMDPModelchecker.cpp | 114 +++++++++++++++--- 1 file changed, 96 insertions(+), 18 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c1ce9449e..bbb87684e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -78,8 +78,7 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); // These are the components to build the MDP from the grid std::map beliefStateMap; @@ -344,8 +343,6 @@ namespace storm { auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); - - std::vector parameterNames; storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); @@ -398,8 +395,16 @@ namespace 
storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // These are the components to build the MDP from the grid + std::map beliefStateMap; + std::vector>> mdpTransitions; + std::vector targetStates; + uint64_t mdpStateId = 0; + + beliefStateMap[initialBelief.id] = mdpStateId; + ++mdpStateId; // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( @@ -410,6 +415,8 @@ namespace storm { lambdaCache[0] = initLambdas; bool initInserted = false; + std::vector> initTransitionsInBelief; + std::map initTransitionInActionBelief; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { @@ -429,11 +436,22 @@ namespace storm { targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; + + beliefStateMap[nextId] = mdpStateId; + initTransitionInActionBelief[mdpStateId] = initLambdas[j]; + ++nextId; + ++mdpStateId; } } } } + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + if (!initTransitionInActionBelief.empty()) { + initTransitionsInBelief.push_back(initTransitionInActionBelief); + mdpTransitions.push_back(initTransitionsInBelief); + } + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting @@ -445,17 +463,26 @@ namespace storm { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - if (!isTarget) { - + if (isTarget) { + // MDP stuff + std::vector> transitionsInBelief; + targetStates.push_back(beliefStateMap[currId]); + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + mdpTransitions.push_back(transitionsInBelief); + } else { uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); std::vector actionRewardsInState(numChoices); + std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; + std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW @@ -490,7 +517,13 @@ namespace storm { beliefIsTarget.push_back( targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; + ++mdpStateId; + } else { + transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; } } } @@ -499,12 +532,20 @@ namespace storm { nextBelievesInAction[action] = actionObservationBelieves; actionRewardsInState[action] = 
getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); + if (!transitionInActionBelief.empty()) { + transitionsInBelief.push_back(transitionInActionBelief); + } } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief); } } expansionTimer.stop(); @@ -512,6 +553,31 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + 
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } + } + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); + overApproxMdp.printModelInformationToStream(std::cout); + storm::utility::Stopwatch overApproxTimer(true); // Value Iteration while (!finished && iteration < maxIterations) { @@ -600,13 +666,12 @@ namespace storm { auto overApprox = storm::utility::zero(); for (size_t j = 0; j < initLambdas.size(); ++j) { if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; } } overApproxTimer.stop(); - + ValueType underApprox = storm::utility::zero(); + /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, true); @@ -615,13 +680,26 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer - << std::endl); + << std::endl);*/ STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + std::vector parameterNames; + 
storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); + + std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } From 4664b4244b9c50edba4b1b8f68404c883696b2ba Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 26 Nov 2019 11:01:22 +0100 Subject: [PATCH 021/155] Refactoring of on-the-fly computation to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp | 410 +++--------------- .../ApproximatePOMDPModelchecker.h | 15 + 2 files changed, 73 insertions(+), 352 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bbb87684e..1a2a24a38 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,6 +22,7 @@ namespace storm { precision = 0.000000001; cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); useMdp = false; + maxIterations = 1000; } template @@ -47,327 +48,15 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + 
ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; - std::vector> beliefList; - std::vector beliefIsTarget; - std::vector> beliefGrid; - std::map result; - std::map result_backup; - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - std::map> chosenActions; - - - std::deque beliefsToBeExpanded; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - - uint64_t nextId = 0; - storm::utility::Stopwatch expansionTimer(true); - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - // These are the components to build the MDP from the grid - std::map beliefStateMap; - std::vector>> mdpTransitions; - std::vector targetStates; - uint64_t mdpStateId = 0; - - beliefStateMap[initialBelief.id] = mdpStateId; - ++mdpStateId; - - // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = initTemp.first; - std::vector initLambdas = initTemp.second; - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; - bool initInserted = false; - - std::vector> initTransitionsInBelief; - std::map initTransitionInActionBelief; - - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { - uint64_t searchResult = 
getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); - if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { - if (searchResult == 0) { - // the initial belief is on the grid itself - initInserted = true; - beliefGrid.push_back(initialBelief); - beliefsToBeExpanded.push_back(0); - } else { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - - beliefStateMap[nextId] = mdpStateId; - initTransitionInActionBelief[mdpStateId] = initLambdas[j]; - ++nextId; - ++mdpStateId; - } - } - } - } - - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - if (!initTransitionInActionBelief.empty()) { - initTransitionsInBelief.push_back(initTransitionInActionBelief); - mdpTransitions.push_back(initTransitionsInBelief); - } - - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting - - // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - if (isTarget) { - result.emplace(std::make_pair(currId, storm::utility::one())); - result_backup.emplace(std::make_pair(currId, storm::utility::one())); - - // MDP stuff - std::vector> transitionsInBelief; - targetStates.push_back(beliefStateMap[currId]); - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - mdpTransitions.push_back(transitionsInBelief); - } else { - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - std::vector> transitionsInBelief; - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); - std::map actionObservationBelieves; - std::map transitionInActionBelief; - - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, - observation, nextId); - nextId = beliefList.size(); - actionObservationBelieves[observation] = idNextBelief; - //Triangulate here and put the possibly 
resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(idNextBelief) > 0) { - // TODO is this necessary here? Think later - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - - beliefStateMap[nextId] = mdpStateId; - transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; - ++nextId; - ++mdpStateId; - } else { - transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; - } - } - } - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - if (!transitionInActionBelief.empty()) { - transitionsInBelief.push_back(transitionInActionBelief); - } - } - observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); - if (transitionsInBelief.empty()) { - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); - 
transitionsInBelief.push_back(transitionInActionBelief); - } - mdpTransitions.push_back(transitionsInBelief); - } - } - expansionTimer.stop(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) - STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) - STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - - storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - overApproxMdp.printModelInformationToStream(std::cout); - - storm::utility::Stopwatch overApproxTimer(true); - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - // TODO is this necessary here? Everything should have already been computed - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - result[currentBelief.id] = chosenValue; - 
chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } + if (computeRewards) { + RewardModelType const &pomdpRewardModel = pomdp.getUniqueRewardModel(); } - overApproxTimer.stop(); - - ValueType underApprox = storm::utility::zero(); - /*storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop();*/ - - // STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::vector parameterNames; - storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); - - std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); - - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; std::vector> beliefList; @@ -407,8 +96,7 @@ namespace storm { ++mdpStateId; // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; subSimplexCache[0] = initSubSimplex; @@ -432,8 +120,7 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); 
++nextId; @@ -461,9 +148,11 @@ namespace storm { beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); if (isTarget) { + // Depending on whether we compute rewards, we select the right initial result + result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); + result_backup.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); + // MDP stuff std::vector> transitionsInBelief; targetStates.push_back(beliefStateMap[currId]); @@ -472,6 +161,9 @@ namespace storm { transitionsInBelief.push_back(transitionInActionBelief); mdpTransitions.push_back(transitionsInBelief); } else { + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); @@ -530,15 +222,20 @@ namespace storm { } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - beliefList[currId]); + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + } if (!transitionInActionBelief.empty()) { transitionsInBelief.push_back(transitionInActionBelief); } } observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); 
- beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + } + if (transitionsInBelief.empty()) { std::map transitionInActionBelief; @@ -563,19 +260,21 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + if (computeRewards) { + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); } - 
overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); overApproxMdp.printModelInformationToStream(std::cout); storm::utility::Stopwatch overApproxTimer(true); @@ -589,16 +288,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; + currentValue = computeRewards ? 
beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -622,8 +319,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } @@ -631,8 +327,7 @@ namespace storm { } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { @@ -646,8 +341,7 @@ namespace storm { chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(result_backup[currentBelief.id], result[currentBelief.id])) { improvement = true; } } @@ -674,7 +368,7 @@ namespace storm { /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); 
STORM_PRINT("Time Overapproximation: " << overApproxTimer @@ -690,7 +384,9 @@ namespace storm { std::vector parameterNames; storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); - std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? [F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); @@ -700,16 +396,27 @@ namespace storm { STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); + } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { storm::utility::Stopwatch beliefGridTimer(true); - uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; @@ -915,7 +622,6 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { storm::utility::Stopwatch beliefGridTimer(true); - 
uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f5e7ff805..78c2b5536 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -45,6 +45,20 @@ namespace storm { uint64_t gridResolution); private: + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param gridResolution + * @param computeRewards + * @return + */ + std::unique_ptr> + computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution, bool computeRewards); + /** * TODO * @param pomdp @@ -225,6 +239,7 @@ namespace storm { storm::utility::ConstantsComparator cc; double precision; bool useMdp; + uint64_t maxIterations; }; } From 8f81958268781e4ee9f6c93312eaba52dd69fca2 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 26 Nov 2019 12:33:20 +0100 Subject: [PATCH 022/155] Refactoring of reachability reward and probability methods to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp | 310 +++--------------- .../ApproximatePOMDPModelchecker.h | 14 + 2 files changed, 66 insertions(+), 258 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1a2a24a38..8d43e3977 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -414,12 +414,17 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set 
targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); bool finished = false; uint64_t iteration = 0; + if (computeRewards) { + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + } + std::vector> beliefList; std::vector beliefIsTarget; uint64_t nextId = 0; @@ -427,13 +432,10 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); @@ -448,24 +450,28 @@ namespace storm { std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { - result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); + result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? 
storm::utility::zero() : storm::utility::one())); } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - //TODO put this in extra function - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative + uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); + std::vector actionRewardsInState(numChoices); + for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; @@ -473,23 +479,24 @@ namespace storm { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief); + } } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, 
observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + } } + } nextBeliefGeneration.stop(); @@ -508,16 +515,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); + currentValue = computeRewards ? 
beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -530,8 +535,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -540,16 +545,15 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } + currentValue += iter->second * sum; } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { @@ -561,7 +565,7 @@ namespace storm { result[currentBelief.id] = chosenValue; chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) { improvement = true; } } @@ -580,17 +584,14 
@@ namespace storm { beliefGrid.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = temp.first; std::vector initLambdas = temp.second; auto overApprox = storm::utility::zero(); for (size_t j = 0; j < initLambdas.size(); ++j) { if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; } } overApproxTimer.stop(); @@ -598,9 +599,9 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min, false); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -608,231 +609,24 @@ namespace storm { << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachability(pomdp, targetObservations, min, gridResolution, false); } - //TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication! 
template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { - storm::utility::Stopwatch beliefGridTimer(true); - bool finished = false; - uint64_t iteration = 0; - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - - std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - std::map result_backup; - // Belief ID -> ActionIndex - std::map> chosenActions; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> beliefActionRewards; - - storm::utility::Stopwatch nextBeliefGeneration(true); - for (size_t i = 0; i < beliefGrid.size(); ++i) { - auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - if (!isTarget) { - //TODO put this in extra function - // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative - uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - uint64_t 
numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); - } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - } - - } - nextBeliefGeneration.stop(); - - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief 
currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), 
currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } - overApproxTimer.stop(); - - // Now onto the under-approximation - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); - underApproxTimer.stop(); - - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return computeReachability(pomdp, targetObservations, min, gridResolution, true); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 78c2b5536..cdf19dcd5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -59,6 +59,20 @@ namespace storm { std::set targetObservations, bool min, uint64_t gridResolution, bool computeRewards); + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param gridResolution + * @param computeRewards + * @return + */ + std::unique_ptr> + computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution, bool computeRewards); + /** * TODO * @param pomdp From b7b213571d7ad28393743c4e3c05a770687781e7 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:27:49 +0100 Subject: [PATCH 023/155] Refactoring of underapproximation procedures to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp 
| 247 +++++------------- .../ApproximatePOMDPModelchecker.h | 28 +- 2 files changed, 75 insertions(+), 200 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8d43e3977..c377461d8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -598,10 +598,8 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -631,119 +629,16 @@ namespace storm { template ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeReward) { - std::set visitedBelieves; - std::deque believesToBeExpanded; - std::map beliefStateMap; - std::vector> transitions; - std::vector targetStates; - - uint64_t stateId = 0; - 
beliefStateMap[initialBeliefId] = stateId; - ++stateId; - - // Expand the believes - visitedBelieves.insert(initialBeliefId); - believesToBeExpanded.push_back(initialBeliefId); - while (!believesToBeExpanded.empty()) { - auto currentBeliefId = believesToBeExpanded.front(); - std::map transitionsInState; - STORM_LOG_DEBUG("Exploring Belief " << beliefList[currentBeliefId].observation << "||" - << beliefList[currentBeliefId].probabilities); - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - transitionsInState[beliefStateMap[currentBeliefId]] = storm::utility::one(); - targetStates.push_back(beliefStateMap[currentBeliefId]); - } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin(); - iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation]; - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; - ++stateId; - believesToBeExpanded.push_back(nextBeliefId); - } - transitionsInState[beliefStateMap[nextBeliefId]] = iter->second; - } - } - transitions.push_back(transitionsInState); - believesToBeExpanded.pop_front(); - } - - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", 
targetState); - } - - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - if (computeReward) { - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), - currentBelief)); - } - } - - std::unordered_map rewardModels = {{"std", rewardModel}}; - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels); - - storm::models::sparse::Dtmc underApproxDtmc(modelComponents); - auto model = std::make_shared>(underApproxDtmc); - model->printModelInformationToStream(std::cout); - - std::string propertyString; - if (computeReward) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); - - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; - } - - template - ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards) { + ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeRewards, bool generateMdp) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -768,40 +663,21 @@ namespace storm { targetStates.push_back(beliefStateMap[currentBeliefId]); actionTransitionStorage.push_back(transitionsInStateWithAction); } else { - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, the next states have not been computed yet. 
- std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currentBeliefId], action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - beliefList[currentBeliefId], - action, - observation, - beliefList.size()); - } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); - } - observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); + chosenActions[currentBeliefId] = generateMdp ? 
extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min) : + extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min); } // Iterate over all actions and add the corresponding transitions - for (uint64_t action = 0; action < numChoices; ++action) { + for (auto const &action : chosenActions[currentBeliefId]) { std::map transitionsInStateWithAction; - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if (visitedBelieves.insert(nextBeliefId).second) { @@ -826,28 +702,48 @@ namespace storm { labeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents( - buildTransitionMatrix(transitions), labeling); - - storm::models::sparse::Mdp underApproxMdp(modelComponents); + std::shared_ptr> model; + auto transitionMatrix = buildTransitionMatrix(transitions); + if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { + transitionMatrix.makeRowGroupingTrivial(); + } + storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); + if (transitionMatrix.hasTrivialRowGrouping()) { + + storm::models::sparse::Dtmc underApproxMc(modelComponents); + storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); + if (computeRewards) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by 
taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } + } + underApproxMc.addRewardModel("std", rewardModel); + underApproxMc.restrictRewardModels(std::set({"std"})); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + model = std::make_shared>(underApproxMc); + } else { + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + underApproxMdp.addRewardModel("std", 
rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); + model = std::make_shared>(underApproxMdp); } - - auto model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); std::string propertyString; @@ -857,12 +753,9 @@ namespace storm { propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; } std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; @@ -938,17 +831,10 @@ namespace storm { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, 
observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); @@ -958,8 +844,7 @@ namespace storm { nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); // choose the action which results in the value computed by the over-approximation - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index cdf19dcd5..2c289d1f5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -134,25 +134,15 @@ namespace storm { * @param min * @return */ - ValueType computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward); - - ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); + ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t 
gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); /** * From 877c15ed43bbabe17846bead9f89f531970c4513 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:41:36 +0100 Subject: [PATCH 024/155] Removed obsolete function to create transition matrices from a data structure not used anymore --- .../ApproximatePOMDPModelchecker.cpp | 19 ------------------- .../ApproximatePOMDPModelchecker.h | 10 +++++----- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c377461d8..90fe6dc3b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -791,25 +791,6 @@ namespace storm { return smb.build(); } - template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix( - std::vector> transitions) { - uint_fast64_t currentRow = 0; - uint64_t nrEntries = 0; - for (auto const &map : transitions) { - nrEntries += map.size(); - } - storm::storage::SparseMatrixBuilder smb(transitions.size(), transitions.size(), nrEntries); - for (auto const &map : transitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - return smb.build(); - } - template std::vector ApproximatePOMDPModelchecker::extractBestActions( storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 2c289d1f5..89d1be286 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -232,11 +232,11 @@ namespace storm { uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, std::vector probabilities); - 
storm::storage::SparseMatrix - buildTransitionMatrix(std::vector> transitions); - - storm::storage::SparseMatrix - buildTransitionMatrix(std::vector>> transitions); + /** + * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) + * @return sparseMatrix representing the transitions + */ + storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> transitions); ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); From fe81e0d7cf6cd536fc4066381a7a76dd1540220c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:59:43 +0100 Subject: [PATCH 025/155] Smaller touch-ups (Removal of unused code, pass-by-reference) --- .../ApproximatePOMDPModelchecker.cpp | 40 ++++++++----------- .../ApproximatePOMDPModelchecker.h | 36 ++++++++--------- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 90fe6dc3b..282f4dd68 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -49,14 +49,10 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - if (computeRewards) { - RewardModelType const &pomdpRewardModel = pomdp.getUniqueRewardModel(); - } - bool finished = false; uint64_t iteration = 0; std::vector> beliefList; @@ -364,7 +360,7 @@ namespace storm { } } overApproxTimer.stop(); - ValueType underApprox = storm::utility::zero(); + auto underApprox = 
storm::utility::zero(); /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, @@ -401,30 +397,28 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); bool finished = false; uint64_t iteration = 0; - if (computeRewards) { - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - } - std::vector> beliefList; std::vector beliefIsTarget; uint64_t nextId = 0; @@ -616,14 +610,15 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachability(pomdp, targetObservations, min, gridResolution, false); } template std::unique_ptr> 
ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, uint64_t gridResolution) { return computeReachability(pomdp, targetObservations, min, gridResolution, true); } @@ -632,7 +627,7 @@ namespace storm { ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -796,7 +791,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -870,7 +865,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -925,7 +920,7 @@ namespace storm { template void ApproximatePOMDPModelchecker::constructBeliefGrid( storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, uint64_t gridResolution, + std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsKnown, uint64_t nextId) { @@ -971,9 +966,7 @@ namespace storm { storm::utility::convertNumber(gridResolution); storm::pomdp::Belief belief = {newId, observation, distribution}; - STORM_LOG_TRACE( - "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) - << ")," << distribution << "]"); + STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << 
distribution << "]"); beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); @@ -1107,7 +1100,8 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, - std::vector &beliefIsTarget, std::set &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, + std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, + uint32_t observation, uint64_t id) { storm::utility::Stopwatch distrWatch(true); std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 89d1be286..f20f4ebc1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -27,21 +27,21 @@ namespace storm { std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> - computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, + computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); private: @@ -56,7 +56,7 @@ namespace storm 
{ */ std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards); /** @@ -70,7 +70,7 @@ namespace storm { */ std::unique_ptr> computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards); /** @@ -89,7 +89,7 @@ namespace storm { std::vector extractBestActions(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &target_observations, + std::set const &target_observations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -110,14 +110,14 @@ namespace storm { * @return */ std::vector extractBestAction(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &target_observations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, - bool min); + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set const &target_observations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, + bool min); /** * TODO @@ -137,7 +137,7 @@ namespace storm { ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -172,7 +172,7 @@ namespace storm { * */ void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, uint64_t gridResolution, + std::set const 
&target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsKnown, uint64_t nextId); @@ -206,7 +206,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id); From 8992b70da38147c0d3d1d66a8c4f69a680babe2d Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 15:08:16 +0100 Subject: [PATCH 026/155] Made Value Iteration its own function to reduce duplicate code --- .../ApproximatePOMDPModelchecker.cpp | 214 ++++++------------ .../ApproximatePOMDPModelchecker.h | 11 + 2 files changed, 83 insertions(+), 142 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 282f4dd68..37a29f9b1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -52,14 +52,10 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - bool finished = false; - uint64_t iteration = 0; std::vector> beliefList; std::vector beliefIsTarget; std::vector> beliefGrid; std::map result; - std::map result_backup; //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; @@ -147,7 +143,6 @@ namespace storm { if (isTarget) { // Depending on whether we compute rewards, we select the right initial result result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); - result_backup.emplace(std::make_pair(currId, computeRewards ? 
storm::utility::zero() : storm::utility::one())); // MDP stuff std::vector> transitionsInBelief; @@ -158,7 +153,6 @@ namespace storm { mdpTransitions.push_back(transitionsInBelief); } else { result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); @@ -274,6 +268,61 @@ namespace storm { overApproxMdp.printModelInformationToStream(std::cout); storm::utility::Stopwatch overApproxTimer(true); + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); + overApproxTimer.stop(); + auto underApprox = storm::utility::zero(); + /* + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl);*/ + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + std::vector parameterNames; + storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); + + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + + template + ValueType + ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector> &beliefGrid, + std::vector &beliefIsTarget, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map> &beliefActionRewards, + std::map>> &subSimplexCache, + std::map> &lambdaCache, + std::map &result, + std::map> &chosenActions, + uint64_t gridResolution, bool min, bool computeRewards) { + std::map result_backup = result; + uint64_t iteration = 0; + bool finished = false; // Value Iteration while (!finished && iteration < maxIterations) { storm::utility::Stopwatch iterationTimer(true); @@ -304,9 +353,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - //TODO This should not ne reachable - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -353,45 +401,14 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < 
initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; + for (size_t j = 0; j < lambdaCache[0].size(); ++j) { + if (lambdaCache[0][j] != storm::utility::zero()) { + overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])]; } } - overApproxTimer.stop(); - auto underApprox = storm::utility::zero(); - /* - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); - underApproxTimer.stop(); - - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl);*/ - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::vector parameterNames; - storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); - - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? 
[F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + return overApprox; } template @@ -416,8 +433,6 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); - bool finished = false; - uint64_t iteration = 0; std::vector> beliefList; std::vector beliefIsTarget; @@ -436,7 +451,6 @@ namespace storm { storm::utility::Stopwatch overApproxTimer(true); // Belief ID -> Value std::map result; - std::map result_backup; // Belief ID -> ActionIndex std::map> chosenActions; @@ -446,6 +460,13 @@ namespace storm { std::map>> nextBelieves; // current ID -> action -> reward std::map> beliefActionRewards; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); + subSimplexCache[0] = temp.first; + lambdaCache[0] = temp.second; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { @@ -453,10 +474,8 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); - result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? 
storm::utility::zero() : storm::utility::one())); } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); @@ -494,100 +513,11 @@ namespace storm { } nextBeliefGeneration.stop(); - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = computeRewards ? 
beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, - gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (!cc.isEqual(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) { - improvement = true; - } - } 
- } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; - } - } + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); // Now onto the under-approximation diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f20f4ebc1..6d8b50d43 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -240,6 +240,17 @@ namespace storm { ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); + ValueType + overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + std::vector> &beliefGrid, std::vector &beliefIsTarget, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map> &beliefActionRewards, + std::map>> &subSimplexCache, + 
std::map> &lambdaCache, std::map &result, + std::map> &chosenActions, + uint64_t gridResolution, bool min, bool computeRewards); + storm::utility::ConstantsComparator cc; double precision; bool useMdp; From 77c63f4c128e19caaaf93ba5660d79ad94d732cd Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Fri, 29 Nov 2019 18:17:24 +0100 Subject: [PATCH 027/155] SAT based zerostate analysis: work in progress --- .../settings/modules/POMDPSettings.cpp | 8 +- .../settings/modules/POMDPSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 162 +++++++++++++----- .../MemlessStrategySearchQualitative.cpp | 114 ++++++++++++ 4 files changed, 238 insertions(+), 47 deletions(-) create mode 100644 src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 617cebf36..2a637d436 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -26,6 +26,7 @@ namespace storm { std::vector fscModes = {"standard", "simple-linear", "simple-linear-inverse"}; const std::string transformBinaryOption = "transformbinary"; const std::string transformSimpleOption = "transformsimple"; + const std::string memlessSearchOption = "memlesssearch"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); @@ -45,6 +46,7 @@ namespace storm { 10).addValidatorUnsignedInteger( storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator( 0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").build()); } bool 
POMDPSettings::isExportToParametricSet() const { @@ -79,7 +81,11 @@ namespace storm { return this->getOption(gridApproximationOption).getArgumentByName( "resolution").getValueAsUnsignedInteger(); } - + + bool POMDPSettings::isMemlessSearchSet() const { + return this->getOption(memlessSearchOption).getHasOptionBeenSet(); + } + uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 1e68871e8..9f7332774 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -32,6 +32,7 @@ namespace storm { bool isSelfloopReductionSet() const; bool isTransformSimpleSet() const; bool isTransformBinarySet() const; + bool isMemlessSearchSet() const; std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 0af965ddd..3927d0728 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -43,6 +43,7 @@ #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" #include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" +#include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" #include "storm/api/storm.h" #include @@ -77,6 +78,91 @@ void initializeSettings() { storm::settings::addModule(); } +template +bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& badStates) { + //TODO refactor (use model checker to determine the states, then transform into observations). 
+ + bool validFormula = false; + if (subformula.isEventuallyFormula()) { + storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); + storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } + } else if (subformula.isUntilFormula()) { + storm::logic::UntilFormula const &eventuallyFormula = subformula.asUntilFormula(); + storm::logic::Formula const &subformula1 = eventuallyFormula.getLeftSubformula(); + if (subformula1.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + badStates.set(state); + } + } + } else if (subformula1.isAtomicExpressionFormula()) { + std::stringstream stream; + stream << 
subformula1.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + badStates.set(state); + } + } + } else { + return false; + } + storm::logic::Formula const &subformula2 = eventuallyFormula.getRightSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } + } + return validFormula; +} + /*! * Entry point for the pomdp backend. * @@ -85,7 +171,7 @@ void initializeSettings() { * @return Return code, 0 if successfull, not 0 otherwise. 
*/ int main(const int argc, const char** argv) { - try { + //try { storm::utility::setUp(); storm::cli::printHeader("Storm-pomdp", argc, argv); initializeSettings(); @@ -134,7 +220,21 @@ int main(const int argc, const char** argv) { } if (formula) { + storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); + storm::logic::Formula const &subformula1 = probFormula.getSubformula(); + + if (formula->isProbabilityOperatorFormula()) { + + std::set targetObservationSet; + std::set badObservationSet; + + bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, badObservationSet); + STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, + "The formula is not supported by the grid approximation"); + STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); + + boost::optional prob1States; boost::optional prob0States; if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { @@ -159,42 +259,6 @@ int main(const int argc, const char** argv) { pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); } if (pomdpSettings.isGridApproximationSet()) { - storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); - storm::logic::Formula const &subformula1 = probFormula.getSubformula(); - - std::set targetObservationSet; - //TODO refactor - bool validFormula = false; - if (subformula1.isEventuallyFormula()) { - storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); - storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = 
pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } - } - STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, - "The formula is not supported by the grid approximation"); - STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); double overRes = storm::utility::one(); @@ -213,6 +277,14 @@ int main(const int argc, const char** argv) { STORM_PRINT("Result: " << overRes << std::endl) } } + if (pomdpSettings.isMemlessSearchSet()) { + storm::expressions::ExpressionManager expressionManager; + std::shared_ptr smtSolverFactory = std::make_shared(); + + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, smtSolverFactory); + memlessSearch.analyze(5); + + } } else if (formula->isRewardOperatorFormula()) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); @@ -344,16 +416,14 @@ int main(const int argc, const char** argv) { STORM_LOG_WARN("Nothing to be done. 
Did you forget to specify a formula?"); } - - // All operations have now been performed, so we clean up everything and terminate. storm::utility::cleanUp(); return 0; - } catch (storm::exceptions::BaseException const &exception) { - STORM_LOG_ERROR("An exception caused Storm-pomdp to terminate. The message of the exception is: " << exception.what()); - return 1; - } catch (std::exception const &exception) { - STORM_LOG_ERROR("An unexpected exception occurred and caused Storm-pomdp to terminate. The message of this exception is: " << exception.what()); - return 2; - } + // } catch (storm::exceptions::BaseException const &exception) { + // STORM_LOG_ERROR("An exception caused Storm-pomdp to terminate. The message of the exception is: " << exception.what()); + // return 1; + //} catch (std::exception const &exception) { + // STORM_LOG_ERROR("An unexpected exception occurred and caused Storm-pomdp to terminate. The message of this exception is: " << exception.what()); + // return 2; + //} } diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp new file mode 100644 index 000000000..0c46fd0d3 --- /dev/null +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp @@ -0,0 +1,114 @@ +#include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" + + +namespace storm { + namespace pomdp { + + template + void MemlessStrategySearchQualitative::initialize(uint64_t k) { + if (maxK == -1) { + // not initialized at all. + + // Create some data structures. + for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + actionSelectionVars.push_back(std::vector()); + statesPerObservation.push_back(std::vector()); // Consider using bitvectors instead. + } + + // Fill the states-per-observation mapping, + // declare the reachability variables, + // declare the path variables. 
+ uint64_t stateId = 0; + for(auto obs : pomdp.getObservations()) { + pathVars.push_back(std::vector()); + for (uint64_t i = 0; i < k; ++i) { + pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); + } + reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId)).getExpression()); + + statesPerObservation.at(obs).push_back(stateId++); + + } + assert(pathVars.size() == pomdp.getNumberOfStates()); + + // Create the action selection variables. + uint64_t obs = 0; + for(auto const& statesForObservation : statesPerObservation) { + for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { + std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); + actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName).getExpression()); + } + ++obs; + } + + + } else { + + assert(false); + + } + + uint64_t rowindex = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + std::vector> pathsubsubexprs; + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs.push_back(std::vector()); + } + + if (targetObservations.count(pomdp.getObservation(state)) > 0) { + smtSolver->add(pathVars[state][0]); + } else { + smtSolver->add(!pathVars[state][0]); + } + + if (surelyReachSinkStates.at(state)) { + smtSolver->add(!reachVars[state]); + } + else { + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + std::vector subexprreach; + + subexprreach.push_back(!reachVars.at(state)); + subexprreach.push_back(!actionSelectionVars.at(pomdp.getObservation(state)).at(action)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + subexprreach.push_back(reachVars.at(entries.getColumn())); + } + smtSolver->add(storm::expressions::disjunction(subexprreach)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + for 
(uint64_t j = 1; j < k; ++j) { + pathsubsubexprs[j - 1].push_back(pathVars[entries.getColumn()][j - 1]); + } + } + rowindex++; + } + smtSolver->add(storm::expressions::implies(reachVars.at(state), pathVars.at(state).back())); + + + for (uint64_t j = 1; j < k; ++j) { + std::vector pathsubexprs; + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubexprs.push_back(actionSelectionVars.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1])); + } + smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); + } + } + + } + + for (auto const& actionVars : actionSelectionVars) { + smtSolver->add(storm::expressions::disjunction(actionVars)); + } + + + + + //for (auto const& ) + + } + + + + template class MemlessStrategySearchQualitative; + } +} From 4418422ea83afb312ca0e2f66b366b64e45109f8 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Tue, 3 Dec 2019 17:30:49 +0100 Subject: [PATCH 028/155] merge -- but code is not working atm --- .../MemlessStrategySearchQualitative.cpp | 2 +- .../MemlessStrategySearchQualitative.h | 79 +++++++++++++++++++ .../ApproximatePOMDPModelchecker.cpp | 6 +- src/storm/adapters/Z3ExpressionAdapter.cpp | 3 +- src/storm/solver/SmtSolver.h | 4 + src/storm/solver/SmtlibSmtSolver.cpp | 5 ++ src/storm/solver/SmtlibSmtSolver.h | 2 +- src/storm/solver/Z3SmtSolver.cpp | 13 ++- src/storm/solver/Z3SmtSolver.h | 1 + 9 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp index 0c46fd0d3..2ed1230e0 100644 --- a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp @@ -61,7 +61,7 @@ namespace storm { smtSolver->add(!pathVars[state][0]); } - 
if (surelyReachSinkStates.at(state)) { + if (surelyReachSinkStates.get(state)) { smtSolver->add(!reachVars[state]); } else { diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h new file mode 100644 index 000000000..ca1a84aa6 --- /dev/null +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h @@ -0,0 +1,79 @@ +#include +#include "storm/storage/expressions/Expressions.h" +#include "storm/solver/SmtSolver.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/solver.h" + +namespace storm { +namespace pomdp { + + template + class MemlessStrategySearchQualitative { + // Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. + + + public: + MemlessStrategySearchQualitative(storm::models::sparse::Pomdp const& pomdp, + std::set const& targetObservationSet, + std::shared_ptr& smtSolverFactory) : + pomdp(pomdp), + targetObservations(targetObservationSet) { + this->expressionManager = std::make_shared(); + smtSolver = smtSolverFactory->create(*expressionManager); + + } + + void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { + surelyReachSinkStates = surelyReachSink; + } + + void analyze(uint64_t k) { + if (k < maxK) { + initialize(k); + } + std::cout << smtSolver->getSmtLibString() << std::endl; + for (uint64_t state : pomdp.getInitialStates()) { + smtSolver->add(reachVars[state]); + } + auto result = smtSolver->check(); + switch(result) { + case storm::solver::SmtSolver::CheckResult::Sat: + std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; + + case storm::solver::SmtSolver::CheckResult::Unsat: + // std::cout << std::endl << "Unsatisfiability core: {" << std::endl; + // for (auto const& expr : solver->getUnsatCore()) { + // std::cout << "\t " << expr << std::endl; + // } + // std::cout << "}" << std::endl; + + default: + std::cout<< "oops." 
<< std::endl; + // STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); + } + //std::cout << "get model:" << std::endl; + //std::cout << smtSolver->getModel().toString() << std::endl; + } + + + private: + void initialize(uint64_t k); + + std::unique_ptr smtSolver; + storm::models::sparse::Pomdp const& pomdp; + std::shared_ptr expressionManager; + uint64_t maxK = -1; + + std::set targetObservations; + storm::storage::BitVector surelyReachSinkStates; + + std::vector> statesPerObservation; + std::vector> actionSelectionVars; // A_{z,a} + std::vector reachVars; + std::vector> pathVars; + + + + }; +} +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 37a29f9b1..3cd362e58 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -10,6 +10,7 @@ #include "storm/modelchecker/results/CheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/models/sparse/StandardRewardModel.h" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" @@ -548,7 +549,8 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachability(pomdp, targetObservations, min, gridResolution, true); } @@ -1088,8 +1090,6 @@ namespace storm { class ApproximatePOMDPModelchecker; #ifdef STORM_HAVE_CARL - - //template class ApproximatePOMDPModelchecker; template class ApproximatePOMDPModelchecker; diff --git a/src/storm/adapters/Z3ExpressionAdapter.cpp 
b/src/storm/adapters/Z3ExpressionAdapter.cpp index bb9afb6f3..437f003b9 100644 --- a/src/storm/adapters/Z3ExpressionAdapter.cpp +++ b/src/storm/adapters/Z3ExpressionAdapter.cpp @@ -37,8 +37,7 @@ namespace storm { result = result && assertion; } additionalAssertions.clear(); - - return result; + return result.simplify(); } z3::expr Z3ExpressionAdapter::translateExpression(storm::expressions::Variable const& variable) { diff --git a/src/storm/solver/SmtSolver.h b/src/storm/solver/SmtSolver.h index 7ee3896bd..4077c244a 100644 --- a/src/storm/solver/SmtSolver.h +++ b/src/storm/solver/SmtSolver.h @@ -23,6 +23,8 @@ namespace storm { public: //! possible check results enum class CheckResult { Sat, Unsat, Unknown }; + + /*! * The base class for all model references. They are used to provide a lightweight method of accessing the @@ -48,6 +50,8 @@ namespace storm { * @return The expression manager associated with this model reference. */ storm::expressions::ExpressionManager const& getManager() const; + + virtual std::string toString() const = 0; private: // The expression manager responsible for the variables whose value can be requested via this model diff --git a/src/storm/solver/SmtlibSmtSolver.cpp b/src/storm/solver/SmtlibSmtSolver.cpp index aad46d81e..1ef7fda76 100644 --- a/src/storm/solver/SmtlibSmtSolver.cpp +++ b/src/storm/solver/SmtlibSmtSolver.cpp @@ -40,6 +40,11 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "functionality not (yet) implemented"); } + std::string SmtlibSmtSolver::SmtlibModelReference::toString() const { + STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "functionality not (yet) implemented"); + } + + SmtlibSmtSolver::SmtlibSmtSolver(storm::expressions::ExpressionManager& manager, bool useCarlExpressions) : SmtSolver(manager), isCommandFileOpen(false), expressionAdapter(nullptr), useCarlExpressions(useCarlExpressions) { #ifndef STORM_HAVE_CARL STORM_LOG_THROW(!useCarlExpressions, 
storm::exceptions::IllegalArgumentException, "Tried to use carl expressions but storm is not linked with CARL"); diff --git a/src/storm/solver/SmtlibSmtSolver.h b/src/storm/solver/SmtlibSmtSolver.h index 6ae64eb92..cd31e58d5 100644 --- a/src/storm/solver/SmtlibSmtSolver.h +++ b/src/storm/solver/SmtlibSmtSolver.h @@ -26,7 +26,7 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; }; public: diff --git a/src/storm/solver/Z3SmtSolver.cpp b/src/storm/solver/Z3SmtSolver.cpp index 13579a172..7d3745fa7 100644 --- a/src/storm/solver/Z3SmtSolver.cpp +++ b/src/storm/solver/Z3SmtSolver.cpp @@ -44,7 +44,18 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Storm is compiled without Z3 support."); #endif } - + + std::string Z3SmtSolver::Z3ModelReference::toString() const { +#ifdef STORM_HAVE_Z3 + std::stringstream sstr; + sstr << model; + return sstr.str(); +#else + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Storm is compiled without Z3 support."); +#endif + } + + Z3SmtSolver::Z3SmtSolver(storm::expressions::ExpressionManager& manager) : SmtSolver(manager) #ifdef STORM_HAVE_Z3 , context(nullptr), solver(nullptr), expressionAdapter(nullptr), lastCheckAssumptions(false), lastResult(CheckResult::Unknown) diff --git a/src/storm/solver/Z3SmtSolver.h b/src/storm/solver/Z3SmtSolver.h index 6616e0292..fe92299a7 100644 --- a/src/storm/solver/Z3SmtSolver.h +++ b/src/storm/solver/Z3SmtSolver.h @@ -22,6 +22,7 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double 
getRationalValue(storm::expressions::Variable const& variable) const override; + virtual std::string toString() const override; private: #ifdef STORM_HAVE_Z3 From aca676a0a5dcfa2382672857af7bff0c5828304c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Mon, 9 Dec 2019 09:48:09 +0100 Subject: [PATCH 029/155] Added model generation and checking for initial approximation bounds --- .../ApproximatePOMDPModelchecker.cpp | 116 ++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 21 ++-- 2 files changed, 104 insertions(+), 33 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 37a29f9b1..27cbdb2bc 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -10,6 +10,7 @@ #include "storm/modelchecker/results/CheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" @@ -21,7 +22,7 @@ namespace storm { ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { precision = 0.000000001; cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); - useMdp = false; + useMdp = true; maxIterations = 1000; } @@ -51,6 +52,51 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { + //TODO For the prototypical implementation, I put the refinement loop here. 
I'll change this later on + + // we define some positional scheduler for the POMDP as an experimental lower bound + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + // select a random action for all states with the same observation + uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); + for (auto const &state : obsStates) { + pomdpScheduler.setChoice(chosenAction, state); + } + } + + storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); + underlyingMdpLabeling.addLabel("goal"); + std::vector goalStates; + for (auto const &targetObs : targetObservations) { + for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { + underlyingMdpLabeling.addLabelToState("goal", goalState); + } + } + storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); + auto underlyingModel = std::static_pointer_cast>( + std::make_shared>(underlyingMdp)); + std::string initPropString = computeRewards ? "R" : "P"; + initPropString += min ? "min" : "max"; + initPropString += "=? 
[F \"goal\"]"; + std::vector propVector = storm::api::parseProperties(initPropString); + std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); + underlyingMdp.printModelInformationToStream(std::cout); + + std::unique_ptr underlyingRes( + storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underlyingRes, "Result not exist."); + underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); + auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + + auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + underApproxModel->printModelInformationToStream(std::cout); + std::unique_ptr underapproxRes( + storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underapproxRes, "Result not exist."); + underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); + auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) std::vector> beliefList; std::vector beliefIsTarget; @@ -70,6 +116,7 @@ namespace storm { // current ID -> action -> reward std::map> beliefActionRewards; + std::vector hintVector; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); // Initial belief always has ID 0 @@ -107,6 +154,8 @@ namespace storm { initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? 
storm::utility::one() + : storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; @@ -116,6 +165,9 @@ namespace storm { beliefsToBeExpanded.push_back(nextId); ++nextId; + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); + beliefStateMap[nextId] = mdpStateId; initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; @@ -131,9 +183,11 @@ namespace storm { mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + std::map weightedSumOverMap; + std::map weightedSumUnderMap; + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); @@ -196,9 +250,22 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(observation) != targetObservations.end()); + // compute overapproximate value using MDP result map + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); + + if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { 
+ hintVector.push_back(tempWeightedSumOver); + } + beliefsToBeExpanded.push_back(nextId); + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; beliefStateMap[nextId] = mdpStateId; transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; @@ -267,12 +334,15 @@ namespace storm { } overApproxMdp.printModelInformationToStream(std::cout); + /* storm::utility::Stopwatch overApproxTimer(true); auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache, result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop(); + overApproxTimer.stop();*/ + auto underApprox = storm::utility::zero(); + auto overApprox = storm::utility::one(); /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, @@ -282,10 +352,10 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer - << std::endl);*/ + << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);*/ auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); @@ -298,7 +368,13 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + + auto task = storm::api::createTask(property, true); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); 
+ hint.setResultHint(hintVector); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); @@ -561,7 +637,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map> chosenActions, + std::map> &chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards, bool generateMdp) { std::set visitedBelieves; @@ -689,8 +765,7 @@ namespace storm { template storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix( - std::vector>> transitions) { + ApproximatePOMDPModelchecker::buildTransitionMatrix(std::vector>> &transitions) { uint_fast64_t currentRow = 0; uint_fast64_t currentRowGroup = 0; uint64_t nrColumns = transitions.size(); @@ -809,7 +884,7 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> const &grid, uint32_t observation, - std::vector probabilities) { + std::vector &probabilities) { // TODO This one is quite slow for (auto const &belief : grid) { if (belief.observation == observation) { @@ -929,7 +1004,7 @@ namespace storm { template std::pair>, std::vector> ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::vector probabilities, uint64_t resolution) { + std::vector &probabilities, uint64_t resolution) { // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper uint64_t probSize = probabilities.size(); @@ -989,7 +1064,7 @@ namespace storm { std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( storm::models::sparse::Pomdp const &pomdp, - 
storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) @@ -1009,10 +1084,8 @@ namespace storm { template storm::pomdp::Belief - ApproximatePOMDPModelchecker::getBeliefAfterAction( - storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, - uint64_t actionIndex, uint64_t id) { + ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -1030,9 +1103,8 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, - std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, - uint32_t observation, - uint64_t id) { + std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, + uint32_t observation, uint64_t id) { storm::utility::Stopwatch distrWatch(true); std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -1075,7 +1147,7 @@ namespace storm { template ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, - uint64_t action, storm::pomdp::Belief belief) { + uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); for (size_t i = 0; i < belief.probabilities.size(); ++i) { result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix()); diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 6d8b50d43..aada759d2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -141,7 +141,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map> chosenActions, + std::map> &chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); /** @@ -161,7 +161,7 @@ namespace storm { * @return */ std::pair>, std::vector> - computeSubSimplexAndLambdas(std::vector probabilities, uint64_t gridResolution); + computeSubSimplexAndLambdas(std::vector &probabilities, uint64_t gridResolution); /** @@ -188,7 +188,7 @@ namespace storm { */ std::map computeObservationProbabilitiesAfterAction( storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex); /** @@ -201,13 +201,12 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - uint64_t - getBeliefAfterActionAndObservation( + uint64_t getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id); /** @@ -219,8 +218,8 @@ namespace storm { * @return */ storm::pomdp::Belief - getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id); + getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, + uint64_t id); /** * Helper to get the id of a Belief stored in a given vector structure @@ -230,15 
+229,15 @@ namespace storm { * @return */ uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::vector probabilities); + std::vector &probabilities); /** * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) * @return sparseMatrix representing the transitions */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> transitions); + storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); - ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); + ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief &belief); ValueType overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, From f416cc82911c945dab0e54896e62daf030e36b66 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 10 Dec 2019 12:23:26 +0100 Subject: [PATCH 030/155] Added flag to toggle caching of subsimplex and lambda values --- .../ApproximatePOMDPModelchecker.cpp | 96 ++++++++++--------- .../ApproximatePOMDPModelchecker.h | 1 + 2 files changed, 54 insertions(+), 43 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 27cbdb2bc..3939913c6 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -24,6 +24,7 @@ namespace storm { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); useMdp = true; maxIterations = 1000; + cacheSubsimplices = false; } template @@ -81,6 +82,7 @@ namespace storm { initPropString += "=? 
[F \"goal\"]"; std::vector propVector = storm::api::parseProperties(initPropString); std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); + STORM_PRINT("Underlying MDP" << std::endl) underlyingMdp.printModelInformationToStream(std::cout); std::unique_ptr underlyingRes( @@ -90,6 +92,7 @@ namespace storm { auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + STORM_PRINT("Random Positional Scheduler" << std::endl) underApproxModel->printModelInformationToStream(std::cout); std::unique_ptr underapproxRes( storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); @@ -138,13 +141,15 @@ namespace storm { std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; - bool initInserted = false; + if(cacheSubsimplices){ + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + } + std::vector> initTransitionsInBelief; std::map initTransitionInActionBelief; - + bool initInserted = false; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); @@ -183,13 +188,14 @@ namespace storm { mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting std::map weightedSumOverMap; std::map weightedSumUnderMap; // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points while (!beliefsToBeExpanded.empty()) { + // TODO add direct generation of transition matrix uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; @@ -230,8 +236,7 @@ namespace storm { //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; - if (subSimplexCache.count(idNextBelief) > 0) { - // TODO is this necessary here? Think later + if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { @@ -239,8 +244,10 @@ namespace storm { beliefList[idNextBelief].probabilities, gridResolution); subSimplex = temp.first; lambdas = temp.second; - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if(cacheSubsimplices){ + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } } for (size_t j = 0; j < lambdas.size(); ++j) { @@ -307,6 +314,8 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + //auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache,result, chosenActions, gridResolution, min, computeRewards); + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -334,29 +343,6 @@ namespace storm { } overApproxMdp.printModelInformationToStream(std::cout); - /* - storm::utility::Stopwatch overApproxTimer(true); - auto overApprox = 
overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, - subSimplexCache, lambdaCache, - result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop();*/ - - auto underApprox = storm::utility::zero(); - auto overApprox = storm::utility::one(); - /* - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); - underApproxTimer.stop(); - - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);*/ - auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); std::vector parameterNames; @@ -367,17 +353,26 @@ namespace storm { propertyString += "=? 
[F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - auto task = storm::api::createTask(property, true); auto hint = storm::modelchecker::ExplicitModelCheckerHint(); hint.setResultHint(hintVector); auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); + storm::utility::Stopwatch overApproxTimer(true); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproxTimer.stop(); STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + auto overApprox = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + /* storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + underApproxTimer.stop();*/ + + STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) + auto underApprox = storm::utility::zero(); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } @@ -425,7 +420,7 @@ namespace storm { // cache the values to not always re-calculate std::vector> subSimplex; std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { + if (cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { @@ -433,8 +428,10 @@ namespace storm { gridResolution); subSimplex = temp.first; lambdas = 
temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; + if(cacheSubsimplices) { + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } } auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { @@ -477,11 +474,21 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + std::vector initialLambda; + std::vector> initialSubsimplex; + if(cacheSubsimplices){ + initialLambda = lambdaCache[0]; + initialSubsimplex = subSimplexCache[0]; + } else { + auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution); + initialSubsimplex= temp.first; + initialLambda = temp.second; + } auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < lambdaCache[0].size(); ++j) { - if (lambdaCache[0][j] != storm::utility::zero()) { - overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])]; + for (size_t j = 0; j < initialLambda.size(); ++j) { + if (initialLambda[j] != storm::utility::zero()) { + overApprox += initialLambda[j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, initialSubsimplex[j])]; } } return overApprox; @@ -541,8 +548,10 @@ namespace storm { std::map> lambdaCache; std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); - subSimplexCache[0] = temp.first; - lambdaCache[0] = temp.second; + if(cacheSubsimplices) { + subSimplexCache[0] = temp.first; + lambdaCache[0] = temp.second; + } storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { @@ -553,6 +562,7 @@ namespace storm { } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function + // As we need to grab some parameters which are the same for all states with the same observation, we simply 
select some state as the representative uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index aada759d2..e6f935a72 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -253,6 +253,7 @@ namespace storm { storm::utility::ConstantsComparator cc; double precision; bool useMdp; + bool cacheSubsimplices; uint64_t maxIterations; }; From 94b93f013caf783756c88d09970f7b74ae48403f Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 13 Dec 2019 14:24:36 +0100 Subject: [PATCH 031/155] Added option to stop approximation space exploration early if difference between over- and under-approximation is under a given threshold --- .../settings/modules/POMDPSettings.cpp | 24 ++-- .../settings/modules/POMDPSettings.h | 2 + src/storm-pomdp-cli/storm-pomdp.cpp | 2 +- .../ApproximatePOMDPModelchecker.cpp | 115 ++++++++++++------ .../ApproximatePOMDPModelchecker.h | 4 +- 5 files changed, 95 insertions(+), 52 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 617cebf36..08f1e8d50 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -15,6 +15,7 @@ namespace storm { const std::string POMDPSettings::moduleName = "pomdp"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; + const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string 
mecReductionOption = "mecreduction"; @@ -38,13 +39,9 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); - this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false, - "Analyze the POMDP using grid approximation.").addArgument( - storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution", - "the resolution of the grid").setDefaultValueUnsignedInteger( - 10).addValidatorUnsignedInteger( - storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator( - 0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether to early in the belief space exploration if upper and lower bound are close").addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to 
stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); } bool POMDPSettings::isExportToParametricSet() const { @@ -76,10 +73,17 @@ namespace storm { } uint64_t POMDPSettings::getGridResolution() const { - return this->getOption(gridApproximationOption).getArgumentByName( - "resolution").getValueAsUnsignedInteger(); + return this->getOption(gridApproximationOption).getArgumentByName("resolution").getValueAsUnsignedInteger(); } - + + bool POMDPSettings::isLimitExplorationSet() const { + return this->getOption(limitBeliefExplorationOption).getHasOptionBeenSet(); + } + + double POMDPSettings::getExplorationThreshold() const { + return this->getOption(limitBeliefExplorationOption).getArgumentByName("threshold").getValueAsDouble(); + } + uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 1e68871e8..c0c9ecf44 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -27,6 +27,7 @@ namespace storm { bool isQualitativeReductionSet() const; bool isGridApproximationSet() const; + bool isLimitExplorationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; @@ -36,6 +37,7 @@ namespace storm { uint64_t getMemoryBound() const; uint64_t getGridResolution() const; + double getExplorationThreshold() const; storm::storage::PomdpMemoryPattern getMemoryPattern() const; bool check() const override; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 0af965ddd..8b28da0bf 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -203,7 +203,7 @@ int 
main(const int argc, const char** argv) { //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(),1,10); result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); + pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); overRes = result->OverapproximationValue; underRes = result->UnderapproximationValue; if (overRes != underRes) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 3939913c6..fd8502563 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -52,20 +52,9 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, - bool computeRewards) { + bool computeRewards, double explorationThreshold) { //TODO For the prototypical implementation, I put the refinement loop here. 
I'll change this later on - - // we define some positional scheduler for the POMDP as an experimental lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); - } - } - + storm::utility::Stopwatch underlyingWatch(true); storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); underlyingMdpLabeling.addLabel("goal"); std::vector goalStates; @@ -84,13 +73,23 @@ namespace storm { std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); STORM_PRINT("Underlying MDP" << std::endl) underlyingMdp.printModelInformationToStream(std::cout); - - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + underlyingWatch.stop(); + storm::utility::Stopwatch positionalWatch(true); + // we define some positional scheduler for the POMDP as an experimental lower bound + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + // select a random action for all states with the same observation + uint64_t 
chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); + for (auto const &state : obsStates) { + pomdpScheduler.setChoice(chosenAction, state); + } + } auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); STORM_PRINT("Random Positional Scheduler" << std::endl) underApproxModel->printModelInformationToStream(std::cout); @@ -99,6 +98,9 @@ namespace storm { STORM_LOG_ASSERT(underapproxRes, "Result not exist."); underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + positionalWatch.stop(); + + STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) std::vector> beliefList; @@ -118,25 +120,30 @@ namespace storm { std::map>> nextBelieves; // current ID -> action -> reward std::map> beliefActionRewards; - - std::vector hintVector; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); - // Initial belief always has ID 0 + // Initial belief always has belief ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - // These are the components to build the MDP from the grid + // These are the components to build the MDP from the grid TODO make a better stucture to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?) 
std::map beliefStateMap; - std::vector>> mdpTransitions; - std::vector targetStates; - uint64_t mdpStateId = 0; + // Reserve states 0 and 1 as always sink/goal states + std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}},{{{1, storm::utility::one()}}}}; + // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) + std::vector hintVector = {storm::utility::zero(), storm::utility::one()}; + std::vector targetStates = {1}; + uint64_t mdpStateId = 2; beliefStateMap[initialBelief.id] = mdpStateId; ++mdpStateId; + // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace + std::map weightedSumOverMap; + std::map weightedSumUnderMap; + // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; @@ -145,8 +152,6 @@ namespace storm { subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; } - - std::vector> initTransitionsInBelief; std::map initTransitionInActionBelief; bool initInserted = false; @@ -156,6 +161,15 @@ namespace storm { if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { if (searchResult == 0) { // the initial belief is on the grid itself + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; + weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder; + initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); @@ -163,6 +177,17 @@ namespace storm { : 
storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); @@ -190,16 +215,22 @@ namespace storm { //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + // Expand the beliefs to generate the grid on-the-fly + if(explorationThreshold > 0){ + STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) + } while (!beliefsToBeExpanded.empty()) { // TODO add direct generation of transition matrix uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; + if(cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))){ + result.emplace(std::make_pair(currId, computeRewards ? 
storm::utility::zero() : weightedSumOverMap[currId])); + mdpTransitions.push_back({{{1, weightedSumOverMap[currId]},{0, storm::utility::one() - weightedSumOverMap[currId]}}}); + continue; + } + if (isTarget) { // Depending on whether we compute rewards, we select the right initial result result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); @@ -240,8 +271,7 @@ namespace storm { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, gridResolution); subSimplex = temp.first; lambdas = temp.second; if(cacheSubsimplices){ @@ -268,6 +298,8 @@ namespace storm { if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { hintVector.push_back(tempWeightedSumOver); + } else { + hintVector.push_back(storm::utility::zero()); } beliefsToBeExpanded.push_back(nextId); @@ -319,11 +351,10 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", 0); + mdpLabeling.addLabelToState("init", beliefStateMap[initialBelief.id]); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); if (computeRewards) { @@ -353,7 +384,7 @@ namespace storm { propertyString += "=? 
[F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, true); + auto task = storm::api::createTask(property, false); auto hint = storm::modelchecker::ExplicitModelCheckerHint(); hint.setResultHint(hintVector); auto hintPtr = std::make_shared>(hint); @@ -362,8 +393,9 @@ namespace storm { std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); overApproxTimer.stop(); STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - auto overApprox = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); + auto resultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto overApprox = resultMap[beliefStateMap[initialBelief.id]]; /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); @@ -374,6 +406,11 @@ namespace storm { STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + std::map differences; + for(auto const &entry : weightedSumUnderMap){ + differences[beliefStateMap[entry.first]] = resultMap[beliefStateMap[entry.first]] - weightedSumUnderMap[entry.first]; + } + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } @@ -499,15 +536,15 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t 
gridResolution) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true, 0); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); + uint64_t gridResolution, double explorationThreshold) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false, explorationThreshold); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index e6f935a72..438354eca 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -28,7 +28,7 @@ namespace storm { std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution); + uint64_t gridResolution, double explorationThreshold); std::unique_ptr> computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, @@ -57,7 +57,7 @@ namespace storm { std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution, bool computeRewards); + uint64_t gridResolution, bool computeRewards, double explorationThreshold); /** * From 5bbf54cb78cfacd009b59411b8ff30ec92715ced Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Sun, 15 Dec 2019 18:03:58 +0100 Subject: [PATCH 032/155] make everything compile again, add/fix method for memless strategy search (CCD16) and towards iterative search --- .../settings/modules/POMDPSettings.cpp | 9 +- 
.../settings/modules/POMDPSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 46 +++-- .../MemlessStrategySearchQualitative.cpp | 153 +++++++++++--- .../MemlessStrategySearchQualitative.h | 54 +++-- .../QualitativeStrategySearchNaive.cpp | 186 ++++++++++++++++++ .../analysis/QualitativeStrategySearchNaive.h | 74 +++++++ 7 files changed, 450 insertions(+), 73 deletions(-) create mode 100644 src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp create mode 100644 src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 2a637d436..5a26cda46 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -27,6 +27,7 @@ namespace storm { const std::string transformBinaryOption = "transformbinary"; const std::string transformSimpleOption = "transformsimple"; const std::string memlessSearchOption = "memlesssearch"; + std::vector memlessSearchMethods = {"none", "ccdmemless", "ccdmemory", "iterative"}; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); @@ -46,7 +47,9 @@ namespace storm { 10).addValidatorUnsignedInteger( storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator( 0)).build()).build()); - this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method 
name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); + } bool POMDPSettings::isExportToParametricSet() const { @@ -86,6 +89,10 @@ namespace storm { return this->getOption(memlessSearchOption).getHasOptionBeenSet(); } + std::string POMDPSettings::getMemlessSearchMethod() const { + return this->getOption(memlessSearchOption).getArgumentByName("method").getValueAsString(); + } + uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 9f7332774..bd3b13fa8 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -33,6 +33,7 @@ namespace storm { bool isTransformSimpleSet() const; bool isTransformBinarySet() const; bool isMemlessSearchSet() const; + std::string getMemlessSearchMethod() const; std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 3927d0728..3fa2eb9d4 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -26,9 +26,10 @@ #include "storm/settings/modules/TopologicalEquationSolverSettings.h" #include "storm/settings/modules/ModelCheckerSettings.h" #include "storm/settings/modules/MultiplierSettings.h" + +#include "storm/settings/modules/TransformationSettings.h" #include "storm/settings/modules/MultiObjectiveSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" - #include "storm/analysis/GraphConditions.h" #include "storm-cli-utilities/cli.h" @@ -44,6 +45,7 @@ #include "storm-pomdp/analysis/QualitativeAnalysis.h" #include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" #include 
"storm-pomdp/analysis/MemlessStrategySearchQualitative.h" +#include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" #include "storm/api/storm.h" #include @@ -59,6 +61,8 @@ void initializeSettings() { storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); + + storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); @@ -79,9 +83,9 @@ void initializeSettings() { } template -bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& badStates) { +bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& targetStates, storm::storage::BitVector& badStates) { //TODO refactor (use model checker to determine the states, then transform into observations). - + //TODO rename into appropriate function name. 
bool validFormula = false; if (subformula.isEventuallyFormula()) { storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); @@ -94,6 +98,7 @@ bool extractTargetAndSinkObservationSets(std::shared_ptrgetNumberOfStates(); ++state) { if (labeling.getStateHasLabel(targetLabel, state)) { targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); } } } else if (subformula2.isAtomicExpressionFormula()) { @@ -106,18 +111,19 @@ bool extractTargetAndSinkObservationSets(std::shared_ptrgetNumberOfStates(); ++state) { if (labeling.getStateHasLabel(targetLabel, state)) { targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); } } } } else if (subformula.isUntilFormula()) { - storm::logic::UntilFormula const &eventuallyFormula = subformula.asUntilFormula(); - storm::logic::Formula const &subformula1 = eventuallyFormula.getLeftSubformula(); + storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); + storm::logic::Formula const &subformula1 = untilFormula.getLeftSubformula(); if (subformula1.isAtomicLabelFormula()) { storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); std::string targetLabel = alFormula.getLabel(); auto labeling = pomdp->getStateLabeling(); for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { + if (!labeling.getStateHasLabel(targetLabel, state)) { badStates.set(state); } } @@ -128,14 +134,14 @@ bool extractTargetAndSinkObservationSets(std::shared_ptrgetStateLabeling(); for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { + if (!labeling.getStateHasLabel(targetLabel, state)) { badStates.set(state); } } } else { return false; } - storm::logic::Formula const &subformula2 = eventuallyFormula.getRightSubformula(); + storm::logic::Formula const &subformula2 = 
untilFormula.getRightSubformula(); if (subformula2.isAtomicLabelFormula()) { storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); validFormula = true; @@ -144,7 +150,9 @@ bool extractTargetAndSinkObservationSets(std::shared_ptrgetNumberOfStates(); ++state) { if (labeling.getStateHasLabel(targetLabel, state)) { targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); } + } } else if (subformula2.isAtomicExpressionFormula()) { validFormula = true; @@ -156,7 +164,9 @@ bool extractTargetAndSinkObservationSets(std::shared_ptrgetNumberOfStates(); ++state) { if (labeling.getStateHasLabel(targetLabel, state)) { targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); } + } } } @@ -227,9 +237,10 @@ int main(const int argc, const char** argv) { if (formula->isProbabilityOperatorFormula()) { std::set targetObservationSet; - std::set badObservationSet; + storm::storage::BitVector targetStates(pomdp->getNumberOfStates()); + storm::storage::BitVector badStates(pomdp->getNumberOfStates()); - bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, badObservationSet); + bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, targetStates, badStates); STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, "The formula is not supported by the grid approximation"); STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); @@ -278,11 +289,22 @@ int main(const int argc, const char** argv) { } } if (pomdpSettings.isMemlessSearchSet()) { +// std::cout << std::endl; +// pomdp->writeDotToStream(std::cout); +// std::cout << std::endl; +// std::cout << std::endl; storm::expressions::ExpressionManager expressionManager; std::shared_ptr smtSolverFactory = std::make_shared(); + if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { + 
storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else { + STORM_LOG_ERROR("This method is not implemented."); + } - storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, smtSolverFactory); - memlessSearch.analyze(5); } } else if (formula->isRewardOperatorFormula()) { diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp index 2ed1230e0..675c91aab 100644 --- a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp @@ -6,12 +6,12 @@ namespace storm { template void MemlessStrategySearchQualitative::initialize(uint64_t k) { - if (maxK == -1) { + if (maxK == std::numeric_limits::max()) { // not initialized at all. - // Create some data structures. for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - actionSelectionVars.push_back(std::vector()); + actionSelectionVars.push_back(std::vector()); + actionSelectionVarExpressions.push_back(std::vector()); statesPerObservation.push_back(std::vector()); // Consider using bitvectors instead. 
} @@ -24,91 +24,182 @@ namespace storm { for (uint64_t i = 0; i < k; ++i) { pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); } - reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId)).getExpression()); - + reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); + reachVarExpressions.push_back(reachVars.back().getExpression()); statesPerObservation.at(obs).push_back(stateId++); - } assert(pathVars.size() == pomdp.getNumberOfStates()); + assert(reachVars.size() == pomdp.getNumberOfStates()); + assert(reachVarExpressions.size() == pomdp.getNumberOfStates()); // Create the action selection variables. uint64_t obs = 0; for(auto const& statesForObservation : statesPerObservation) { for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); - actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName).getExpression()); + actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); + actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); } ++obs; } - - } else { - assert(false); - } uint64_t rowindex = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - std::vector> pathsubsubexprs; - for (uint64_t j = 1; j < k; ++j) { - pathsubsubexprs.push_back(std::vector()); - } - - if (targetObservations.count(pomdp.getObservation(state)) > 0) { + if (targetStates.get(state)) { smtSolver->add(pathVars[state][0]); } else { smtSolver->add(!pathVars[state][0]); } if (surelyReachSinkStates.get(state)) { - smtSolver->add(!reachVars[state]); - } - else { + smtSolver->add(!reachVarExpressions[state]); + } else if(!targetStates.get(state)) { + std::vector>> pathsubsubexprs; + for (uint64_t j = 1; j 
< k; ++j) { + pathsubsubexprs.push_back(std::vector>()); + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubsubexprs.back().push_back(std::vector()); + } + } + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { std::vector subexprreach; - subexprreach.push_back(!reachVars.at(state)); - subexprreach.push_back(!actionSelectionVars.at(pomdp.getObservation(state)).at(action)); + subexprreach.push_back(!reachVarExpressions.at(state)); + subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { - subexprreach.push_back(reachVars.at(entries.getColumn())); + subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); } smtSolver->add(storm::expressions::disjunction(subexprreach)); for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { for (uint64_t j = 1; j < k; ++j) { - pathsubsubexprs[j - 1].push_back(pathVars[entries.getColumn()][j - 1]); + pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); } } rowindex++; } - smtSolver->add(storm::expressions::implies(reachVars.at(state), pathVars.at(state).back())); - + smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); for (uint64_t j = 1; j < k; ++j) { std::vector pathsubexprs; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { - pathsubexprs.push_back(actionSelectionVars.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1])); + pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); } smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); } } - } - for (auto const& actionVars : actionSelectionVars) { + for 
(auto const& actionVars : actionSelectionVarExpressions) { smtSolver->add(storm::expressions::disjunction(actionVars)); } + } + + template + bool MemlessStrategySearchQualitative::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { + if (k < maxK) { + initialize(k); + } + + std::vector atLeastOneOfStates; + for (uint64_t state : oneOfTheseStates) { + STORM_LOG_ASSERT(reachVarExpressions.size() > state, "state id " << state << " exceeds number of states (" << reachVarExpressions.size() << ")" ); + atLeastOneOfStates.push_back(reachVarExpressions[state]); + } + assert(atLeastOneOfStates.size() > 0); + smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); + for (uint64_t state : allOfTheseStates) { + assert(reachVarExpressions.size() > state); + smtSolver->add(reachVarExpressions[state]); + } + std::cout << smtSolver->getSmtLibString() << std::endl; - //for (auto const& ) - } + std::vector> scheduler; + while (true) { + + auto result = smtSolver->check(); + uint64_t i = 0; + + if (result == storm::solver::SmtSolver::CheckResult::Unknown) { + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); + } else if (result == storm::solver::SmtSolver::CheckResult::Unsat) { + std::cout << std::endl << "Unsatisfiable!" 
<< std::endl; + return false; + } + + std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; + auto model = smtSolver->getModel(); + std::cout << "states that are okay" << std::endl; + + + storm::storage::BitVector observations(pomdp.getNrObservations()); + storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); + for (auto rv : reachVars) { + if (model->getBooleanValue(rv)) { + std::cout << i << " " << std::endl; + observations.set(pomdp.getObservation(i)); + } else { + remainingstates.set(i); + } + //std::cout << i << ": " << model->getBooleanValue(rv) << ", "; + ++i; + } + + scheduler.clear(); + + std::vector schedulerSoFar; + uint64_t obs = 0; + for (auto const &actionSelectionVarsForObs : actionSelectionVars) { + uint64_t act = 0; + scheduler.push_back(std::set()); + for (auto const &asv : actionSelectionVarsForObs) { + if (model->getBooleanValue(asv)) { + scheduler.back().insert(act); + schedulerSoFar.push_back(actionSelectionVarExpressions[obs][act]); + } + act++; + } + obs++; + } + + std::cout << "the scheduler: " << std::endl; + for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { + if (observations.get(obs)) { + std::cout << "observation: " << obs << std::endl; + std::cout << "actions:"; + for (auto act : scheduler[obs]) { + std::cout << " " << act; + } + std::cout << std::endl; + } + } + + + std::vector remainingExpressions; + for (auto index : remainingstates) { + remainingExpressions.push_back(reachVarExpressions[index]); + } + + smtSolver->push(); + // Add scheduler + smtSolver->add(storm::expressions::conjunction(schedulerSoFar)); + smtSolver->add(storm::expressions::disjunction(remainingExpressions)); + + } + + } template class MemlessStrategySearchQualitative; + template class MemlessStrategySearchQualitative; } } diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h index 
ca1a84aa6..5aa69b3ce 100644 --- a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h @@ -3,6 +3,7 @@ #include "storm/solver/SmtSolver.h" #include "storm/models/sparse/Pomdp.h" #include "storm/utility/solver.h" +#include "storm/exceptions/UnexpectedException.h" namespace storm { namespace pomdp { @@ -15,8 +16,12 @@ namespace pomdp { public: MemlessStrategySearchQualitative(storm::models::sparse::Pomdp const& pomdp, std::set const& targetObservationSet, + storm::storage::BitVector const& targetStates, + storm::storage::BitVector const& surelyReachSinkStates, std::shared_ptr& smtSolverFactory) : pomdp(pomdp), + targetStates(targetStates), + surelyReachSinkStates(surelyReachSinkStates), targetObservations(targetObservationSet) { this->expressionManager = std::make_shared(); smtSolver = smtSolverFactory->create(*expressionManager); @@ -27,49 +32,40 @@ namespace pomdp { surelyReachSinkStates = surelyReachSink; } - void analyze(uint64_t k) { - if (k < maxK) { - initialize(k); - } - std::cout << smtSolver->getSmtLibString() << std::endl; - for (uint64_t state : pomdp.getInitialStates()) { - smtSolver->add(reachVars[state]); - } - auto result = smtSolver->check(); - switch(result) { - case storm::solver::SmtSolver::CheckResult::Sat: - std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; - - case storm::solver::SmtSolver::CheckResult::Unsat: - // std::cout << std::endl << "Unsatisfiability core: {" << std::endl; - // for (auto const& expr : solver->getUnsatCore()) { - // std::cout << "\t " << expr << std::endl; - // } - // std::cout << "}" << std::endl; - - default: - std::cout<< "oops." 
<< std::endl; - // STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); - } - //std::cout << "get model:" << std::endl; - //std::cout << smtSolver->getModel().toString() << std::endl; + void analyzeForInitialStates(uint64_t k) { + analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); } + void findNewStrategyForSomeState(uint64_t k) { + std::cout << surelyReachSinkStates << std::endl; + std::cout << targetStates << std::endl; + std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; + analyze(k, ~surelyReachSinkStates & ~targetStates); + + + } + + bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); + private: void initialize(uint64_t k); + std::unique_ptr smtSolver; storm::models::sparse::Pomdp const& pomdp; std::shared_ptr expressionManager; - uint64_t maxK = -1; + uint64_t maxK = std::numeric_limits::max(); std::set targetObservations; + storm::storage::BitVector targetStates; storm::storage::BitVector surelyReachSinkStates; std::vector> statesPerObservation; - std::vector> actionSelectionVars; // A_{z,a} - std::vector reachVars; + std::vector> actionSelectionVarExpressions; // A_{z,a} + std::vector> actionSelectionVars; + std::vector reachVars; + std::vector reachVarExpressions; std::vector> pathVars; diff --git a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp new file mode 100644 index 000000000..f1ec0b8e8 --- /dev/null +++ b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp @@ -0,0 +1,186 @@ + + +#include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" + + +namespace storm { + namespace pomdp { + + template + void QualitativeStrategySearchNaive::initialize(uint64_t k) { + if (maxK == std::numeric_limits::max()) { + // not initialized at all. + // Create some data structures. 
+ for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + actionSelectionVars.push_back(std::vector()); + actionSelectionVarExpressions.push_back(std::vector()); + statesPerObservation.push_back(std::vector()); // Consider using bitvectors instead. + } + + // Fill the states-per-observation mapping, + // declare the reachability variables, + // declare the path variables. + uint64_t stateId = 0; + for(auto obs : pomdp.getObservations()) { + pathVars.push_back(std::vector()); + for (uint64_t i = 0; i < k; ++i) { + pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); + } + reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); + reachVarExpressions.push_back(reachVars.back().getExpression()); + statesPerObservation.at(obs).push_back(stateId++); + } + assert(pathVars.size() == pomdp.getNumberOfStates()); + + // Create the action selection variables. + uint64_t obs = 0; + for(auto const& statesForObservation : statesPerObservation) { + for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { + std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); + actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); + actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); + } + ++obs; + } + } else { + assert(false); + } + + uint64_t rowindex = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (targetStates.get(state)) { + smtSolver->add(pathVars[state][0]); + } else { + smtSolver->add(!pathVars[state][0]); + } + + if (surelyReachSinkStates.get(state)) { + smtSolver->add(!reachVarExpressions[state]); + } else if(!targetStates.get(state)) { + std::vector>> pathsubsubexprs; + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs.push_back(std::vector>()); + for (uint64_t action = 0; action 
< pomdp.getNumberOfChoices(state); ++action) { + pathsubsubexprs.back().push_back(std::vector()); + } + } + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + std::vector subexprreach; + + subexprreach.push_back(!reachVarExpressions.at(state)); + subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); + } + smtSolver->add(storm::expressions::disjunction(subexprreach)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); + } + } + rowindex++; + } + smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); + + for (uint64_t j = 1; j < k; ++j) { + std::vector pathsubexprs; + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); + } + smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); + } + } + } + + for (auto const& actionVars : actionSelectionVarExpressions) { + smtSolver->add(storm::expressions::disjunction(actionVars)); + } + } + + template + bool QualitativeStrategySearchNaive::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { + if (k < maxK) { + initialize(k); + } + + std::vector atLeastOneOfStates; + + for(uint64_t state : oneOfTheseStates) { + atLeastOneOfStates.push_back(reachVarExpressions[state]); + } + assert(atLeastOneOfStates.size() > 0); + smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); + + for(uint64_t state 
: allOfTheseStates) { + smtSolver->add(reachVarExpressions[state]); + } + + + + std::cout << smtSolver->getSmtLibString() << std::endl; + + auto result = smtSolver->check(); + uint64_t i = 0; + smtSolver->push(); + + + + if (result == storm::solver::SmtSolver::CheckResult::Unknown) { + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); + } else if(result == storm::solver::SmtSolver::CheckResult::Unsat) { + std::cout << std::endl << "Unsatisfiable!" << std::endl; + return false; + } else { + + std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; + auto model = smtSolver->getModel(); + std::cout << "states that are okay" << std::endl; + storm::storage::BitVector observations(pomdp.getNrObservations()); + storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); + for (auto rv : reachVars) { + if (model->getBooleanValue(rv)) { + std::cout << i << " " << std::endl; + observations.set(pomdp.getObservation(i)); + } else { + remainingstates.set(i); + } + //std::cout << i << ": " << model->getBooleanValue(rv) << ", "; + ++i; + } + std::vector > scheduler; + for (auto const &actionSelectionVarsForObs : actionSelectionVars) { + uint64_t act = 0; + scheduler.push_back(std::set()); + for (auto const &asv : actionSelectionVarsForObs) { + if (model->getBooleanValue(asv)) { + scheduler.back().insert(act); + } + act++; + } + } + std::cout << "the scheduler: " << std::endl; + for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { + if (observations.get(obs)) { + std::cout << "observation: " << obs << std::endl; + std::cout << "actions:"; + for (auto act : scheduler[obs]) { + std::cout << " " << act; + } + std::cout << std::endl; + } + } + + + return true; + } + + + + } + + template class QualitativeStrategySearchNaive; + template class QualitativeStrategySearchNaive; + } +} diff --git 
a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h new file mode 100644 index 000000000..5020fc9ab --- /dev/null +++ b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h @@ -0,0 +1,74 @@ +#include +#include "storm/storage/expressions/Expressions.h" +#include "storm/solver/SmtSolver.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/solver.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { + namespace pomdp { + + template + class QualitativeStrategySearchNaive { + // Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. + + + public: + QualitativeStrategySearchNaive(storm::models::sparse::Pomdp const& pomdp, + std::set const& targetObservationSet, + storm::storage::BitVector const& targetStates, + storm::storage::BitVector const& surelyReachSinkStates, + std::shared_ptr& smtSolverFactory) : + pomdp(pomdp), + targetStates(targetStates), + surelyReachSinkStates(surelyReachSinkStates), + targetObservations(targetObservationSet) { + this->expressionManager = std::make_shared(); + smtSolver = smtSolverFactory->create(*expressionManager); + + } + + void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { + surelyReachSinkStates = surelyReachSink; + } + + void analyzeForInitialStates(uint64_t k) { + analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); + } + + void findNewStrategyForSomeState(uint64_t k) { + std::cout << surelyReachSinkStates << std::endl; + std::cout << targetStates << std::endl; + std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; + analyze(k, ~surelyReachSinkStates & ~targetStates); + + + } + + bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); + + private: + void initialize(uint64_t k); + + + std::unique_ptr smtSolver; + storm::models::sparse::Pomdp const& 
pomdp; + std::shared_ptr expressionManager; + uint64_t maxK = std::numeric_limits::max(); + + std::set targetObservations; + storm::storage::BitVector targetStates; + storm::storage::BitVector surelyReachSinkStates; + + std::vector> statesPerObservation; + std::vector> actionSelectionVarExpressions; // A_{z,a} + std::vector> actionSelectionVars; + std::vector reachVars; + std::vector reachVarExpressions; + std::vector> pathVars; + + + + }; + } +} From fe2dcfc9758735e583c0af259287dc462c274011 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Sun, 15 Dec 2019 18:04:31 +0100 Subject: [PATCH 033/155] better dot output for pomdp models --- src/storm/models/sparse/Model.cpp | 11 +++++++++-- src/storm/models/sparse/Model.h | 11 +++++++++-- src/storm/models/sparse/Pomdp.cpp | 8 ++++++++ src/storm/models/sparse/Pomdp.h | 7 +++++++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/storm/models/sparse/Model.cpp b/src/storm/models/sparse/Model.cpp index 6e087822f..1867978c5 100644 --- a/src/storm/models/sparse/Model.cpp +++ b/src/storm/models/sparse/Model.cpp @@ -363,7 +363,9 @@ namespace storm { storm::utility::outputFixedWidth(outStream, this->getLabelsOfState(state), maxWidthLabel); outStream << "}"; } - + + outStream << this->additionalDotStateInfo(state); + // If we are to include some values for the state as well, we do so now. 
if (firstValue != nullptr || secondValue != nullptr) { outStream << " ["; @@ -397,7 +399,12 @@ namespace storm { outStream << "}" << std::endl; } } - + + template + std::string Model::additionalDotStateInfo(uint64_t state) const { + return ""; + } + template std::set Model::getLabelsOfState(storm::storage::sparse::state_type state) const { return this->stateLabeling.getLabelsOfState(state); diff --git a/src/storm/models/sparse/Model.h b/src/storm/models/sparse/Model.h index d1ef45bc1..962fda437 100644 --- a/src/storm/models/sparse/Model.h +++ b/src/storm/models/sparse/Model.h @@ -333,8 +333,8 @@ namespace storm { * @param finalizeOutput A flag that sets whether or not the dot stream is closed with a curly brace. * @return A string containing the exported model in dot-format. */ - virtual void writeDotToStream(std::ostream& outStream, size_t maxWidthLabel = 30, bool includeLabeling = true, storm::storage::BitVector const* subsystem = nullptr, std::vector const* firstValue = nullptr, std::vector const* secondValue = nullptr, std::vector const* stateColoring = nullptr, std::vector const* colors = nullptr, std::vector* scheduler = nullptr, bool finalizeOutput = true) const; - + virtual void writeDotToStream(std::ostream& outStream, size_t maxWidthLabel = 30, bool includeLabeling = true, storm::storage::BitVector const* subsystem = nullptr, std::vector const* firstValue = nullptr, std::vector const* secondValue = nullptr, std::vector const* stateColoring = nullptr, std::vector const* colors = nullptr, std::vector* scheduler = nullptr, bool finalizeOutput = true) const; + /*! * Retrieves the set of labels attached to the given state. * @@ -394,6 +394,13 @@ namespace storm { * @param out The stream the information is to be printed to. */ void printRewardModelsInformationToStream(std::ostream& out) const; + + /*! + * Return a string that is additonally added to the state information in the dot stream. 
+ * @param state + * @return + */ + virtual std::string additionalDotStateInfo(uint64_t state) const; private: diff --git a/src/storm/models/sparse/Pomdp.cpp b/src/storm/models/sparse/Pomdp.cpp index babc81e6b..6dbe7b365 100644 --- a/src/storm/models/sparse/Pomdp.cpp +++ b/src/storm/models/sparse/Pomdp.cpp @@ -59,6 +59,12 @@ namespace storm { return observations; } + template + std::string Pomdp::additionalDotStateInfo(uint64_t state) const { + return "<" + std::to_string(getObservation(state)) + ">"; + } + + template std::vector Pomdp::getStatesWithObservation(uint32_t observation) const { @@ -71,6 +77,8 @@ namespace storm { return result; } + + template class Pomdp; template class Pomdp; template class Pomdp>; diff --git a/src/storm/models/sparse/Pomdp.h b/src/storm/models/sparse/Pomdp.h index d480deebc..6ceea00f9 100644 --- a/src/storm/models/sparse/Pomdp.h +++ b/src/storm/models/sparse/Pomdp.h @@ -64,6 +64,13 @@ namespace storm { std::vector getStatesWithObservation(uint32_t observation) const; protected: + /*! + * Return a string that is additonally added to the state information in the dot stream. + * @param state + * @return + */ + virtual std::string additionalDotStateInfo(uint64_t state) const override; + // TODO: consider a bitvector based presentation (depending on our needs). std::vector observations; From bb0b14bfa2d884a6b8db827ea23adde16802bca8 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Sun, 15 Dec 2019 18:28:57 +0100 Subject: [PATCH 034/155] oops. 
missed a brace --- src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 8217ba6f2..ccd72779a 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -85,6 +85,7 @@ namespace storm { std::string POMDPSettings::getMemlessSearchMethod() const { return this->getOption(memlessSearchOption).getArgumentByName("method").getValueAsString(); + } bool POMDPSettings::isLimitExplorationSet() const { return this->getOption(limitBeliefExplorationOption).getHasOptionBeenSet(); From 0facf4a5726f1edf3e2a7896e7f5320c50aead65 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 17 Dec 2019 10:52:56 +0100 Subject: [PATCH 035/155] Preparation work for the implementation of the refinement procedure --- src/storm-pomdp-cli/storm-pomdp.cpp | 22 +- .../ApproximatePOMDPModelchecker.cpp | 205 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 47 +++- 3 files changed, 164 insertions(+), 110 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 8b28da0bf..7cbd84592 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -197,15 +197,15 @@ int main(const int argc, const char** argv) { STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - double overRes = storm::utility::one(); - double underRes = storm::utility::zero(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); std::unique_ptr> result; - //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, 
pomdpSettings.getGridResolution(),1,10); - result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - overRes = result->OverapproximationValue; - underRes = result->UnderapproximationValue; + result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); + //result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); + overRes = result->overApproxValue; + underRes = result->underApproxValue; if (overRes != underRes) { STORM_PRINT("Overapproximation Result: " << overRes << std::endl) STORM_PRINT("Underapproximation Result: " << underRes << std::endl) @@ -264,15 +264,15 @@ int main(const int argc, const char** argv) { STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - double overRes = storm::utility::one(); - double underRes = storm::utility::zero(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); std::unique_ptr> result; result = checker.computeReachabilityReward(*pomdp, targetObservationSet, rewFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution()); - overRes = result->OverapproximationValue; - underRes = result->UnderapproximationValue; + overRes = result->overApproxValue; + underRes = result->underApproxValue; } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fd8502563..550724792 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -30,31 +30,12 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements) { - uint64_t currentResolution = startingResolution; - uint64_t currentRefinement = 0; - std::unique_ptr> res = std::make_unique>( - POMDPCheckResult{storm::utility::one(), storm::utility::zero()}); - while (currentRefinement < maxNrOfRefinements && !cc.isEqual(storm::utility::zero(), res->OverapproximationValue - res->UnderapproximationValue)) { - STORM_PRINT("--------------------------------------------------------------" << std::endl) - STORM_PRINT("Refinement Step " << currentRefinement + 1 << " - Resolution " << currentResolution << std::endl) - STORM_PRINT("--------------------------------------------------------------" << std::endl) - res = computeReachabilityProbability(pomdp, targetObservations, min, currentResolution); - currentResolution += stepSize; - ++currentRefinement; - } - STORM_PRINT("Procedure took " << currentRefinement << " refinement steps" << std::endl) - return res; - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution, - bool computeRewards, double explorationThreshold) { - //TODO For the prototypical implementation, I put the refinement loop here. 
I'll change this later on + std::set const &targetObservations, bool min, uint64_t gridResolution, + double explorationThreshold) { + std::srand(time(NULL)); + // Compute easy upper and lower bounds storm::utility::Stopwatch underlyingWatch(true); + // Compute the results on the underlying MDP as a basic overapproximation storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); underlyingMdpLabeling.addLabel("goal"); std::vector goalStates; @@ -66,8 +47,7 @@ namespace storm { storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); auto underlyingModel = std::static_pointer_cast>( std::make_shared>(underlyingMdp)); - std::string initPropString = computeRewards ? "R" : "P"; - initPropString += min ? "min" : "max"; + std::string initPropString = min ? "Pmin" : "Pmax"; initPropString += "=? [F \"goal\"]"; std::vector propVector = storm::api::parseProperties(initPropString); std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); @@ -76,11 +56,11 @@ namespace storm { std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); underlyingWatch.stop(); storm::utility::Stopwatch positionalWatch(true); - // we define some positional scheduler for the POMDP as an experimental lower bound + // we define some positional scheduler for the POMDP as a basic lower bound storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { auto 
obsStates = pomdp.getStatesWithObservation(obs); @@ -97,20 +77,62 @@ namespace storm { storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underapproxRes, "Result not exist."); underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); - auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); positionalWatch.stop(); STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) + // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. + // This can probably be improved (i.e. resolutions for single belief states) + STORM_PRINT("Initial Resolution: " << gridResolution << std::endl) + std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); + uint64_t refinementCounter = 1; + std::unique_ptr> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + overApproxMap, underApproxMap); + // TODO the actual refinement + return res; + } + + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + std::vector &observationResolutionVector, + bool computeRewards, double explorationThreshold, + boost::optional> overApproximationMap, + boost::optional> underApproximationMap) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) + auto result = computeRefinementFirstStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, + 
underApproximationMap); + return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); + } + + + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeRefinementFirstStep(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + std::vector &observationResolutionVector, + bool computeRewards, double explorationThreshold, + boost::optional> overApproximationMap, + boost::optional> underApproximationMap) { + bool boundMapsSet = overApproximationMap && underApproximationMap; + std::map overMap; + std::map underMap; + if (boundMapsSet) { + overMap = overApproximationMap.value(); + underMap = underApproximationMap.value(); + } + std::vector> beliefList; std::vector beliefIsTarget; std::vector> beliefGrid; - std::map result; //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map> chosenActions; + std::map beliefStateMap; std::deque beliefsToBeExpanded; @@ -128,10 +150,11 @@ namespace storm { beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - // These are the components to build the MDP from the grid TODO make a better stucture to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?) - std::map beliefStateMap; + // These are the components to build the MDP from the grid TODO make a better structure to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?) 
+ // Reserve states 0 and 1 as always sink/goal states - std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}},{{{1, storm::utility::one()}}}}; + std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}}, + {{{1, storm::utility::one()}}}}; // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) std::vector hintVector = {storm::utility::zero(), storm::utility::one()}; std::vector targetStates = {1}; @@ -145,7 +168,8 @@ namespace storm { std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, + observationResolutionVector[initialBelief.observation]); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; if(cacheSubsimplices){ @@ -161,15 +185,16 @@ namespace storm { if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { if (searchResult == 0) { // the initial belief is on the grid itself - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + if (boundMapsSet) { + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + } + weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; + weightedSumUnderMap[initialBelief.id] = 
tempWeightedSumUnder; } - weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; - weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder; - initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); @@ -177,17 +202,18 @@ namespace storm { : storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + if (boundMapsSet) { + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + } - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; } - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); @@ -225,16 +251,13 @@ namespace storm { beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - if(cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))){ - result.emplace(std::make_pair(currId, computeRewards ? 
storm::utility::zero() : weightedSumOverMap[currId])); - mdpTransitions.push_back({{{1, weightedSumOverMap[currId]},{0, storm::utility::one() - weightedSumOverMap[currId]}}}); + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; } if (isTarget) { // Depending on whether we compute rewards, we select the right initial result - result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); - // MDP stuff std::vector> transitionsInBelief; targetStates.push_back(beliefStateMap[currId]); @@ -243,8 +266,6 @@ namespace storm { transitionsInBelief.push_back(transitionInActionBelief); mdpTransitions.push_back(transitionsInBelief); } else { - result.emplace(std::make_pair(currId, storm::utility::zero())); - uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); @@ -271,7 +292,8 @@ namespace storm { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[idNextBelief].probabilities, observationResolutionVector[beliefList[idNextBelief].observation]); subSimplex = temp.first; lambdas = temp.second; if(cacheSubsimplices){ @@ -287,25 +309,28 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - // compute overapproximate value using MDP result map - auto tempWeightedSumOver = storm::utility::zero(); - auto 
tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); - } beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - - if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { - hintVector.push_back(tempWeightedSumOver); + // compute overapproximate value using MDP result map + if (boundMapsSet) { + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + } + if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { + hintVector.push_back(tempWeightedSumOver); + } else { + hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); + } + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; } else { - hintVector.push_back(storm::utility::zero()); + hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? 
storm::utility::one() + : storm::utility::zero()); } - beliefsToBeExpanded.push_back(nextId); - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - beliefStateMap[nextId] = mdpStateId; transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; @@ -343,11 +368,8 @@ namespace storm { } expansionTimer.stop(); STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) - STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - //auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache,result, chosenActions, gridResolution, min, computeRewards); - storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -376,9 +398,6 @@ namespace storm { auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); - std::vector parameterNames; - storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); - std::string propertyString = computeRewards ? "R" : "P"; propertyString += min ? "min" : "max"; propertyString += "=? 
[F \"target\"]"; @@ -394,24 +413,22 @@ namespace storm { overApproxTimer.stop(); STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); - auto resultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = resultMap[beliefStateMap[initialBelief.id]]; - /* storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); - underApproxTimer.stop();*/ + auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto overApprox = overApproxResultMap[beliefStateMap[initialBelief.id]]; STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) - auto underApprox = storm::utility::zero(); + auto underApprox = weightedSumUnderMap[initialBelief.id]; STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - std::map differences; - for(auto const &entry : weightedSumUnderMap){ - differences[beliefStateMap[entry.first]] = resultMap[beliefStateMap[entry.first]] - weightedSumUnderMap[entry.first]; + // Transfer the underapproximation results from the belief id space to the MDP id space + std::map underApproxResultMap; + for (auto const &belief : beliefGrid) { + underApproxResultMap[beliefStateMap[belief.id]] = weightedSumUnderMap[belief.id]; } - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + return std::make_unique>( + RefinementComponents{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap}); } template @@ -536,7 +553,8 @@ namespace storm { 
ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true, 0); + std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); + return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, true, 0); } template @@ -544,7 +562,8 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, double explorationThreshold) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false, explorationThreshold); + std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); + return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 438354eca..f61c0f9a6 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -11,8 +11,24 @@ namespace storm { namespace modelchecker { template struct POMDPCheckResult { - ValueType OverapproximationValue; - ValueType UnderapproximationValue; + ValueType overApproxValue; + ValueType underApproxValue; + }; + + /** + * Struct containing information which is supposed to be persistent over multiple refinement steps + * + */ + template> + struct RefinementComponents { + std::shared_ptr> overApproxModelPtr; + ValueType overApproxValue; + ValueType underApproxValue; + std::map &overApproxMap; + std::map &underApproxMap; + std::vector> &beliefList; + std::vector &beliefIsTarget; + std::map &beliefStateMap; }; template> @@ -21,9 
+37,8 @@ namespace storm { explicit ApproximatePOMDPModelchecker(); std::unique_ptr> - refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements); + refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, + uint64_t gridResolution, double explorationThreshold); std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, @@ -45,6 +60,24 @@ namespace storm { uint64_t gridResolution); private: + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param observationResolutionVector + * @param computeRewards + * @param explorationThreshold + * @param overApproximationMap + * @param underApproximationMap + * @return + */ + std::unique_ptr> + computeRefinementFirstStep(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, + boost::optional> underApproximationMap = boost::none); + /** * * @param pomdp @@ -57,7 +90,9 @@ namespace storm { std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution, bool computeRewards, double explorationThreshold); + std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, + boost::optional> overApproximationMap = boost::none, + boost::optional> underApproximationMap = boost::none); /** * From 8d3254b8d09fdde5061a8e3dd75e824de40ec7b5 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 17 Dec 2019 14:36:34 +0100 Subject: [PATCH 036/155] Use boost::bimap for the belief space <-> state space mapping --- .../ApproximatePOMDPModelchecker.cpp | 38 +++++++++---------- 
.../ApproximatePOMDPModelchecker.h | 5 ++- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 6debaf5a0..2955bdb7f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -2,7 +2,6 @@ #include - #include "storm/utility/ConstantsComparator.h" #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" @@ -105,7 +104,7 @@ namespace storm { boost::optional> overApproximationMap, boost::optional> underApproximationMap) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result = computeRefinementFirstStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, + auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, underApproximationMap); return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -113,7 +112,7 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeRefinementFirstStep(storm::models::sparse::Pomdp const &pomdp, + ApproximatePOMDPModelchecker::computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, @@ -133,7 +132,7 @@ namespace storm { //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map beliefStateMap; + bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; @@ -151,8 +150,7 @@ namespace storm { beliefList.push_back(initialBelief); 
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - // These are the components to build the MDP from the grid TODO make a better structure to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?) - + // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}}, {{{1, storm::utility::one()}}}}; @@ -161,7 +159,7 @@ namespace storm { std::vector targetStates = {1}; uint64_t mdpStateId = 2; - beliefStateMap[initialBelief.id] = mdpStateId; + beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId)); ++mdpStateId; // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace @@ -173,7 +171,7 @@ namespace storm { observationResolutionVector[initialBelief.observation]); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; - if(cacheSubsimplices){ + if (cacheSubsimplices) { subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; } @@ -225,7 +223,7 @@ namespace storm { hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? 
storm::utility::one() : storm::utility::zero()); - beliefStateMap[nextId] = mdpStateId; + beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; ++mdpStateId; @@ -261,9 +259,9 @@ namespace storm { // Depending on whether we compute rewards, we select the right initial result // MDP stuff std::vector> transitionsInBelief; - targetStates.push_back(beliefStateMap[currId]); + targetStates.push_back(beliefStateMap.left.at(currId)); std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); transitionsInBelief.push_back(transitionInActionBelief); mdpTransitions.push_back(transitionsInBelief); } else { @@ -332,12 +330,13 @@ namespace storm { : storm::utility::zero()); } beliefsToBeExpanded.push_back(nextId); - beliefStateMap[nextId] = mdpStateId; + beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; ++mdpStateId; } else { - transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; + transitionInActionBelief[beliefStateMap.left.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]))] = + iter->second * lambdas[j]; } } } @@ -361,7 +360,7 @@ namespace storm { if (transitionsInBelief.empty()) { std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); transitionsInBelief.push_back(transitionInActionBelief); } mdpTransitions.push_back(transitionsInBelief); @@ -374,7 +373,7 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - 
mdpLabeling.addLabelToState("init", beliefStateMap[initialBelief.id]); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id)); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } @@ -382,7 +381,7 @@ namespace storm { storm::models::sparse::Mdp overApproxMdp(modelComponents); if (computeRewards) { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { + for (auto const &iter : beliefStateMap.left) { auto currentBelief = beliefList[iter.first]; auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { @@ -415,7 +414,7 @@ namespace storm { STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[beliefStateMap[initialBelief.id]]; + auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) auto underApprox = weightedSumUnderMap[initialBelief.id]; @@ -423,9 +422,10 @@ namespace storm { STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); // Transfer the underapproximation results from the belief id space to the MDP id space - std::map underApproxResultMap; + std::map underApproxResultMap = {{0, storm::utility::zero()}, + {1, storm::utility::one()}}; for (auto const &belief : beliefGrid) { - underApproxResultMap[beliefStateMap[belief.id]] = weightedSumUnderMap[belief.id]; + underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id]; } return std::make_unique>( diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f61c0f9a6..ab85b2cc1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -3,12 +3,15 @@ #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" +#include #include "storm/storage/jani/Property.h" namespace storm { namespace pomdp { namespace modelchecker { + typedef boost::bimap bsmap_type; + template struct POMDPCheckResult { ValueType overApproxValue; @@ -73,7 +76,7 @@ namespace storm { * @return */ std::unique_ptr> - computeRefinementFirstStep(storm::models::sparse::Pomdp const &pomdp, + computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none); From 4bbb02dcaa66111df92fbaaa8092e374783b7e1e Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Mon, 20 Jan 2020 10:37:03 +0100 Subject: [PATCH 037/155] Wrong method for underapproximation for future reference --- .../ApproximatePOMDPModelchecker.cpp | 182 +++++++++++++++++- .../ApproximatePOMDPModelchecker.h | 3 +- 2 files changed, 179 insertions(+), 6 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 2955bdb7f..4144bf91c 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -258,12 +258,8 @@ namespace storm { if (isTarget) { // Depending on whether we compute rewards, we select the right initial result // MDP stuff - std::vector> transitionsInBelief; 
targetStates.push_back(beliefStateMap.left.at(currId)); - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - mdpTransitions.push_back(transitionsInBelief); + mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one()}}}); } else { uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); @@ -417,6 +413,182 @@ namespace storm { auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) + + // Prototypical implementation of the underapproximation - WRONG + /* + // The map has the following form: (beliefId, action) --> stateId + uamap_type uaStateMap; + // Reserve states 0 and 1 as always sink/goal states + std::map>> uaTransitions = {{0,{{{0, storm::utility::one()}}}}, + {1,{{{1, storm::utility::one()}}}}}; + // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) + std::vector uaTargetStates = {1}; + uint64_t uaStateId = 2; + + // for beliefs which are both in the actual belief support and the grid, we use the max value for the action to indicate the support belief + uaStateMap.insert(uamap_type::value_type(std::make_pair(initialBelief.id, std::numeric_limits::max()), uaStateId)); + ++uaStateId; + beliefsToBeExpanded.push_back(initialBelief.id); + while(!beliefsToBeExpanded.empty()){ + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = beliefIsTarget[currId]; + + if(isTarget){ + // For target states we add a self-loop + uaTargetStates.push_back(uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))); + uaTransitions[uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))] = 
{{{uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max())), storm::utility::one()}}}; + } else { + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); + //Triangulate the current belief to determine its approximation bases + std::vector> subSimplex; + std::vector lambdas; + if (cacheSubsimplices && subSimplexCache.count(currId) > 0) { + subSimplex = subSimplexCache[currId]; + lambdas = lambdaCache[currId]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[currId].probabilities, observationResolutionVector[beliefList[currId].observation]); + subSimplex = temp.first; + lambdas = temp.second; + if(cacheSubsimplices){ + subSimplexCache[currId] = subSimplex; + lambdaCache[currId] = lambdas; + } + } + std::deque> approxToExpand; + std::vector> approxActionTransitions(numChoices); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())){ + uint64_t approxId = getBeliefIdInVector(beliefGrid, beliefList[currId].observation, subSimplex[j]); + //STORM_PRINT("ApproxId " << approxId << std::endl) + if (approxId == uint64_t(-1)) { + // If the approximation base is not yet in the grid, we add it and it has to be expanded + storm::pomdp::Belief gridBelief = {nextId, beliefList[currId].observation, subSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back(targetObservations.find(beliefList[currId].observation) != targetObservations.end()); + for(uint64_t action=0; action < numChoices; ++action) { + approxToExpand.push_back(std::make_pair(nextId, action)); + uaStateMap.insert(uamap_type::value_type(std::make_pair(nextId, action), uaStateId)); + approxActionTransitions[action][uaStateId] = lambdas[j]; + ++uaStateId; + } + ++nextId; + } else if(uaStateMap.left.find(std::pair(approxId,0)) != uaStateMap.left.end()){ + // we can check only for (approxId,0) as that 
always exists if the grid state is mapped + for(uint64_t action=0; action < numChoices; ++action) { + approxActionTransitions[action][uaStateMap.left.at(std::make_pair(approxId,action))] = lambdas[j]; + } + } else { + for(uint64_t action=0; action < numChoices; ++action) { + approxToExpand.push_back(std::make_pair(approxId, action)); + uaStateMap.insert(uamap_type::value_type(std::make_pair(approxId, action), uaStateId)); + approxActionTransitions[action][uaStateId] = lambdas[j]; + ++uaStateId; + } + } + } + } + uaTransitions[uaStateMap.left.at(std::make_pair(currId,std::numeric_limits::max()))] = approxActionTransitions; + // Now expand all approximation bases + while(!approxToExpand.empty()){ + uint64_t approxId = approxToExpand.front().first; + uint64_t approxAction = approxToExpand.front().second; + approxToExpand.pop_front(); + + // Iterate over all actions and determine the successor states + std::map transitionsInAction; + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[approxId], approxAction); + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[approxId], approxAction, + observation, nextId); + nextId = beliefList.size(); + if(uaStateMap.left.find(std::make_pair(idNextBelief, std::numeric_limits::max())) == uaStateMap.left.end()){ + // add state to the mapping and set it t be expanded + uaStateMap.insert(uamap_type::value_type(std::make_pair(idNextBelief,std::numeric_limits::max()), uaStateId)); + ++uaStateId; + beliefsToBeExpanded.push_back(idNextBelief); + } + transitionsInAction[uaStateMap.left.at(std::make_pair(idNextBelief,std::numeric_limits::max()))] = iter->second; + } + uaTransitions[uaStateMap.left.at(std::make_pair(approxId,approxAction))] = {transitionsInAction}; + } 
+ } + } + + + std::vector>> uaTransitionVector; + for(auto iter = uaTransitions.begin(); iter != uaTransitions.end(); ++iter){ + uaTransitionVector.push_back(iter->second); + } + STORM_PRINT(uaTransitions.size() << std::endl) + + storm::models::sparse::StateLabeling uaLabeling(uaTransitions.size()); + uaLabeling.addLabel("init"); + uaLabeling.addLabel("target"); + uaLabeling.addLabel("belief"); + uaLabeling.addLabel("grid"); + uaLabeling.addLabelToState("init", uaStateMap.left.at(std::make_pair(initialBelief.id,std::numeric_limits::max()))); + for (auto targetState : uaTargetStates) { + uaLabeling.addLabelToState("target", targetState); + } + for (auto &iter : uaStateMap.right) { + std::stringstream mapEntryStr; + mapEntryStr << std::to_string(iter.first); + mapEntryStr << " --> "; + mapEntryStr << "[{" + std::to_string(beliefList[iter.second.first].observation) << "} | " ; + for(uint64_t state = 0; state < beliefList[iter.second.first].probabilities.size(); ++state){ + if(beliefList[iter.second.first].probabilities[state] > storm::utility::zero()){ + mapEntryStr << std::to_string(state) << " : " << beliefList[iter.second.first].probabilities[state] << ", "; + } + } + mapEntryStr << "]" << std::endl; + STORM_PRINT(mapEntryStr.str()); + if(!uaLabeling.containsLabel(mapEntryStr.str())){ + uaLabeling.addLabel(mapEntryStr.str()); + } + if(iter.second.second == std::numeric_limits::max()){ + uaLabeling.addLabelToState("belief", iter.first); + } else { + uaLabeling.addLabelToState("grid", iter.first); + } + uaLabeling.addLabelToState(mapEntryStr.str(), iter.first); + } + + //STORM_PRINT(buildTransitionMatrix(uaTransitionVector)) + storm::storage::sparse::ModelComponents uaModelComponents(buildTransitionMatrix(uaTransitionVector), uaLabeling); + storm::models::sparse::Mdp underApproxMdp(uaModelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel uaMdpRewardModel(boost::none, std::vector(uaModelComponents.transitionMatrix.getRowCount())); + for 
(auto const &iter : uaStateMap.left) { + auto currentBelief = beliefList[iter.first.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + uaMdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } + } + underApproxMdp.addRewardModel("std", uaMdpRewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); + } + underApproxMdp.printModelInformationToStream(std::cout); + + auto uaModel = std::make_shared>(underApproxMdp); + auto uaModelPtr = std::static_pointer_cast>(uaModel); + storm::api::exportSparseModelAsDot(uaModelPtr, "ua_model.dot"); + auto uaTask = storm::api::createTask(property, false); + storm::utility::Stopwatch underApproxTimer(true); + std::unique_ptr uaRes(storm::api::verifyWithSparseEngine(uaModelPtr, uaTask)); + underApproxTimer.stop(); + STORM_LOG_ASSERT(uaRes, "Result not exist."); + uaRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); + auto underApproxResultMap = uaRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto underApprox = underApproxResultMap[uaStateMap.left.at(std::make_pair(initialBelief.id, std::numeric_limits::max()))]; + */ auto underApprox = weightedSumUnderMap[initialBelief.id]; STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index ab85b2cc1..df3e72af9 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -11,6 +11,7 @@ namespace storm { namespace pomdp { namespace modelchecker { typedef boost::bimap bsmap_type; + typedef boost::bimap, uint64_t> uamap_type; template struct POMDPCheckResult { @@ -31,7 +32,7 @@ namespace storm { std::map &underApproxMap; std::vector> &beliefList; std::vector &beliefIsTarget; - std::map &beliefStateMap; + bsmap_type &beliefStateMap; }; template> From a5fc9fc5b75a8f802d7cdd169f99741d73750396 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 24 Jan 2020 11:05:25 +0100 Subject: [PATCH 038/155] Added naive underapproximation by unrolling the belief support up to a given size --- .../ApproximatePOMDPModelchecker.cpp | 311 ++++-------------- .../ApproximatePOMDPModelchecker.h | 17 +- 2 files changed, 66 insertions(+), 262 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4144bf91c..84c055d1f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -88,9 +88,10 @@ namespace storm { std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); auto overRes = storm::utility::one(); auto underRes = storm::utility::zero(); + uint64_t underApproxModelSize = 50; uint64_t refinementCounter = 1; std::unique_ptr> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, - overApproxMap, underApproxMap); + overApproxMap, underApproxMap, underApproxModelSize); // TODO the actual refinement return res; } @@ -102,10 +103,11 @@ namespace storm { std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap, - boost::optional> underApproximationMap) { + boost::optional> underApproximationMap, + uint64_t maxUaModelSize) { STORM_PRINT("Use 
On-The-Fly Grid Generation" << std::endl) auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, - underApproximationMap); + underApproximationMap, maxUaModelSize); return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -117,7 +119,8 @@ namespace storm { std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap, - boost::optional> underApproximationMap) { + boost::optional> underApproximationMap, + uint64_t maxUaModelSize) { bool boundMapsSet = overApproximationMap && underApproximationMap; std::map overMap; std::map underMap; @@ -413,183 +416,8 @@ namespace storm { auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) - - // Prototypical implementation of the underapproximation - WRONG - /* - // The map has the following form: (beliefId, action) --> stateId - uamap_type uaStateMap; - // Reserve states 0 and 1 as always sink/goal states - std::map>> uaTransitions = {{0,{{{0, storm::utility::one()}}}}, - {1,{{{1, storm::utility::one()}}}}}; - // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector uaTargetStates = {1}; - uint64_t uaStateId = 2; - - // for beliefs which are both in the actual belief support and the grid, we use the max value for the action to indicate the support belief - uaStateMap.insert(uamap_type::value_type(std::make_pair(initialBelief.id, std::numeric_limits::max()), uaStateId)); - ++uaStateId; - beliefsToBeExpanded.push_back(initialBelief.id); - while(!beliefsToBeExpanded.empty()){ - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - - if(isTarget){ - // For target states we add a self-loop - 
uaTargetStates.push_back(uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))); - uaTransitions[uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))] = {{{uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max())), storm::utility::one()}}}; - } else { - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); - //Triangulate the current belief to determine its approximation bases - std::vector> subSimplex; - std::vector lambdas; - if (cacheSubsimplices && subSimplexCache.count(currId) > 0) { - subSimplex = subSimplexCache[currId]; - lambdas = lambdaCache[currId]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[currId].probabilities, observationResolutionVector[beliefList[currId].observation]); - subSimplex = temp.first; - lambdas = temp.second; - if(cacheSubsimplices){ - subSimplexCache[currId] = subSimplex; - lambdaCache[currId] = lambdas; - } - } - std::deque> approxToExpand; - std::vector> approxActionTransitions(numChoices); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())){ - uint64_t approxId = getBeliefIdInVector(beliefGrid, beliefList[currId].observation, subSimplex[j]); - //STORM_PRINT("ApproxId " << approxId << std::endl) - if (approxId == uint64_t(-1)) { - // If the approximation base is not yet in the grid, we add it and it has to be expanded - storm::pomdp::Belief gridBelief = {nextId, beliefList[currId].observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(beliefList[currId].observation) != targetObservations.end()); - for(uint64_t action=0; action < numChoices; ++action) { - approxToExpand.push_back(std::make_pair(nextId, action)); - uaStateMap.insert(uamap_type::value_type(std::make_pair(nextId, action), uaStateId)); - 
approxActionTransitions[action][uaStateId] = lambdas[j]; - ++uaStateId; - } - ++nextId; - } else if(uaStateMap.left.find(std::pair(approxId,0)) != uaStateMap.left.end()){ - // we can check only for (approxId,0) as that always exists if the grid state is mapped - for(uint64_t action=0; action < numChoices; ++action) { - approxActionTransitions[action][uaStateMap.left.at(std::make_pair(approxId,action))] = lambdas[j]; - } - } else { - for(uint64_t action=0; action < numChoices; ++action) { - approxToExpand.push_back(std::make_pair(approxId, action)); - uaStateMap.insert(uamap_type::value_type(std::make_pair(approxId, action), uaStateId)); - approxActionTransitions[action][uaStateId] = lambdas[j]; - ++uaStateId; - } - } - } - } - uaTransitions[uaStateMap.left.at(std::make_pair(currId,std::numeric_limits::max()))] = approxActionTransitions; - // Now expand all approximation bases - while(!approxToExpand.empty()){ - uint64_t approxId = approxToExpand.front().first; - uint64_t approxAction = approxToExpand.front().second; - approxToExpand.pop_front(); - - // Iterate over all actions and determine the successor states - std::map transitionsInAction; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[approxId], approxAction); - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[approxId], approxAction, - observation, nextId); - nextId = beliefList.size(); - if(uaStateMap.left.find(std::make_pair(idNextBelief, std::numeric_limits::max())) == uaStateMap.left.end()){ - // add state to the mapping and set it t be expanded - uaStateMap.insert(uamap_type::value_type(std::make_pair(idNextBelief,std::numeric_limits::max()), uaStateId)); - ++uaStateId; - beliefsToBeExpanded.push_back(idNextBelief); - } - 
transitionsInAction[uaStateMap.left.at(std::make_pair(idNextBelief,std::numeric_limits::max()))] = iter->second; - } - uaTransitions[uaStateMap.left.at(std::make_pair(approxId,approxAction))] = {transitionsInAction}; - } - } - } - - - std::vector>> uaTransitionVector; - for(auto iter = uaTransitions.begin(); iter != uaTransitions.end(); ++iter){ - uaTransitionVector.push_back(iter->second); - } - STORM_PRINT(uaTransitions.size() << std::endl) - - storm::models::sparse::StateLabeling uaLabeling(uaTransitions.size()); - uaLabeling.addLabel("init"); - uaLabeling.addLabel("target"); - uaLabeling.addLabel("belief"); - uaLabeling.addLabel("grid"); - uaLabeling.addLabelToState("init", uaStateMap.left.at(std::make_pair(initialBelief.id,std::numeric_limits::max()))); - for (auto targetState : uaTargetStates) { - uaLabeling.addLabelToState("target", targetState); - } - for (auto &iter : uaStateMap.right) { - std::stringstream mapEntryStr; - mapEntryStr << std::to_string(iter.first); - mapEntryStr << " --> "; - mapEntryStr << "[{" + std::to_string(beliefList[iter.second.first].observation) << "} | " ; - for(uint64_t state = 0; state < beliefList[iter.second.first].probabilities.size(); ++state){ - if(beliefList[iter.second.first].probabilities[state] > storm::utility::zero()){ - mapEntryStr << std::to_string(state) << " : " << beliefList[iter.second.first].probabilities[state] << ", "; - } - } - mapEntryStr << "]" << std::endl; - STORM_PRINT(mapEntryStr.str()); - if(!uaLabeling.containsLabel(mapEntryStr.str())){ - uaLabeling.addLabel(mapEntryStr.str()); - } - if(iter.second.second == std::numeric_limits::max()){ - uaLabeling.addLabelToState("belief", iter.first); - } else { - uaLabeling.addLabelToState("grid", iter.first); - } - uaLabeling.addLabelToState(mapEntryStr.str(), iter.first); - } - - //STORM_PRINT(buildTransitionMatrix(uaTransitionVector)) - storm::storage::sparse::ModelComponents uaModelComponents(buildTransitionMatrix(uaTransitionVector), uaLabeling); - 
storm::models::sparse::Mdp underApproxMdp(uaModelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel uaMdpRewardModel(boost::none, std::vector(uaModelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : uaStateMap.left) { - auto currentBelief = beliefList[iter.first.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - uaMdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - underApproxMdp.addRewardModel("std", uaMdpRewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); - } - underApproxMdp.printModelInformationToStream(std::cout); - - auto uaModel = std::make_shared>(underApproxMdp); - auto uaModelPtr = std::static_pointer_cast>(uaModel); - storm::api::exportSparseModelAsDot(uaModelPtr, "ua_model.dot"); - auto uaTask = storm::api::createTask(property, false); - storm::utility::Stopwatch underApproxTimer(true); - std::unique_ptr uaRes(storm::api::verifyWithSparseEngine(uaModelPtr, uaTask)); - underApproxTimer.stop(); - STORM_LOG_ASSERT(uaRes, "Result not exist."); - uaRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = uaRes->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[uaStateMap.left.at(std::make_pair(initialBelief.id, std::numeric_limits::max()))]; - */ - auto underApprox = weightedSumUnderMap[initialBelief.id]; + //auto underApprox = weightedSumUnderMap[initialBelief.id]; + auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, 
targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); @@ -837,10 +665,10 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); + /*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ underApproxTimer.stop(); - + auto underApprox = storm::utility::zero(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl << "Time Overapproximation: " << overApproxTimer << std::endl @@ -874,64 +702,60 @@ namespace storm { std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards, bool generateMdp) { + uint64_t initialBeliefId, bool min, + bool computeRewards, uint64_t maxModelSize) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; - std::vector>> transitions; - std::vector targetStates; + std::vector>> transitions = {{{{0, storm::utility::one()}}}, + {{{1, storm::utility::one()}}}}; + std::vector targetStates = {1}; - uint64_t stateId = 0; + uint64_t stateId = 2; beliefStateMap[initialBeliefId] = stateId; ++stateId; + uint64_t nextId = beliefList.size(); + uint64_t counter = 0; // Expand the believes visitedBelieves.insert(initialBeliefId); 
believesToBeExpanded.push_back(initialBeliefId); while (!believesToBeExpanded.empty()) { + //TODO think of other ways to stop exploration besides model size auto currentBeliefId = believesToBeExpanded.front(); - std::vector> actionTransitionStorage; + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - std::map transitionsInStateWithAction; - transitionsInStateWithAction[beliefStateMap[currentBeliefId]] = storm::utility::one(); + // add a self-loop to target states targetStates.push_back(beliefStateMap[currentBeliefId]); - actionTransitionStorage.push_back(transitionsInStateWithAction); + transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one()}}}); + } else if (counter > maxModelSize) { + transitions.push_back({{{0, storm::utility::one()}}}); } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min) : - extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } // Iterate over all actions and add the corresponding transitions - for (auto const &action : chosenActions[currentBeliefId]) { + std::vector> actionTransitionStorage; + //TODO add a way to extract the actions from the over-approx and use them here? 
+ for (uint64_t action = 0; action < numChoices; ++action) { std::map transitionsInStateWithAction; - - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { + std::map observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action); + for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; + uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], + action, + observation, nextId); + nextId = beliefList.size(); if (visitedBelieves.insert(nextBeliefId).second) { beliefStateMap[nextBeliefId] = stateId; ++stateId; believesToBeExpanded.push_back(nextBeliefId); + ++counter; } transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; } actionTransitionStorage.push_back(transitionsInStateWithAction); } + transitions.push_back(actionTransitionStorage); } - transitions.push_back(actionTransitionStorage); believesToBeExpanded.pop_front(); } @@ -949,44 +773,28 @@ namespace storm { transitionMatrix.makeRowGroupingTrivial(); } storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); - if (transitionMatrix.hasTrivialRowGrouping()) { - - storm::models::sparse::Dtmc underApproxMc(modelComponents); - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - if (computeRewards) { - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - 
storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), - currentBelief)); - } - } - underApproxMc.addRewardModel("std", rewardModel); - underApproxMc.restrictRewardModels(std::set({"std"})); - - model = std::make_shared>(underApproxMc); - } else { - storm::models::sparse::Mdp underApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); } - 
model = std::make_shared>(underApproxMdp); + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } + model = std::make_shared>(underApproxMdp); + model->printModelInformationToStream(std::cout); + storm::api::exportSparseModelAsDot(model, "ua_model.dot"); + std::string propertyString; if (computeRewards) { propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; @@ -996,10 +804,13 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); + auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]]; + + return underApprox; } @@ -1406,4 +1217,4 @@ namespace storm { #endif } } -} \ No newline at end of file +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index df3e72af9..867f9d673 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -80,7 +80,7 @@ namespace storm { computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool 
computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none); + boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** * @@ -96,7 +96,7 @@ namespace storm { std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none); + boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** * @@ -164,24 +164,17 @@ namespace storm { * @param beliefList * @param beliefIsTarget * @param targetObservations - * @param observationProbabilities - * @param nextBelieves - * @param result - * @param chosenActions - * @param gridResolution * @param initialBeliefId * @param min + * @param computeReward + * @param maxModelSize * @return */ ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); + uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); /** * From e02ea57a58772057925542b53237d36790493553 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 24 Jan 2020 17:08:46 +0100 Subject: [PATCH 039/155] Rudimentary version of the refinement loop --- .../ApproximatePOMDPModelchecker.cpp | 112 +++++++++++++----- .../ApproximatePOMDPModelchecker.h | 30 +++-- 2 files changed, 104 insertions(+), 38 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 84c055d1f..ab14f7349 100644 --- 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -56,7 +56,7 @@ namespace storm { std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); underlyingWatch.stop(); storm::utility::Stopwatch positionalWatch(true); @@ -80,6 +80,8 @@ namespace storm { auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); positionalWatch.stop(); + STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " + << underApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. 
@@ -90,10 +92,65 @@ namespace storm { auto underRes = storm::utility::zero(); uint64_t underApproxModelSize = 50; uint64_t refinementCounter = 1; - std::unique_ptr> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, - overApproxMap, underApproxMap, underApproxModelSize); - // TODO the actual refinement - return res; + std::unique_ptr> res; + while (refinementCounter < 30) { + res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + initialOverApproxMap, underApproxMap, underApproxModelSize); + // TODO the actual refinement + // choose which observation(s) to refine + std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); + std::vector beliefCount(pomdp.getNrObservations(), 0); + bsmap_type::right_map::const_iterator iter = res->underApproxBeliefStateMap.right.begin(); + while (iter != res->underApproxBeliefStateMap.right.end()) { + auto currentBelief = res->beliefList[iter->second]; + beliefCount[currentBelief.observation] += 1; + //TODO rename, this is getting confusing + bsmap_type::left_const_iterator it = res->overApproxBeliefStateMap.left.find(iter->second); + if (it != res->overApproxBeliefStateMap.left.end()) { + // If there is an over-approximate value for the belief, use it + auto diff = res->overApproxMap[it->second] - res->underApproxMap[iter->first]; + obsAccumulator[currentBelief.observation] += diff; + } else { + //otherwise, we approximate a value TODO this is critical, we have to think about it + auto overApproxValue = storm::utility::zero(); + auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]); + auto subSimplex = temp.first; + auto lambdas = temp.second; + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + uint64_t approxId = getBeliefIdInVector(res->beliefList, 
currentBelief.observation, subSimplex[j]); + bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId); + if (approxIter != res->overApproxBeliefStateMap.left.end()) { + overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; + } else { + overApproxValue += lambdas[j]; + } + } + } + obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[iter->first]; + } + ++iter; + } + + for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { + obsAccumulator[i] /= beliefCount[i]; + } + + //TODO think about some other scoring methods + auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); + STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) + STORM_PRINT(" Obs | Score " << std::endl << "---------|---------" << std::endl) + for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { + STORM_PRINT(i << " |" << obsAccumulator[i] << std::endl) + if (obsAccumulator[i] == maxAvgDifference) { + observationResolutionVector[i] *= 2; + } + } + underApproxModelSize += 10; + ++refinementCounter; + } + + return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); } template @@ -417,21 +474,22 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, + maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - // Transfer the underapproximation results from the belief id space to the MDP id space - std::map underApproxResultMap = {{0, 
storm::utility::zero()}, - {1, storm::utility::one()}}; - for (auto const &belief : beliefGrid) { - underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id]; - } + STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); return std::make_unique>( - RefinementComponents{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap}); + RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, + underApproxComponents->underApproxMap, beliefList, beliefIsTarget, beliefStateMap, + underApproxComponents->underApproxBeliefStateMap}); } +/* + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeRefinementStep(){}*/ + template ValueType ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, @@ -499,8 +557,8 @@ namespace storm { } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { chosenActionIndices.clear(); @@ -697,7 +755,7 @@ namespace storm { } template - ValueType + std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, @@ -706,13 +764,13 @@ namespace storm { bool computeRewards, uint64_t maxModelSize) { std::set visitedBelieves; std::deque believesToBeExpanded; - std::map beliefStateMap; + bsmap_type beliefStateMap; std::vector>> transitions = {{{{0, 
storm::utility::one()}}}, {{{1, storm::utility::one()}}}}; std::vector targetStates = {1}; uint64_t stateId = 2; - beliefStateMap[initialBeliefId] = stateId; + beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId)); ++stateId; uint64_t nextId = beliefList.size(); uint64_t counter = 0; @@ -727,8 +785,8 @@ namespace storm { // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { // add a self-loop to target states - targetStates.push_back(beliefStateMap[currentBeliefId]); - transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one()}}}); + targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); + transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); } else if (counter > maxModelSize) { transitions.push_back({{{0, storm::utility::one()}}}); } else { @@ -745,12 +803,12 @@ namespace storm { observation, nextId); nextId = beliefList.size(); if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; + beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); ++stateId; believesToBeExpanded.push_back(nextBeliefId); ++counter; } - transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; + transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; } actionTransitionStorage.push_back(transitionsInStateWithAction); } @@ -776,7 +834,7 @@ namespace storm { storm::models::sparse::Mdp underApproxMdp(modelComponents); if (computeRewards) { storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { + for (auto const &iter : beliefStateMap.left) { auto currentBelief = beliefList[iter.first]; auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); for (uint64_t action = 0; action < 
underApproxMdp.getNumberOfChoices(iter.second); ++action) { @@ -808,9 +866,9 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]]; + auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - return underApprox; + return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 867f9d673..5e8a95025 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -11,7 +11,6 @@ namespace storm { namespace pomdp { namespace modelchecker { typedef boost::bimap bsmap_type; - typedef boost::bimap, uint64_t> uamap_type; template struct POMDPCheckResult { @@ -28,11 +27,19 @@ namespace storm { std::shared_ptr> overApproxModelPtr; ValueType overApproxValue; ValueType underApproxValue; - std::map &overApproxMap; - std::map &underApproxMap; - std::vector> &beliefList; - std::vector &beliefIsTarget; - bsmap_type &beliefStateMap; + std::map overApproxMap; + std::map underApproxMap; + std::vector> beliefList; + std::vector beliefIsTarget; + bsmap_type overApproxBeliefStateMap; + bsmap_type underApproxBeliefStateMap; + }; + + template> + struct UnderApproxComponents { + ValueType underApproxValue; + std::map underApproxMap; + bsmap_type underApproxBeliefStateMap; }; template> @@ -170,11 +177,12 @@ namespace storm { * @param maxModelSize * @return */ - ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set 
const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); + std::unique_ptr> computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set const &targetObservations, + uint64_t initialBeliefId, bool min, bool computeReward, + uint64_t maxModelSize); /** * From 91232a2b11ce852ed3ebd19c09c8e3cc114c8ef0 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 11 Feb 2020 16:11:42 +0100 Subject: [PATCH 040/155] Changed data structure for belief distributions from vector to map to exploit sparsity --- .../ApproximatePOMDPModelchecker.cpp | 194 +++++++++--------- .../ApproximatePOMDPModelchecker.h | 8 +- src/storm-pomdp/storage/Belief.h | 2 +- 3 files changed, 101 insertions(+), 103 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index ab14f7349..bdf9fab1a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -113,7 +113,7 @@ namespace storm { } else { //otherwise, we approximate a value TODO this is critical, we have to think about it auto overApproxValue = storm::utility::zero(); - auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]); + auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); auto subSimplex = temp.first; auto lambdas = temp.second; for (size_t j = 0; j < lambdas.size(); ++j) { @@ -190,16 +190,12 @@ namespace storm { std::vector beliefIsTarget; std::vector> beliefGrid; //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; + std::map>> subSimplexCache; std::map> lambdaCache; bsmap_type beliefStateMap; std::deque 
beliefsToBeExpanded; - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; // current ID -> action -> reward std::map> beliefActionRewards; uint64_t nextId = 0; @@ -227,10 +223,9 @@ namespace storm { std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, - observationResolutionVector[initialBelief.observation]); - std::vector> initSubSimplex = initTemp.first; - std::vector initLambdas = initTemp.second; + auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates()); + auto initSubSimplex = initTemp.first; + auto initLambdas = initTemp.second; if (cacheSubsimplices) { subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; @@ -297,7 +292,6 @@ namespace storm { initTransitionsInBelief.push_back(initTransitionInActionBelief); mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting // Expand the beliefs to generate the grid on-the-fly @@ -305,7 +299,7 @@ namespace storm { STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) } while (!beliefsToBeExpanded.empty()) { - // TODO add direct generation of transition matrix + // TODO direct generation of transition matrix? 
uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; @@ -323,14 +317,11 @@ namespace storm { } else { uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); std::vector actionRewardsInState(numChoices); std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); - std::map actionObservationBelieves; std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; @@ -339,16 +330,15 @@ namespace storm { uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, observation, nextId); nextId = beliefList.size(); - actionObservationBelieves[observation] = idNextBelief; //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; + std::vector> subSimplex; std::vector lambdas; if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, observationResolutionVector[beliefList[idNextBelief].observation]); + auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, + observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; if(cacheSubsimplices){ @@ -397,8 +387,6 @@ namespace storm { } } } - 
observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; if (computeRewards) { actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); @@ -407,8 +395,6 @@ namespace storm { transitionsInBelief.push_back(transitionInActionBelief); } } - observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); if (computeRewards) { beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); } @@ -499,7 +485,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map> &beliefActionRewards, - std::map>> &subSimplexCache, + std::map>> &subSimplexCache, std::map> &lambdaCache, std::map &result, std::map> &chosenActions, @@ -531,14 +517,13 @@ namespace storm { storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief // cache the values to not always re-calculate - std::vector> subSimplex; + std::vector> subSimplex; std::vector lambdas; if (cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, - gridResolution); + auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; if(cacheSubsimplices) { @@ -588,13 +573,13 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); std::vector initialLambda; - std::vector> initialSubsimplex; + std::vector> initialSubsimplex; if(cacheSubsimplices){ 
initialLambda = lambdaCache[0]; initialSubsimplex = subSimplexCache[0]; } else { - auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution); - initialSubsimplex= temp.first; + auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution, pomdp.getNumberOfStates()); + initialSubsimplex = temp.first; initialLambda = temp.second; } @@ -659,10 +644,10 @@ namespace storm { // current ID -> action -> reward std::map> beliefActionRewards; //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; + std::map>> subSimplexCache; std::map> lambdaCache; - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); + auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); if(cacheSubsimplices) { subSimplexCache[0] = temp.first; lambdaCache[0] = temp.second; @@ -784,7 +769,7 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states + // add a self-loop to target states targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); } else if (counter > maxModelSize) { @@ -946,8 +931,8 @@ namespace storm { storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution); - std::vector> subSimplex = temp.first; + auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); + std::vector> subSimplex = 
temp.first; std::vector lambdas = temp.second; auto sum = storm::utility::zero(); @@ -993,13 +978,17 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> const &grid, uint32_t observation, - std::vector &probabilities) { + std::map &probabilities) { // TODO This one is quite slow for (auto const &belief : grid) { if (belief.observation == observation) { bool same = true; - for (size_t i = 0; i < belief.probabilities.size(); ++i) { - if (!cc.isEqual(belief.probabilities[i], probabilities[i])) { + for (auto const &probEntry : belief.probabilities) { + if (probabilities.find(probEntry.first) == probabilities.end()) { + same = false; + break; + } + if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) { same = false; break; } @@ -1009,7 +998,6 @@ namespace storm { } } } - return -1; } @@ -1020,12 +1008,13 @@ namespace storm { "POMDP contains more than one initial state"); STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, "POMDP does not contain an initial state"); - std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); + std::map distribution; uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { if (pomdp.getInitialStates()[state] == 1) { distribution[state] = storm::utility::one(); observation = pomdp.getObservation(state); + break; } } return storm::pomdp::Belief{id, observation, distribution}; @@ -1048,8 +1037,7 @@ namespace storm { // TODO this can probably be condensed if (statesWithObservation.size() == 1) { // If there is only one state with the observation, we can directly add the corresponding belief - std::vector distribution(pomdp.getNumberOfStates(), - storm::utility::zero()); + std::map distribution; distribution[statesWithObservation.front()] = storm::utility::one(); storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( @@ -1068,17 +1056,19 @@ namespace storm { uint64_t index = 0; 
while (!done) { - std::vector distribution(pomdp.getNumberOfStates(), - storm::utility::zero()); + std::map distribution; for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { - distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) / - storm::utility::convertNumber( - gridResolution); + if (helper[i] - helper[i + 1] > ValueType(0)) { + distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) / + storm::utility::convertNumber( + gridResolution); + } + } + if (helper[statesWithObservation.size() - 1] > ValueType(0)) { + distribution[statesWithObservation.back()] = + helper[statesWithObservation.size() - 1] / + storm::utility::convertNumber(gridResolution); } - distribution[statesWithObservation.back()] = - helper[statesWithObservation.size() - 1] / - storm::utility::convertNumber(gridResolution); - storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); beliefList.push_back(belief); @@ -1111,20 +1101,24 @@ namespace storm { } template - std::pair>, std::vector> + std::pair>, std::vector> ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::vector &probabilities, uint64_t resolution) { + std::map &probabilities, uint64_t resolution, uint64_t nrStates) { + + //TODO this can also be simplified using the sparse vector interpretation + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper - uint64_t probSize = probabilities.size(); - std::vector x(probSize); - std::vector v(probSize); - std::vector d(probSize); + std::vector x(nrStates); + std::vector v(nrStates); + std::vector d(nrStates); auto convResolution = storm::utility::convertNumber(resolution); - for (size_t i = 0; i < probSize; ++i) { - for (size_t j = i; j < probSize; ++j) { - x[i] += convResolution * probabilities[j]; + for (size_t i = 0; i < nrStates; 
++i) { + for (auto const &probEntry : probabilities) { + if (probEntry.first >= i) { + x[i] += convResolution * probEntry.second; + } } v[i] = storm::utility::floor(x[i]); d[i] = x[i] - v[i]; @@ -1132,14 +1126,14 @@ namespace storm { auto p = storm::utility::vector::getSortedIndices(d); - std::vector> qs(probSize, std::vector(probSize)); - for (size_t i = 0; i < probSize; ++i) { + std::vector> qs(nrStates, std::vector(nrStates)); + for (size_t i = 0; i < nrStates; ++i) { if (i == 0) { - for (size_t j = 0; j < probSize; ++j) { + for (size_t j = 0; j < nrStates; ++j) { qs[i][j] = v[j]; } } else { - for (size_t j = 0; j < probSize; ++j) { + for (size_t j = 0; j < nrStates; ++j) { if (j == p[i - 1]) { qs[i][j] = qs[i - 1][j] + storm::utility::one(); } else { @@ -1148,18 +1142,22 @@ namespace storm { } } } - std::vector> subSimplex(probSize, std::vector(probSize)); - for (size_t j = 0; j < probSize; ++j) { - for (size_t i = 0; i < probSize - 1; ++i) { - subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; + std::vector> subSimplex(nrStates); + for (size_t j = 0; j < nrStates; ++j) { + for (size_t i = 0; i < nrStates - 1; ++i) { + if (cc.isLess(storm::utility::zero(), qs[j][i] - qs[j][i + 1])) { + subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; + } } - subSimplex[j][probSize - 1] = qs[j][probSize - 1] / convResolution; + if (cc.isLess(storm::utility::zero(), qs[j][nrStates - 1])) { + subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution; + } } - std::vector lambdas(probSize, storm::utility::zero()); + std::vector lambdas(nrStates, storm::utility::zero()); auto sum = storm::utility::zero(); - for (size_t i = 1; i < probSize; ++i) { + for (size_t i = 1; i < nrStates; ++i) { lambdas[i] = d[p[i - 1]] - d[p[i]]; sum += d[p[i - 1]] - d[p[i]]; } @@ -1177,17 +1175,16 @@ namespace storm { uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - 
std::vector postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities; - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - uint32_t observation = pomdp.getObservation(state); - if (postProbabilities[state] != storm::utility::zero()) { - if (res.count(observation) == 0) { - res[observation] = postProbabilities[state]; - } else { - res[observation] += postProbabilities[state]; - } + std::map postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities; + for (auto const &probEntry : postProbabilities) { + uint32_t observation = pomdp.getObservation(probEntry.first); + if (res.count(observation) == 0) { + res[observation] = probEntry.second; + } else { + res[observation] += probEntry.second; } } + return res; } @@ -1195,12 +1192,13 @@ namespace storm { storm::pomdp::Belief ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { - std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + std::map distributionAfter; uint32_t observation = 0; - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - if (belief.probabilities[state] != storm::utility::zero()) { - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { + for (auto const &probEntry : belief.probabilities) { + uint64_t state = probEntry.first; + auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + if (entry.getValue() > 0) { observation = pomdp.getObservation(entry.getColumn()); distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); } @@ -1215,14 +1213,13 @@ namespace storm { std::vector &beliefIsTarget, std::set const &targetObservations, 
storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { storm::utility::Stopwatch distrWatch(true); - std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - if (belief.probabilities[state] != storm::utility::zero()) { - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (pomdp.getObservation(entry.getColumn()) == observation) { - distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } + std::map distributionAfter; + for (auto const &probEntry : belief.probabilities) { + uint64_t state = probEntry.first; + auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + if (pomdp.getObservation(entry.getColumn()) == observation) { + distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); } } } @@ -1230,12 +1227,12 @@ namespace storm { // We have to normalize the distribution storm::utility::Stopwatch normalizationWatch(true); auto sum = storm::utility::zero(); - for (ValueType const &entry : distributionAfter) { - sum += entry; + for (auto const &entry : distributionAfter) { + sum += entry.second; } - for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { - distributionAfter[i] /= sum; + for (auto const &entry : distributionAfter) { + distributionAfter[entry.first] /= sum; } normalizationWatch.stop(); if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { @@ -1259,7 +1256,8 @@ namespace storm { uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); for (size_t i = 0; i < belief.probabilities.size(); ++i) { - result += belief.probabilities[i] * 
pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix()); + for (auto const &probEntry : belief.probabilities) + result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); } return result; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5e8a95025..5108d3135 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -200,8 +200,8 @@ namespace storm { * @param gridResolution * @return */ - std::pair>, std::vector> - computeSubSimplexAndLambdas(std::vector &probabilities, uint64_t gridResolution); + std::pair>, std::vector> + computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); /** @@ -269,7 +269,7 @@ namespace storm { * @return */ uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::vector &probabilities); + std::map &probabilities); /** * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) @@ -285,7 +285,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map> &beliefActionRewards, - std::map>> &subSimplexCache, + std::map>> &subSimplexCache, std::map> &lambdaCache, std::map &result, std::map> &chosenActions, uint64_t gridResolution, bool min, bool computeRewards); diff --git a/src/storm-pomdp/storage/Belief.h b/src/storm-pomdp/storage/Belief.h index bf4df4c9e..ed7591f1b 100644 --- a/src/storm-pomdp/storage/Belief.h +++ b/src/storm-pomdp/storage/Belief.h @@ -6,7 +6,7 @@ namespace storm { uint64_t id; uint32_t observation; //TODO make this sparse? 
- std::vector probabilities; + std::map probabilities; }; } } \ No newline at end of file From 53d5e9edf5791a8c76e9b31ea1b671e264b15741 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 28 Feb 2020 12:45:07 +0100 Subject: [PATCH 041/155] Code Cleanup --- .../ApproximatePOMDPModelchecker.cpp | 30 +++++++++---------- .../ApproximatePOMDPModelchecker.h | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bdf9fab1a..66135c02b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -299,7 +299,6 @@ namespace storm { STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) } while (!beliefsToBeExpanded.empty()) { - // TODO direct generation of transition matrix? uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; @@ -637,7 +636,7 @@ namespace storm { // Belief ID -> ActionIndex std::map> chosenActions; - // Belief ID -> Observation -> Probability + // Belief ID -> action -> Observation -> Probability std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; @@ -1025,7 +1024,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, - std::vector> &grid, std::vector &beliefIsKnown, + std::vector> &grid, std::vector &beliefIsTarget, uint64_t nextId) { bool isTarget; uint64_t newId = nextId; @@ -1045,7 +1044,7 @@ namespace storm { << distribution << "]"); beliefList.push_back(belief); grid.push_back(belief); - beliefIsKnown.push_back(isTarget); + beliefIsTarget.push_back(isTarget); ++newId; } else { // Otherwise we have to enumerate all possible distributions with regards to the grid @@ -1073,7 +1072,7 @@ namespace 
storm { STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); beliefList.push_back(belief); grid.push_back(belief); - beliefIsKnown.push_back(isTarget); + beliefIsTarget.push_back(isTarget); if (helper[statesWithObservation.size() - 1] == storm::utility::convertNumber(gridResolution)) { // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions @@ -1175,7 +1174,16 @@ namespace storm { uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - std::map postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities; + std::map postProbabilities; + for (auto const &probEntry : belief.probabilities) { + uint64_t state = probEntry.first; + auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + if (entry.getValue() > 0) { + postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); + } + } + } for (auto const &probEntry : postProbabilities) { uint32_t observation = pomdp.getObservation(probEntry.first); if (res.count(observation) == 0) { @@ -1212,7 +1220,6 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { - storm::utility::Stopwatch distrWatch(true); std::map distributionAfter; for (auto const &probEntry : belief.probabilities) { uint64_t state = probEntry.first; @@ -1223,9 +1230,7 @@ namespace storm { } } } - distrWatch.stop(); // We have to normalize the distribution - storm::utility::Stopwatch normalizationWatch(true); auto sum = storm::utility::zero(); for (auto const &entry : distributionAfter) { sum += entry.second; @@ 
-1234,19 +1239,12 @@ namespace storm { for (auto const &entry : distributionAfter) { distributionAfter[entry.first] /= sum; } - normalizationWatch.stop(); if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { - storm::utility::Stopwatch getWatch(true); auto res = getBeliefIdInVector(beliefList, observation, distributionAfter); - getWatch.stop(); - //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / getId: " << getWatch.getTimeInNanoseconds() << std::endl) return res; } else { - storm::utility::Stopwatch pushWatch(true); beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - pushWatch.stop(); - //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / generateBelief: " << pushWatch.getTimeInNanoseconds() << std::endl) return id; } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5108d3135..eb3247de1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -215,7 +215,7 @@ namespace storm { std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, - std::vector &beliefIsKnown, uint64_t nextId); + std::vector &beliefIsTarget, uint64_t nextId); /** From e1bd87b91a561e3ddebbe880aeeabdce4d1e8326 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 28 Feb 2020 12:45:34 +0100 Subject: [PATCH 042/155] Added documentation --- .../ApproximatePOMDPModelchecker.h | 213 +++++++++++++----- 1 file changed, 159 insertions(+), 54 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index eb3247de1..66e5d563d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -47,24 +47,76 @@ namespace storm { public: explicit ApproximatePOMDPModelchecker(); + /** + * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop + * + * @param pomdp the POMDP to be checked + * @param targetObservations the set of observations to be reached + * @param min true if minimum probability is to be computed + * @param gridResolution the initial grid resolution + * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ std::unique_ptr> refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, double explorationThreshold); + /** + * Compute the reachability probability of given target observations on a POMDP for the given resolution only. + * On-the-fly state space generation is used for the overapproximation + * + * @param pomdp the POMDP to be checked + * @param targetObservations the set of observations to be reached + * @param min true if minimum probability is to be computed + * @param gridResolution the grid resolution + * @param explorationThreshold the threshold for exploration stopping. 
If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, double explorationThreshold); + /** + * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. + * On-the-fly state space generation is used for the overapproximation + * + * @param pomdp the POMDP to be checked + * @param targetObservations the set of observations to be reached + * @param min true if minimum rewards are to be computed + * @param gridResolution the initial grid resolution + * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ std::unique_ptr> computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution); + /** + * Compute the reachability probability for given target observations on a POMDP for the given resolution only. + * Static state space generation is used for the overapproximation, i.e. 
the whole grid is generated + * + * @param pomdp the POMDP to be checked + * @param targetObservations the set of observations to be reached + * @param min true if the minimum probability is to be computed + * @param gridResolution the initial grid resolution + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution); + /** + * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. + * Static state space generation is used for the overapproximation, i.e. the whole grid is generated + * + * @param pomdp the POMDP to be checked + * @param targetObservations the set of observations to be reached + * @param min true if the minimum rewards are to be computed + * @param gridResolution the initial grid resolution + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ std::unique_ptr> computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, @@ -72,16 +124,18 @@ namespace storm { private: /** + * Helper method to compute the inital step of the refinement loop * - * @param pomdp - * @param targetObservations - * @param min - * @param observationResolutionVector - * @param computeRewards - * @param explorationThreshold - * @param overApproximationMap - * @param underApproximationMap - * @return + * @param pomdp the pomdp to be checked + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param observationResolutionVector vector containing the resolution to be used for each observation + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @param 
explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value + * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value + * @param maxUaModelSize the maximum size of the underapproximation model to be generated + * @return struct containing components generated during the computation to be used in later refinement iterations */ std::unique_ptr> computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, @@ -90,13 +144,18 @@ namespace storm { boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** + * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * - * @param pomdp - * @param targetObservations - * @param min - * @param gridResolution - * @param computeRewards - * @return + * @param pomdp the pomdp to be checked + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param observationResolutionVector vector containing the resolution to be used for each observation + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @param explorationThreshold the threshold for exploration stopping. 
If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value + * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value + * @param maxUaModelSize the maximum size of the underapproximation model to be generated + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, @@ -106,13 +165,14 @@ namespace storm { boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** + * Helper method to compute reachability properties using static state space generation * - * @param pomdp - * @param targetObservations - * @param min - * @param gridResolution - * @param computeRewards - * @return + * @param pomdp the POMDP to be checked + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param gridResolution the resolution of the grid to be used + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> computeReachability(storm::models::sparse::Pomdp const &pomdp, @@ -166,16 +226,18 @@ namespace storm { bool min); /** - * TODO - * @param pomdp - * @param beliefList - * @param beliefIsTarget - * @param targetObservations - * @param initialBeliefId - * @param min - * @param computeReward - * @param maxModelSize - * @return + * Helper to compute an underapproximation of the reachability property. + * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. 
+ * + * @param pomdp the POMDP to be checked + * @param beliefList vector containing already generated beliefs + * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target + * @param targetObservations set of target observations + * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state + * @param min true if minimum value is to be computed + * @param computeReward true if rewards are to be computed + * @param maxModelSize number of states up until which the belief support should be unrolled + * @return struct containing the components generated during the under approximation */ std::unique_ptr> computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, @@ -185,31 +247,38 @@ namespace storm { uint64_t maxModelSize); /** + * Constructs the initial belief for the given POMDP * - * @param pomdp - * @param id - * @return + * @param pomdp the POMDP + * @param id the id the initial belief is given + * @return a belief representing the initial belief */ storm::pomdp::Belief getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); /** + * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation * - * @param probabilities - * @param gridResolution - * @return + * @param probabilities the probability distribution of the belief + * @param gridResolution the resolution used for the belief + * @param nrStates number of states in the POMDP + * @return a pair containing: 1) the subsimplices 2) the lambda values */ std::pair>, std::vector> computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); /** - * Helper method to construct the grid of Belief states to approximate the POMDP - * - * @param pomdp - * @param gridResolution + * Helper method to construct the static belief grid for the POMDP overapproximation * + * @param pomdp 
the POMDP to be approximated + * @param target_observations set of target observations + * @param gridResolution the resolution of the grid to be constructed + * @param beliefList data structure to store all generated beliefs + * @param grid data structure to store references to the grid beliefs specifically + * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief + * @param nextId the ID to be used for the next generated belief */ void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, std::set const &target_observations, uint64_t gridResolution, @@ -219,12 +288,12 @@ namespace storm { /** - * Helper method to get the probabilities of each observation after performing an action + * Helper method to get the probabilities to be in a state with each observation after performing an action * - * @param pomdp - * @param belief - * @param actionIndex - * @return + * @param pomdp the POMDP + * @param belief the belief in which the action is performed + * @param actionIndex the index of the action to be performed + * @return mapping from each observation to the probability to be in a state with that observation after performing the action */ std::map computeObservationProbabilitiesAfterAction( storm::models::sparse::Pomdp const &pomdp, @@ -236,6 +305,9 @@ namespace storm { * If the belief does not exist yet, it is created and added to the list of all beliefs * * @param pomdp the POMDP on which the evaluation should be performed + * @param beliefList data structure to store all generated beliefs + * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief + * @param targetObservations set of target observations * @param belief the starting belief * @param actionIndex the index of the action to be performed * @param observation the observation after the action was performed @@ -250,12 +322,13 @@ namespace storm { uint64_t actionIndex, uint32_t 
observation, uint64_t id); /** - * Helper method to get the next belief that results from a belief by performing an action + * Helper method to generate the next belief that results from a belief by performing an action * - * @param pomdp - * @param belief - * @param actionIndex - * @return + * @param pomdp the POMDP + * @param belief the starting belief + * @param actionIndex the index of the action to be performed + * @param id the ID for the generated belief + * @return a belief object representing the belief after performing the action in the starting belief */ storm::pomdp::Belief getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, @@ -264,21 +337,53 @@ namespace storm { /** * Helper to get the id of a Belief stored in a given vector structure * - * @param observation - * @param probabilities - * @return + * @param grid the vector on which the lookup is performed + * @param observation the observation of the belief + * @param probabilities the probability distribution over the POMDP states of the Belief + * @return if the belief was found in the vector, the belief's ID, otherwise -1 */ uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, std::map &probabilities); /** + * Helper method to build the transition matrix from a data structure containing transations + * * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) * @return sparseMatrix representing the transitions */ storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); + /** + * Get the reward for performing an action in a given belief + * + * @param pomdp the POMDP + * @param action the index of the action to be performed + * @param belief the belief in which the action is performed + * @return the reward earned by performing the action in the belief + */ ValueType 
getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief &belief); + + /** + * Helper method for value iteration on data structures representing the belief grid + * This is very close to the method implemented in PRISM POMDP + * + * @param pomdp The POMDP + * @param beliefList data structure to store all generated beliefs + * @param beliefGrid data structure to store references to the grid beliefs specifically + * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief + * @param observationProbabilities data structure containing for each belief and possible action the probability to go to a state with a given observation + * @param nextBelieves data structure containing for each belief the successor belief after performing an action and observing a given observation + * @param beliefActionRewards data structure containing for each belief and possible action the reward for performing the action + * @param subSimplexCache caching data structure to store already computed subsimplices + * @param lambdaCache caching data structure to store already computed lambda values + * @param result data structure to store result values for each grid state + * @param chosenActions data structure to store the action(s) that lead to the computed result value + * @param gridResolution the resolution of the grid + * @param min true if minimal values are to be computed + * @param computeRewards true if rewards are to be computed + * @return the resulting probability/reward in the initial state + */ ValueType overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector> &beliefGrid, std::vector &beliefIsTarget, From c74a6ea2011b951a75229558448b62656c505428 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 28 Feb 2020 13:01:51 +0100 Subject: [PATCH 043/155] Removal of unused extractActions function --- 
.../ApproximatePOMDPModelchecker.cpp | 105 ++---------------- .../ApproximatePOMDPModelchecker.h | 46 -------- 2 files changed, 8 insertions(+), 143 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 66135c02b..e88daf09b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -295,7 +295,7 @@ namespace storm { //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting // Expand the beliefs to generate the grid on-the-fly - if(explorationThreshold > 0){ + if (explorationThreshold > 0) { STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) } while (!beliefsToBeExpanded.empty()) { @@ -340,7 +340,7 @@ namespace storm { observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - if(cacheSubsimplices){ + if (cacheSubsimplices) { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } @@ -525,7 +525,7 @@ namespace storm { auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - if(cacheSubsimplices) { + if (cacheSubsimplices) { subSimplexCache[nextBelief.id] = subSimplex; lambdaCache[nextBelief.id] = lambdas; } @@ -541,8 +541,8 @@ namespace storm { } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + 
cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { chosenActionIndices.clear(); @@ -573,7 +573,7 @@ namespace storm { std::vector initialLambda; std::vector> initialSubsimplex; - if(cacheSubsimplices){ + if (cacheSubsimplices) { initialLambda = lambdaCache[0]; initialSubsimplex = subSimplexCache[0]; } else { @@ -647,7 +647,7 @@ namespace storm { std::map> lambdaCache; auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - if(cacheSubsimplices) { + if (cacheSubsimplices) { subSimplexCache[0] = temp.first; lambdaCache[0] = temp.second; } @@ -884,96 +884,6 @@ namespace storm { return smb.build(); } - template - std::vector ApproximatePOMDPModelchecker::extractBestActions( - storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { - storm::pomdp::Belief currentBelief = beliefList[currentBeliefId]; - - //TODO put this in extra function - std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, 
currentBelief, - action, observation, nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); - } - observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); - - // choose the action which results in the value computed by the over-approximation - ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); // simply change this for rewards? - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - std::vector> subSimplex = temp.first; - std::vector lambdas = temp.second; - - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j])); - } - } - currentValue += iter->second * sum; - } - - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!cc.isEqual(storm::utility::zero(), 
chosenValue - currentValue)) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - return chosenActionIndices; - } - - template - std::vector ApproximatePOMDPModelchecker::extractBestAction( - storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { - return std::vector{ - extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId, - nextId, min).front()}; - } - - template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> const &grid, uint32_t observation, @@ -1265,6 +1175,7 @@ namespace storm { class ApproximatePOMDPModelchecker; #ifdef STORM_HAVE_CARL + template class ApproximatePOMDPModelchecker; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 66e5d563d..27e8f6bb7 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -179,52 +179,6 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards); - /** - * TODO - * @param pomdp - * @param beliefList - * @param observationProbabilities - * @param nextBelieves - * @param result - * @param gridResolution - * @param currentBeliefId - * @param nextId - * @param min - * @return - */ - std::vector extractBestActions(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &target_observations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, 
uint64_t nextId, - bool min); - - /** - * TODO - * @param pomdp - * @param beliefList - * @param observationProbabilities - * @param nextBelieves - * @param result - * @param gridResolution - * @param currentBeliefId - * @param nextId - * @param min - * @return - */ - std::vector extractBestAction(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &target_observations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, - bool min); - /** * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. From d703516d2d0710295f9b0402118b80150a4c1f7c Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Wed, 4 Mar 2020 15:00:58 -0800 Subject: [PATCH 044/155] make code compile --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index e88daf09b..91947e075 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -133,7 +133,7 @@ namespace storm { } for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - obsAccumulator[i] /= beliefCount[i]; + obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); } //TODO think about some other scoring methods From 0f967409e66e02e437fda55dacc6a436fda29b4a Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Wed, 4 Mar 2020 17:34:33 -0800 Subject: [PATCH 045/155] post merge compile issues with double vs rationals in storm pomdp --- src/storm-pomdp-cli/storm-pomdp.cpp | 47 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 23 deletions(-) 
diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index bc4511dc0..e669888d5 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -197,13 +197,14 @@ int main(const int argc, const char** argv) { } storm::cli::setUrgentOptions(); - + typedef double VT; auto const& coreSettings = storm::settings::getModule(); auto const& pomdpSettings = storm::settings::getModule(); auto const &general = storm::settings::getModule(); auto const &debug = storm::settings::getModule(); + if (general.isVerboseSet()) { storm::utility::setLogLevel(l3pp::LogLevel::INFO); } @@ -222,8 +223,8 @@ int main(const int argc, const char** argv) { auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); - std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + std::shared_ptr> pomdp = model->template as>(); + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); pomdp = makeCanonic.transform(); std::shared_ptr formula; @@ -235,7 +236,7 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." 
<< std::endl); - storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); + storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); std::cout << uniqueAnalysis.analyse() << std::endl; } @@ -261,12 +262,12 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); pomdp = selfLoopEliminator.transform(); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); } if (pomdpSettings.isQualitativeReductionSet()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); prob0States = qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()); std::cout << *prob0States << std::endl; @@ -276,15 +277,15 @@ int main(const int argc, const char** argv) { std::cout << *prob1States << std::endl; STORM_PRINT_AND_LOG(" done." << std::endl); //std::cout << "actual reduction not yet implemented..." 
<< std::endl; - storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); } if (pomdpSettings.isGridApproximationSet()) { - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); + std::unique_ptr> result; result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); @@ -306,10 +307,10 @@ int main(const int argc, const char** argv) { storm::expressions::ExpressionManager expressionManager; std::shared_ptr smtSolverFactory = std::make_shared(); if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { - storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); memlessSearch.findNewStrategyForSomeState(5); } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { - storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); 
memlessSearch.findNewStrategyForSomeState(5); } else { STORM_LOG_ERROR("This method is not implemented."); @@ -321,7 +322,7 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); pomdp = selfLoopEliminator.transform(); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); } @@ -367,10 +368,10 @@ int main(const int argc, const char** argv) { "The formula is not supported by the grid approximation"); STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); + std::unique_ptr> result; result = checker.computeReachabilityReward(*pomdp, targetObservationSet, rewFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, @@ -384,7 +385,7 @@ int main(const int argc, const char** argv) { STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), 
pomdpSettings.getMemoryBound()); std::cout << memory.toString() << std::endl; - storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); pomdp = memoryUnfolder.transform(); STORM_PRINT_AND_LOG(" done." << std::endl); pomdp->printModelInformationToStream(std::cout); @@ -398,7 +399,7 @@ int main(const int argc, const char** argv) { STORM_PRINT_AND_LOG("Eliminating mec choices ..."); // Note: Elimination of mec choices only preserves memoryless schedulers. uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); pomdp = mecChoiceEliminator.transform(*formula); STORM_PRINT_AND_LOG(" done." << std::endl); STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); @@ -408,10 +409,10 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { if (pomdpSettings.isTransformSimpleSet()) { STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); } else { STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); } pomdp->printModelInformationToStream(std::cout); STORM_PRINT_AND_LOG(" done." 
<< std::endl); @@ -420,7 +421,7 @@ int main(const int argc, const char** argv) { if (pomdpSettings.isExportToParametricSet()) { STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); std::string transformMode = pomdpSettings.getFscApplicationTypeString(); auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); STORM_PRINT_AND_LOG(" done." << std::endl); From b61775570b13589563e6349e05cb57ba57afef69 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Thu, 5 Mar 2020 10:21:13 -0800 Subject: [PATCH 046/155] minor --- src/storm-pomdp-cli/storm-pomdp.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index e669888d5..0e6a9d627 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -65,6 +65,8 @@ void initializeSettings() { storm::settings::addModule(); storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); @@ -80,11 +82,7 @@ void initializeSettings() { storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); - - storm::settings::addModule(); storm::settings::addModule(); - - storm::settings::addModule(); } template From 5c7a6b791abb692070939c7f198658359edf9d3b Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Sun, 8 Mar 2020 17:50:57 -0700 Subject: [PATCH 047/155] fixed (merge?) 
mistake that yielded errors for expected rewards --- src/storm-pomdp-cli/storm-pomdp.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 0e6a9d627..146f842aa 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -239,12 +239,9 @@ int main(const int argc, const char** argv) { } if (formula) { - storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); - storm::logic::Formula const &subformula1 = probFormula.getSubformula(); - - if (formula->isProbabilityOperatorFormula()) { - + storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); + storm::logic::Formula const &subformula1 = probFormula.getSubformula(); std::set targetObservationSet; storm::storage::BitVector targetStates(pomdp->getNumberOfStates()); storm::storage::BitVector badStates(pomdp->getNumberOfStates()); From 63e0d772a49f689a1688e5e60b58d60a34ccd6bf Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Sun, 8 Mar 2020 20:02:49 -0700 Subject: [PATCH 048/155] do not use the 'goal' label for internal purposes, but rather __goal__. 
TODO: Consider if we can do without a fresh label --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 91947e075..0a910d64f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -37,18 +37,19 @@ namespace storm { storm::utility::Stopwatch underlyingWatch(true); // Compute the results on the underlying MDP as a basic overapproximation storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); - underlyingMdpLabeling.addLabel("goal"); + // TODO: Is the following really necessary + underlyingMdpLabeling.addLabel("__goal__"); std::vector goalStates; for (auto const &targetObs : targetObservations) { for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { - underlyingMdpLabeling.addLabelToState("goal", goalState); + underlyingMdpLabeling.addLabelToState("__goal__", goalState); } } storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); auto underlyingModel = std::static_pointer_cast>( std::make_shared>(underlyingMdp)); std::string initPropString = min ? "Pmin" : "Pmax"; - initPropString += "=? [F \"goal\"]"; + initPropString += "=? [F \"__goal__\"]"; std::vector propVector = storm::api::parseProperties(initPropString); std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); STORM_PRINT("Underlying MDP" << std::endl) From 5933467670e9eb5b4571e97c24dd55590608c3d7 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 17 Mar 2020 09:21:08 +0100 Subject: [PATCH 049/155] Silenced warnings regarding member initialization in unexpected order. 
--- src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h | 4 ++-- src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h index 5aa69b3ce..3ddba7586 100644 --- a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h @@ -20,9 +20,9 @@ namespace pomdp { storm::storage::BitVector const& surelyReachSinkStates, std::shared_ptr& smtSolverFactory) : pomdp(pomdp), + targetObservations(targetObservationSet), targetStates(targetStates), - surelyReachSinkStates(surelyReachSinkStates), - targetObservations(targetObservationSet) { + surelyReachSinkStates(surelyReachSinkStates) { this->expressionManager = std::make_shared(); smtSolver = smtSolverFactory->create(*expressionManager); diff --git a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h index 5020fc9ab..97dc0f679 100644 --- a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h +++ b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h @@ -20,9 +20,9 @@ namespace storm { storm::storage::BitVector const& surelyReachSinkStates, std::shared_ptr& smtSolverFactory) : pomdp(pomdp), + targetObservations(targetObservationSet), targetStates(targetStates), - surelyReachSinkStates(surelyReachSinkStates), - targetObservations(targetObservationSet) { + surelyReachSinkStates(surelyReachSinkStates) { this->expressionManager = std::make_shared(); smtSolver = smtSolverFactory->create(*expressionManager); From 0d58ea529123e96f4a3e9156ba61f7c7fccad4c3 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 17 Mar 2020 09:21:22 +0100 Subject: [PATCH 050/155] Adding missing template instantiation. 
--- src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp index 45016e515..e764b69ce 100644 --- a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp @@ -116,8 +116,8 @@ namespace storm { return std::make_shared>(newPomdp); } - template - class KnownProbabilityTransformer; + template class KnownProbabilityTransformer; + template class KnownProbabilityTransformer; } } } \ No newline at end of file From 5c748254a664acc243a1e32de8dfa28661254fd8 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 17 Mar 2020 09:22:19 +0100 Subject: [PATCH 051/155] Cleaning up the POMDP CLI code a bit. Now supports switching to exact arithmetic with the --exact switch. --- .../settings/PomdpSettings.cpp | 67 ++ src/storm-pomdp-cli/settings/PomdpSettings.h | 13 + src/storm-pomdp-cli/storm-pomdp.cpp | 739 +++++++++--------- 3 files changed, 435 insertions(+), 384 deletions(-) create mode 100644 src/storm-pomdp-cli/settings/PomdpSettings.cpp create mode 100644 src/storm-pomdp-cli/settings/PomdpSettings.h diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.cpp b/src/storm-pomdp-cli/settings/PomdpSettings.cpp new file mode 100644 index 000000000..f08dbe932 --- /dev/null +++ b/src/storm-pomdp-cli/settings/PomdpSettings.cpp @@ -0,0 +1,67 @@ +#include "storm-pomdp-cli/settings/PomdpSettings.h" + +#include "storm/settings/SettingsManager.h" + +#include "storm/settings/modules/GeneralSettings.h" +#include "storm/settings/modules/CoreSettings.h" +#include "storm/settings/modules/IOSettings.h" +#include "storm/settings/modules/DebugSettings.h" +#include "storm/settings/modules/CuddSettings.h" +#include "storm/settings/modules/SylvanSettings.h" +#include "storm/settings/modules/EigenEquationSolverSettings.h" +#include 
"storm/settings/modules/GmmxxEquationSolverSettings.h" +#include "storm/settings/modules/NativeEquationSolverSettings.h" +#include "storm/settings/modules/EliminationSettings.h" +#include "storm/settings/modules/MinMaxEquationSolverSettings.h" +#include "storm/settings/modules/GameSolverSettings.h" +#include "storm/settings/modules/BisimulationSettings.h" +#include "storm/settings/modules/GlpkSettings.h" +#include "storm/settings/modules/GurobiSettings.h" +#include "storm/settings/modules/Smt2SmtSolverSettings.h" +#include "storm/settings/modules/ExplorationSettings.h" +#include "storm/settings/modules/ResourceSettings.h" +#include "storm/settings/modules/AbstractionSettings.h" +#include "storm/settings/modules/BuildSettings.h" +#include "storm/settings/modules/JitBuilderSettings.h" +#include "storm/settings/modules/TopologicalEquationSolverSettings.h" +#include "storm/settings/modules/ModelCheckerSettings.h" +#include "storm/settings/modules/MultiplierSettings.h" +#include "storm/settings/modules/TransformationSettings.h" +#include "storm/settings/modules/MultiObjectiveSettings.h" +#include "storm/settings/modules/HintSettings.h" + +#include "storm-pomdp-cli/settings/modules/POMDPSettings.h" + +namespace storm { + namespace settings { + void initializePomdpSettings(std::string const& name, std::string const& executableName) { + storm::settings::mutableManager().setName(name, executableName); + + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + + storm::settings::addModule(); + + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + 
storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + } + } +} + diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.h b/src/storm-pomdp-cli/settings/PomdpSettings.h new file mode 100644 index 000000000..f3788c598 --- /dev/null +++ b/src/storm-pomdp-cli/settings/PomdpSettings.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace storm { + namespace settings { + /*! + * Initialize the settings manager. + */ + void initializePomdpSettings(std::string const& name, std::string const& executableName); + + } +} \ No newline at end of file diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 5420e9b75..6aae4d493 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -3,35 +3,10 @@ #include "storm/utility/initialize.h" #include "storm/settings/modules/GeneralSettings.h" -#include "storm/settings/modules/CoreSettings.h" -#include "storm/settings/modules/IOSettings.h" #include "storm/settings/modules/DebugSettings.h" -#include "storm/settings/modules/CuddSettings.h" -#include "storm/settings/modules/SylvanSettings.h" -#include "storm/settings/modules/EigenEquationSolverSettings.h" -#include "storm/settings/modules/GmmxxEquationSolverSettings.h" -#include "storm/settings/modules/NativeEquationSolverSettings.h" -#include "storm/settings/modules/EliminationSettings.h" -#include "storm/settings/modules/MinMaxEquationSolverSettings.h" -#include "storm/settings/modules/GameSolverSettings.h" -#include "storm/settings/modules/BisimulationSettings.h" -#include "storm/settings/modules/GlpkSettings.h" -#include "storm/settings/modules/GurobiSettings.h" -#include "storm/settings/modules/Smt2SmtSolverSettings.h" -#include "storm/settings/modules/ExplorationSettings.h" -#include "storm/settings/modules/ResourceSettings.h" -#include "storm/settings/modules/AbstractionSettings.h" -#include "storm/settings/modules/BuildSettings.h" 
-#include "storm/settings/modules/JitBuilderSettings.h" -#include "storm/settings/modules/TopologicalEquationSolverSettings.h" -#include "storm/settings/modules/ModelCheckerSettings.h" -#include "storm/settings/modules/MultiplierSettings.h" - -#include "storm/settings/modules/TransformationSettings.h" -#include "storm/settings/modules/MultiObjectiveSettings.h" - -#include "storm/settings/modules/HintSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" +#include "storm-pomdp-cli/settings/PomdpSettings.h" + #include "storm/analysis/GraphConditions.h" #include "storm-cli-utilities/cli.h" @@ -51,129 +26,370 @@ #include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" #include "storm/api/storm.h" -#include - -/*! - * Initialize the settings manager. - */ -void initializeSettings() { - storm::settings::mutableManager().setName("Storm-POMDP", "storm-pomdp"); - - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - - storm::settings::addModule(); +#include "storm/exceptions/UnexpectedException.h" - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); -} +#include -template -bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& targetStates, storm::storage::BitVector& badStates) { - //TODO refactor (use model checker to determine the states, then transform into observations). 
- //TODO rename into appropriate function name. - bool validFormula = false; - if (subformula.isEventuallyFormula()) { - storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); - storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); +namespace storm { + namespace pomdp { + namespace cli { + + + template + bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& targetStates, storm::storage::BitVector& badStates) { + //TODO refactor (use model checker to determine the states, then transform into observations). + //TODO rename into appropriate function name. 
+ bool validFormula = false; + if (subformula.isEventuallyFormula()) { + storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); + storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); + } + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); + } + } + } + } else if (subformula.isUntilFormula()) { + storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); + storm::logic::Formula const &subformula1 = untilFormula.getLeftSubformula(); + if (subformula1.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (!labeling.getStateHasLabel(targetLabel, state)) { + badStates.set(state); + } + } + } else if (subformula1.isAtomicExpressionFormula()) { + std::stringstream stream; + stream << 
subformula1.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (!labeling.getStateHasLabel(targetLabel, state)) { + badStates.set(state); + } + } + } else { + return false; + } + storm::logic::Formula const &subformula2 = untilFormula.getRightSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); + } + + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + targetStates.set(state); + } + + } + } } + return validFormula; } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 
0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); + + template + void processOptionsWithValueTypeAndDdLib(storm::cli::SymbolicInput const& symbolicInput, storm::cli::ModelProcessingInformation const& mpi) { + auto const& pomdpSettings = storm::settings::getModule(); + + auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); + STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); + + std::shared_ptr> pomdp = model->template as>(); + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + pomdp = makeCanonic.transform(); + + std::shared_ptr formula; + if (!symbolicInput.properties.empty()) { + formula = symbolicInput.properties.front().getRawFormula(); + STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); + STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); } - } - } - } else if (subformula.isUntilFormula()) { - storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); - storm::logic::Formula const &subformula1 = untilFormula.getLeftSubformula(); - if (subformula1.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (!labeling.getStateHasLabel(targetLabel, state)) { - badStates.set(state); + + if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { + STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." 
<< std::endl); + storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); + std::cout << uniqueAnalysis.analyse() << std::endl; } - } - } else if (subformula1.isAtomicExpressionFormula()) { - std::stringstream stream; - stream << subformula1.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (!labeling.getStateHasLabel(targetLabel, state)) { - badStates.set(state); + + if (formula) { + if (formula->isProbabilityOperatorFormula()) { + storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); + storm::logic::Formula const &subformula1 = probFormula.getSubformula(); + std::set targetObservationSet; + storm::storage::BitVector targetStates(pomdp->getNumberOfStates()); + storm::storage::BitVector badStates(pomdp->getNumberOfStates()); + + bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, targetStates, badStates); + STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, + "The formula is not supported by the grid approximation"); + STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); + + + boost::optional prob1States; + boost::optional prob0States; + if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { + STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + pomdp = selfLoopEliminator.transform(); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." 
<< std::endl); + } + if (pomdpSettings.isQualitativeReductionSet()) { + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); + prob0States = qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()); + std::cout << *prob0States << std::endl; + STORM_PRINT_AND_LOG(" done." << std::endl); + STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); + prob1States = qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()); + std::cout << *prob1States << std::endl; + STORM_PRINT_AND_LOG(" done." << std::endl); + //std::cout << "actual reduction not yet implemented..." << std::endl; + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); + } + if (pomdpSettings.isGridApproximationSet()) { + + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); + std::unique_ptr> result; + + result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); + //result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); + overRes = result->overApproxValue; + underRes = result->underApproxValue; + if (overRes != underRes) { + STORM_PRINT("Overapproximation Result: " << overRes << std::endl) + STORM_PRINT("Underapproximation Result: " << underRes << std::endl) + } else { + STORM_PRINT("Result: " << overRes << std::endl) + } + } + if 
(pomdpSettings.isMemlessSearchSet()) { + // std::cout << std::endl; + // pomdp->writeDotToStream(std::cout); + // std::cout << std::endl; + // std::cout << std::endl; + storm::expressions::ExpressionManager expressionManager; + std::shared_ptr smtSolverFactory = std::make_shared(); + if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { + storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else { + STORM_LOG_ERROR("This method is not implemented."); + } + + + } + } else if (formula->isRewardOperatorFormula()) { + if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { + STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + pomdp = selfLoopEliminator.transform(); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." 
<< std::endl); + } + if (pomdpSettings.isGridApproximationSet()) { + std::string rewardModelName; + storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula(); + if (rewFormula.hasRewardModelName()) { + rewardModelName = rewFormula.getRewardModelName(); + } + storm::logic::Formula const &subformula1 = rewFormula.getSubformula(); + + std::set targetObservationSet; + //TODO refactor + bool validFormula = false; + if (subformula1.isEventuallyFormula()) { + storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); + storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); + if (subformula2.isAtomicLabelFormula()) { + storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); + validFormula = true; + std::string targetLabel = alFormula.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } else if (subformula2.isAtomicExpressionFormula()) { + validFormula = true; + std::stringstream stream; + stream << subformula2.asAtomicExpressionFormula().getExpression(); + storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); + std::string targetLabel = formula3.getLabel(); + auto labeling = pomdp->getStateLabeling(); + for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { + if (labeling.getStateHasLabel(targetLabel, state)) { + targetObservationSet.insert(pomdp->getObservation(state)); + } + } + } + } + STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, + "The formula is not supported by the grid approximation"); + STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); + + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = 
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + auto overRes = storm::utility::one(); + auto underRes = storm::utility::zero(); + std::unique_ptr> result; + result = checker.computeReachabilityReward(*pomdp, targetObservationSet, + rewFormula.getOptimalityType() == + storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution()); + overRes = result->overApproxValue; + underRes = result->underApproxValue; + } + + } + if (pomdpSettings.getMemoryBound() > 1) { + STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); + storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); + std::cout << memory.toString() << std::endl; + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); + pomdp = memoryUnfolder.transform(); + STORM_PRINT_AND_LOG(" done." << std::endl); + pomdp->printModelInformationToStream(std::cout); + } else { + STORM_PRINT_AND_LOG("Assumming memoryless schedulers." << std::endl;) + } + + // From now on the pomdp is considered memoryless + + if (pomdpSettings.isMecReductionSet()) { + STORM_PRINT_AND_LOG("Eliminating mec choices ..."); + // Note: Elimination of mec choices only preserves memoryless schedulers. + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + pomdp = mecChoiceEliminator.transform(*formula); + STORM_PRINT_AND_LOG(" done." << std::endl); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." 
<< std::endl); + pomdp->printModelInformationToStream(std::cout); + } + + if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { + if (pomdpSettings.isTransformSimpleSet()) { + STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + } else { + STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + } + pomdp->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG(" done." << std::endl); + } + + + if (pomdpSettings.isExportToParametricSet()) { + STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + std::string transformMode = pomdpSettings.getFscApplicationTypeString(); + auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); + STORM_PRINT_AND_LOG(" done." << std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Simplifying pMC..."); + //if (generalSettings.isBisimulationSet()) { + pmc = storm::api::performBisimulationMinimization(pmc->template as>(),{formula}, storm::storage::BisimulationType::Strong)->template as>(); + + //} + STORM_PRINT_AND_LOG(" done." << std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Exporting pMC..."); + storm::analysis::ConstraintCollector constraints(*pmc); + auto const& parameterSet = constraints.getVariables(); + std::vector parameters(parameterSet.begin(), parameterSet.end()); + std::vector parameterNames; + for (auto const& parameter : parameters) { + parameterNames.push_back(parameter.name()); + } + storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); + STORM_PRINT_AND_LOG(" done." << std::endl); + } + + } else { + STORM_LOG_WARN("Nothing to be done. 
Did you forget to specify a formula?"); } + } - } else { - return false; - } - storm::logic::Formula const &subformula2 = untilFormula.getRightSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); + + template + void processOptionsWithDdLib(storm::cli::SymbolicInput const& symbolicInput, storm::cli::ModelProcessingInformation const& mpi) { + switch (mpi.buildValueType) { + case storm::cli::ModelProcessingInformation::ValueType::FinitePrecision: + processOptionsWithValueTypeAndDdLib(symbolicInput, mpi); + break; + case storm::cli::ModelProcessingInformation::ValueType::Exact: + STORM_LOG_THROW(DdType == storm::dd::DdType::Sylvan, storm::exceptions::UnexpectedException, "Exact arithmetic is only supported with Dd library Sylvan."); + processOptionsWithValueTypeAndDdLib(symbolicInput, mpi); + break; + default: + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "Unexpected ValueType for model building."); } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); + + void processOptions() { + auto symbolicInput = 
storm::cli::parseSymbolicInput(); + storm::cli::ModelProcessingInformation mpi; + std::tie(symbolicInput, mpi) = storm::cli::preprocessSymbolicInput(symbolicInput); + switch (mpi.ddType) { + case storm::dd::DdType::CUDD: + processOptionsWithDdLib(symbolicInput, mpi); + break; + case storm::dd::DdType::Sylvan: + processOptionsWithDdLib(symbolicInput, mpi); + break; + default: + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "Unexpected Dd Type."); } - } } } - return validFormula; } /*! @@ -187,7 +403,7 @@ int main(const int argc, const char** argv) { //try { storm::utility::setUp(); storm::cli::printHeader("Storm-pomdp", argc, argv); - initializeSettings(); + storm::settings::initializePomdpSettings("Storm-POMDP", "storm-pomdp"); bool optionsCorrect = storm::cli::parseOptions(argc, argv); if (!optionsCorrect) { @@ -195,253 +411,8 @@ int main(const int argc, const char** argv) { } storm::cli::setUrgentOptions(); - typedef double VT; - - auto const& pomdpSettings = storm::settings::getModule(); - auto const &general = storm::settings::getModule(); - auto const &debug = storm::settings::getModule(); - - - if (general.isVerboseSet()) { - storm::utility::setLogLevel(l3pp::LogLevel::INFO); - } - if (debug.isDebugSet()) { - storm::utility::setLogLevel(l3pp::LogLevel::DEBUG); - } - if (debug.isTraceSet()) { - storm::utility::setLogLevel(l3pp::LogLevel::TRACE); - } - - auto symbolicInput = storm::cli::parseSymbolicInput(); - storm::cli::ModelProcessingInformation mpi; - std::tie(symbolicInput, mpi) = storm::cli::preprocessSymbolicInput(symbolicInput); - - // We should not export here if we are going to do some processing first. 
- auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); - STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); - - std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); - pomdp = makeCanonic.transform(); - - std::shared_ptr formula; - if (!symbolicInput.properties.empty()) { - formula = symbolicInput.properties.front().getRawFormula(); - STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); - STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); - } - - if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { - STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); - storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); - std::cout << uniqueAnalysis.analyse() << std::endl; - } - - if (formula) { - if (formula->isProbabilityOperatorFormula()) { - storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); - storm::logic::Formula const &subformula1 = probFormula.getSubformula(); - std::set targetObservationSet; - storm::storage::BitVector targetStates(pomdp->getNumberOfStates()); - storm::storage::BitVector badStates(pomdp->getNumberOfStates()); - - bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, targetStates, badStates); - STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, - "The formula is not supported by the grid approximation"); - STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); - - - boost::optional prob1States; - boost::optional prob0States; - if (pomdpSettings.isSelfloopReductionSet() && 
!storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); - } - if (pomdpSettings.isQualitativeReductionSet()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); - STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); - prob0States = qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()); - std::cout << *prob0States << std::endl; - STORM_PRINT_AND_LOG(" done." << std::endl); - STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); - prob1States = qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()); - std::cout << *prob1States << std::endl; - STORM_PRINT_AND_LOG(" done." << std::endl); - //std::cout << "actual reduction not yet implemented..." 
<< std::endl; - storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); - pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); - } - if (pomdpSettings.isGridApproximationSet()) { - - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; - - result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - //result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - overRes = result->overApproxValue; - underRes = result->underApproxValue; - if (overRes != underRes) { - STORM_PRINT("Overapproximation Result: " << overRes << std::endl) - STORM_PRINT("Underapproximation Result: " << underRes << std::endl) - } else { - STORM_PRINT("Result: " << overRes << std::endl) - } - } - if (pomdpSettings.isMemlessSearchSet()) { -// std::cout << std::endl; -// pomdp->writeDotToStream(std::cout); -// std::cout << std::endl; -// std::cout << std::endl; - storm::expressions::ExpressionManager expressionManager; - std::shared_ptr smtSolverFactory = std::make_shared(); - if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { - storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); - memlessSearch.findNewStrategyForSomeState(5); - } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { - storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, 
smtSolverFactory); - memlessSearch.findNewStrategyForSomeState(5); - } else { - STORM_LOG_ERROR("This method is not implemented."); - } - - - } - } else if (formula->isRewardOperatorFormula()) { - if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); - } - if (pomdpSettings.isGridApproximationSet()) { - std::string rewardModelName; - storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula(); - if (rewFormula.hasRewardModelName()) { - rewardModelName = rewFormula.getRewardModelName(); - } - storm::logic::Formula const &subformula1 = rewFormula.getSubformula(); - - std::set targetObservationSet; - //TODO refactor - bool validFormula = false; - if (subformula1.isEventuallyFormula()) { - storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); - storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - 
storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } - } - STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, - "The formula is not supported by the grid approximation"); - STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); - - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; - result = checker.computeReachabilityReward(*pomdp, targetObservationSet, - rewFormula.getOptimalityType() == - storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); - overRes = result->overApproxValue; - underRes = result->underApproxValue; - } - - } - if (pomdpSettings.getMemoryBound() > 1) { - STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); - storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); - std::cout << memory.toString() << std::endl; - storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); - pomdp = memoryUnfolder.transform(); - STORM_PRINT_AND_LOG(" done." << std::endl); - pomdp->printModelInformationToStream(std::cout); - } else { - STORM_PRINT_AND_LOG("Assumming memoryless schedulers." 
<< std::endl;) - } - - // From now on the pomdp is considered memoryless - - if (pomdpSettings.isMecReductionSet()) { - STORM_PRINT_AND_LOG("Eliminating mec choices ..."); - // Note: Elimination of mec choices only preserves memoryless schedulers. - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); - pomdp = mecChoiceEliminator.transform(*formula); - STORM_PRINT_AND_LOG(" done." << std::endl); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); - pomdp->printModelInformationToStream(std::cout); - } - - if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { - if (pomdpSettings.isTransformSimpleSet()) { - STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); - } else { - STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); - } - pomdp->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG(" done." << std::endl); - } - - - if (pomdpSettings.isExportToParametricSet()) { - STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); - std::string transformMode = pomdpSettings.getFscApplicationTypeString(); - auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); - STORM_PRINT_AND_LOG(" done." << std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Simplifying pMC..."); - //if (generalSettings.isBisimulationSet()) { - pmc = storm::api::performBisimulationMinimization(pmc->as>(),{formula}, storm::storage::BisimulationType::Strong)->as>(); - - //} - STORM_PRINT_AND_LOG(" done." 
<< std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Exporting pMC..."); - storm::analysis::ConstraintCollector constraints(*pmc); - auto const& parameterSet = constraints.getVariables(); - std::vector parameters(parameterSet.begin(), parameterSet.end()); - std::vector parameterNames; - for (auto const& parameter : parameters) { - parameterNames.push_back(parameter.name()); - } - storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); - STORM_PRINT_AND_LOG(" done." << std::endl); - } - - } else { - STORM_LOG_WARN("Nothing to be done. Did you forget to specify a formula?"); - } + // Invoke storm-pomdp with obtained settings + storm::pomdp::cli::processOptions(); // All operations have now been performed, so we clean up everything and terminate. storm::utility::cleanUp(); From 635fbc658a0a09c11e4e628d56a44f5dc61e2530 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 17 Mar 2020 15:11:25 +0100 Subject: [PATCH 052/155] storm-pomdp: towards a more mature cli --- src/storm-pomdp-cli/storm-pomdp.cpp | 478 ++++++++---------- .../analysis/FormulaInformation.cpp | 177 +++++++ src/storm-pomdp/analysis/FormulaInformation.h | 69 +++ 3 files changed, 444 insertions(+), 280 deletions(-) create mode 100644 src/storm-pomdp/analysis/FormulaInformation.cpp create mode 100644 src/storm-pomdp/analysis/FormulaInformation.h diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 6aae4d493..634316e33 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -22,11 +22,15 @@ #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" #include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" +#include "storm-pomdp/analysis/FormulaInformation.h" #include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" #include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" 
+ #include "storm/api/storm.h" +#include "storm/utility/Stopwatch.h" #include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" #include @@ -34,96 +38,184 @@ namespace storm { namespace pomdp { namespace cli { - - template - bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& targetStates, storm::storage::BitVector& badStates) { - //TODO refactor (use model checker to determine the states, then transform into observations). - //TODO rename into appropriate function name. - bool validFormula = false; - if (subformula.isEventuallyFormula()) { - storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); - storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); - } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); - } - } + /// Perform preprocessings based on the graph 
structure (if requested or necessary). Return true, if some preprocessing has been done + template + bool performPreprocessing(std::shared_ptr>& pomdp, storm::pomdp::analysis::FormulaInformation& formulaInfo, storm::logic::Formula const& formula) { + auto const& pomdpSettings = storm::settings::getModule(); + bool preprocessingPerformed = false; + if (pomdpSettings.isSelfloopReductionSet()) { + bool apply = formulaInfo.isNonNestedReachabilityProbability() && formulaInfo.maximize(); + apply = apply || (formulaInfo.isNonNestedExpectedRewardFormula() && formulaInfo.minimize()); + if (apply) { + STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + pomdp = selfLoopEliminator.transform(); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); + preprocessingPerformed = true; } - } else if (subformula.isUntilFormula()) { - storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); - storm::logic::Formula const &subformula1 = untilFormula.getLeftSubformula(); - if (subformula1.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (!labeling.getStateHasLabel(targetLabel, state)) { - badStates.set(state); - } - } - } else if (subformula1.isAtomicExpressionFormula()) { - std::stringstream stream; - stream << subformula1.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < 
pomdp->getNumberOfStates(); ++state) { - if (!labeling.getStateHasLabel(targetLabel, state)) { - badStates.set(state); - } + } + if (pomdpSettings.isQualitativeReductionSet() && formulaInfo.isNonNestedReachabilityProbability()) { + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); + storm::storage::BitVector prob0States = qualitativeAnalysis.analyseProb0(formula.asProbabilityOperatorFormula()); + std::cout << prob0States << std::endl; + STORM_PRINT_AND_LOG(" done." << std::endl); + STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); + storm::storage::BitVector prob1States = qualitativeAnalysis.analyseProb1(formula.asProbabilityOperatorFormula()); + std::cout << prob1States << std::endl; + STORM_PRINT_AND_LOG(" done." << std::endl); + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + pomdp = kpt.transform(*pomdp, prob0States, prob1States); + // Update formulaInfo to changes from Preprocessing + formulaInfo.updateTargetStates(*pomdp, std::move(prob1States)); + formulaInfo.updateSinkStates(*pomdp, std::move(prob0States)); + preprocessingPerformed = true; + } else if (pomdpSettings.isGridApproximationSet()) { + // We still might need to apply the KnownProbabilityTransformer, to ensure that the grid approximation works properly + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getTargetStates().observationClosed || !formulaInfo.getSinkStates().states.empty()) { + // Make target states observation closed and/or sink states absorbing + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + auto prob0States = formulaInfo.getSinkStates().states; + auto prob1States = formulaInfo.getTargetStates().states; + pomdp = kpt.transform(*pomdp, prob0States, prob1States); + // Update formulaInfo to changes from Preprocessing + 
formulaInfo.updateTargetStates(*pomdp, std::move(prob1States)); + formulaInfo.updateSinkStates(*pomdp, std::move(prob0States)); + preprocessingPerformed = true; } + } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "Target states of reward property are not observation closed. This case is not yet implemented."); + } + } + return preprocessingPerformed; + } + + template + bool performAnalysis(std::shared_ptr> const& pomdp, storm::pomdp::analysis::FormulaInformation const& formulaInfo) { + auto const& pomdpSettings = storm::settings::getModule(); + bool analysisPerformed = false; + if (pomdpSettings.isGridApproximationSet()) { + STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "Unsupported formula type for Grid approximation."); + STORM_LOG_THROW(!formulaInfo.getTargetStates().empty(), storm::exceptions::UnexpectedException, "The set of target states is empty."); + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::UnexpectedException, "Observations on target states also occur on non-target states. This is unexpected at this point."); + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + std::unique_ptr> result; + if (formulaInfo.isNonNestedReachabilityProbability()) { + result = checker.refineReachabilityProbability(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.minimize(), pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); + } else { + // TODO: no exploration threshold? 
+ result = checker.computeReachabilityReward(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.minimize(), pomdpSettings.getGridResolution()); + } + ValueType overRes = result->overApproxValue; + ValueType underRes = result->underApproxValue; + if (overRes != underRes) { + STORM_PRINT("Overapproximation Result: " << overRes << std::endl) + STORM_PRINT("Underapproximation Result: " << underRes << std::endl) + } else { + STORM_PRINT("Result: " << overRes << std::endl) + } + analysisPerformed = true; + } + if (pomdpSettings.isMemlessSearchSet()) { + STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability(), storm::exceptions::NotSupportedException, "Qualitative memoryless scheduler search is not implemented for this property type."); + + // std::cout << std::endl; + // pomdp->writeDotToStream(std::cout); + // std::cout << std::endl; + // std::cout << std::endl; + storm::expressions::ExpressionManager expressionManager; + std::shared_ptr smtSolverFactory = std::make_shared(); + if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { + storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.getTargetStates().states, formulaInfo.getSinkStates().states, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.getTargetStates().states, formulaInfo.getSinkStates().states, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); } else { - return false; + STORM_LOG_ERROR("This method is not implemented."); } - storm::logic::Formula const &subformula2 = untilFormula.getRightSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = 
alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); - } + analysisPerformed = true; + } + return analysisPerformed; + } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - targetStates.set(state); - } - } + template + bool performTransformation(std::shared_ptr>& pomdp, storm::logic::Formula const& formula) { + auto const& pomdpSettings = storm::settings::getModule(); + bool transformationPerformed = false; + bool memoryUnfolded = false; + if (pomdpSettings.getMemoryBound() > 1) { + STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); + storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); + std::cout << memory.toString() << std::endl; + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); + pomdp = memoryUnfolder.transform(); + STORM_PRINT_AND_LOG(" done." 
<< std::endl); + pomdp->printModelInformationToStream(std::cout); + transformationPerformed = true; + memoryUnfolded = true; + } + + // From now on the pomdp is considered memoryless + + if (pomdpSettings.isMecReductionSet()) { + STORM_PRINT_AND_LOG("Eliminating mec choices ..."); + // Note: Elimination of mec choices only preserves memoryless schedulers. + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + pomdp = mecChoiceEliminator.transform(formula); + STORM_PRINT_AND_LOG(" done." << std::endl); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); + pomdp->printModelInformationToStream(std::cout); + transformationPerformed = true; + } + + if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { + if (pomdpSettings.isTransformSimpleSet()) { + STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + } else { + STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + } + pomdp->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG(" done." << std::endl); + transformationPerformed = true; + } + + if (pomdpSettings.isExportToParametricSet()) { + STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + std::string transformMode = pomdpSettings.getFscApplicationTypeString(); + auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); + STORM_PRINT_AND_LOG(" done." 
<< std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Simplifying pMC..."); + //if (generalSettings.isBisimulationSet()) { + pmc = storm::api::performBisimulationMinimization(pmc->template as>(),{formula.asSharedPointer()}, storm::storage::BisimulationType::Strong)->template as>(); + + //} + STORM_PRINT_AND_LOG(" done." << std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Exporting pMC..."); + storm::analysis::ConstraintCollector constraints(*pmc); + auto const& parameterSet = constraints.getVariables(); + std::vector parameters(parameterSet.begin(), parameterSet.end()); + std::vector parameterNames; + for (auto const& parameter : parameters) { + parameterNames.push_back(parameter.name()); } + storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); + STORM_PRINT_AND_LOG(" done." << std::endl); + transformationPerformed = true; + } + if (transformationPerformed && !memoryUnfolded) { + STORM_PRINT_AND_LOG("Implicitly assumed restriction to memoryless schedulers for at least one transformation." 
<< std::endl); } - return validFormula; + return transformationPerformed; } template @@ -131,7 +223,11 @@ namespace storm { auto const& pomdpSettings = storm::settings::getModule(); auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); - STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); + if (!model) { + STORM_PRINT_AND_LOG("No input model given."); + return; + } + STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); std::shared_ptr> pomdp = model->template as>(); storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); @@ -151,207 +247,28 @@ namespace storm { } if (formula) { - if (formula->isProbabilityOperatorFormula()) { - storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); - storm::logic::Formula const &subformula1 = probFormula.getSubformula(); - std::set targetObservationSet; - storm::storage::BitVector targetStates(pomdp->getNumberOfStates()); - storm::storage::BitVector badStates(pomdp->getNumberOfStates()); - - bool validFormula = extractTargetAndSinkObservationSets(pomdp, subformula1, targetObservationSet, targetStates, badStates); - STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, - "The formula is not supported by the grid approximation"); - STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); - - - boost::optional prob1States; - boost::optional prob0States; - if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - 
storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); - } - if (pomdpSettings.isQualitativeReductionSet()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); - STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); - prob0States = qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()); - std::cout << *prob0States << std::endl; - STORM_PRINT_AND_LOG(" done." << std::endl); - STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); - prob1States = qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()); - std::cout << *prob1States << std::endl; - STORM_PRINT_AND_LOG(" done." << std::endl); - //std::cout << "actual reduction not yet implemented..." << std::endl; - storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); - pomdp = kpt.transform(*pomdp, *prob0States, *prob1States); - } - if (pomdpSettings.isGridApproximationSet()) { - - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; - - result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - //result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - overRes = result->overApproxValue; - underRes = result->underApproxValue; - if (overRes != underRes) { - 
STORM_PRINT("Overapproximation Result: " << overRes << std::endl) - STORM_PRINT("Underapproximation Result: " << underRes << std::endl) - } else { - STORM_PRINT("Result: " << overRes << std::endl) - } - } - if (pomdpSettings.isMemlessSearchSet()) { - // std::cout << std::endl; - // pomdp->writeDotToStream(std::cout); - // std::cout << std::endl; - // std::cout << std::endl; - storm::expressions::ExpressionManager expressionManager; - std::shared_ptr smtSolverFactory = std::make_shared(); - if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { - storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); - memlessSearch.findNewStrategyForSomeState(5); - } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { - storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, targetObservationSet, targetStates, badStates, smtSolverFactory); - memlessSearch.findNewStrategyForSomeState(5); - } else { - STORM_LOG_ERROR("This method is not implemented."); - } - - - } - } else if (formula->isRewardOperatorFormula()) { - if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." 
<< std::endl); - } - if (pomdpSettings.isGridApproximationSet()) { - std::string rewardModelName; - storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula(); - if (rewFormula.hasRewardModelName()) { - rewardModelName = rewFormula.getRewardModelName(); - } - storm::logic::Formula const &subformula1 = rewFormula.getSubformula(); - - std::set targetObservationSet; - //TODO refactor - bool validFormula = false; - if (subformula1.isEventuallyFormula()) { - storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula(); - storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); - if (subformula2.isAtomicLabelFormula()) { - storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); - validFormula = true; - std::string targetLabel = alFormula.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } else if (subformula2.isAtomicExpressionFormula()) { - validFormula = true; - std::stringstream stream; - stream << subformula2.asAtomicExpressionFormula().getExpression(); - storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); - std::string targetLabel = formula3.getLabel(); - auto labeling = pomdp->getStateLabeling(); - for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { - if (labeling.getStateHasLabel(targetLabel, state)) { - targetObservationSet.insert(pomdp->getObservation(state)); - } - } - } - } - STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException, - "The formula is not supported by the grid approximation"); - STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!"); - - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = 
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); - std::unique_ptr> result; - result = checker.computeReachabilityReward(*pomdp, targetObservationSet, - rewFormula.getOptimalityType() == - storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); - overRes = result->overApproxValue; - underRes = result->underApproxValue; - } - - } - if (pomdpSettings.getMemoryBound() > 1) { - STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); - storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); - std::cout << memory.toString() << std::endl; - storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); - pomdp = memoryUnfolder.transform(); - STORM_PRINT_AND_LOG(" done." << std::endl); - pomdp->printModelInformationToStream(std::cout); - } else { - STORM_PRINT_AND_LOG("Assumming memoryless schedulers." << std::endl;) - } - - // From now on the pomdp is considered memoryless - - if (pomdpSettings.isMecReductionSet()) { - STORM_PRINT_AND_LOG("Eliminating mec choices ..."); - // Note: Elimination of mec choices only preserves memoryless schedulers. - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); - pomdp = mecChoiceEliminator.transform(*formula); - STORM_PRINT_AND_LOG(" done." << std::endl); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." 
<< std::endl); + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(*pomdp, *formula); + STORM_LOG_THROW(!formulaInfo.isUnsupported(), storm::exceptions::InvalidPropertyException, "The formula '" << *formula << "' is not supported by storm-pomdp."); + + storm::utility::Stopwatch sw(true); + // Note that formulaInfo contains state-based information which potentially needs to be updated during preprocessing + if (performPreprocessing(pomdp, formulaInfo, *formula)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for graph-based POMDP (pre-)processing: " << sw << "s." << std::endl); pomdp->printModelInformationToStream(std::cout); } - - if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { - if (pomdpSettings.isTransformSimpleSet()) { - STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); - } else { - STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); - } - pomdp->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG(" done." << std::endl); + + sw.restart(); + if (performAnalysis(pomdp, formulaInfo)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for POMDP analysis: " << sw << "s." << std::endl); } - - - if (pomdpSettings.isExportToParametricSet()) { - STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); - std::string transformMode = pomdpSettings.getFscApplicationTypeString(); - auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); - STORM_PRINT_AND_LOG(" done." 
<< std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Simplifying pMC..."); - //if (generalSettings.isBisimulationSet()) { - pmc = storm::api::performBisimulationMinimization(pmc->template as>(),{formula}, storm::storage::BisimulationType::Strong)->template as>(); - - //} - STORM_PRINT_AND_LOG(" done." << std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Exporting pMC..."); - storm::analysis::ConstraintCollector constraints(*pmc); - auto const& parameterSet = constraints.getVariables(); - std::vector parameters(parameterSet.begin(), parameterSet.end()); - std::vector parameterNames; - for (auto const& parameter : parameters) { - parameterNames.push_back(parameter.name()); - } - storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); - STORM_PRINT_AND_LOG(" done." << std::endl); + + sw.restart(); + if (performTransformation(pomdp, *formula)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for POMDP transformation(s): " << sw << "s." << std::endl); } - } else { STORM_LOG_WARN("Nothing to be done. Did you forget to specify a formula?"); } @@ -360,6 +277,7 @@ namespace storm { template void processOptionsWithDdLib(storm::cli::SymbolicInput const& symbolicInput, storm::cli::ModelProcessingInformation const& mpi) { + STORM_LOG_ERROR_COND(mpi.buildValueType == mpi.verificationValueType, "Build value type differs from verification value type. 
Will ignore Verification value type."); switch (mpi.buildValueType) { case storm::cli::ModelProcessingInformation::ValueType::FinitePrecision: processOptionsWithValueTypeAndDdLib(symbolicInput, mpi); diff --git a/src/storm-pomdp/analysis/FormulaInformation.cpp b/src/storm-pomdp/analysis/FormulaInformation.cpp new file mode 100644 index 000000000..836abf881 --- /dev/null +++ b/src/storm-pomdp/analysis/FormulaInformation.cpp @@ -0,0 +1,177 @@ +#include "storm-pomdp/analysis/FormulaInformation.h" +#include "storm/logic/Formulas.h" +#include "storm/logic/FragmentSpecification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/InvalidPropertyException.h" + +namespace storm { + namespace pomdp { + namespace analysis { + + bool FormulaInformation::StateSet::empty() const { + STORM_LOG_ASSERT(states.empty() == observations.empty(), "Inconsistent StateSet."); + return observations.empty(); + } + + FormulaInformation::FormulaInformation() : type(Type::Unsupported) { + // Intentionally left empty + } + + FormulaInformation::FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional const& rewardModelName) : type(type), optimizationDirection(dir), rewardModelName(rewardModelName) { + STORM_LOG_ASSERT(!this->rewardModelName.is_initialized() || this->type == Type::NonNestedExpectedRewardFormula, "Got a reward model name for a non-reward formula."); + + } + + FormulaInformation::Type const& FormulaInformation::getType() const { + return type; + } + + bool FormulaInformation::isNonNestedReachabilityProbability() const { + return type == Type::NonNestedReachabilityProbability; + } + + bool FormulaInformation::isNonNestedExpectedRewardFormula() const { + return type == 
Type::NonNestedExpectedRewardFormula; + } + + bool FormulaInformation::isUnsupported() const { + return type == Type::Unsupported; + } + + typename FormulaInformation::StateSet const& FormulaInformation::getTargetStates() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states requested for unexpected formula type."); + return targetStates.get(); + } + + typename FormulaInformation::StateSet const& FormulaInformation::getSinkStates() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); + return sinkStates.get(); + } + + std::string const& FormulaInformation::getRewardModelName() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula, "Reward model requested for unexpected formula type."); + return rewardModelName.get(); + } + + storm::solver::OptimizationDirection const& FormulaInformation::getOptimizationDirection() const { + return optimizationDirection; + } + + bool FormulaInformation::minimize() const { + return storm::solver::minimize(optimizationDirection); + } + + bool FormulaInformation::maximize() const { + return storm::solver::maximize(optimizationDirection); + } + + template + FormulaInformation::StateSet getStateSet(PomdpType const& pomdp, storm::storage::BitVector&& inputStates) { + FormulaInformation::StateSet result; + result.states = std::move(inputStates); + for (auto const& state : result.states) { + result.observations.insert(pomdp.getObservation(state)); + } + // check if this set is observation-closed, i.e., whether there is a state outside of this set with one of the observations collected above + result.observationClosed = true; + for (uint64_t state = result.states.getNextUnsetIndex(0); state < result.states.size(); state = result.states.getNextUnsetIndex(state + 1)) { + if (result.observations.count(pomdp.getObservation(state)) > 0) { + 
result.observationClosed = false; + break; + } + } + return result; + } + + template + void FormulaInformation::updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates) { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states updated for unexpected formula type."); + targetStates = getStateSet(pomdp, std::move(newTargetStates)); + } + + template + void FormulaInformation::updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates) { + STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); + sinkStates = getStateSet(pomdp, std::move(newSinkStates)); + } + + template + storm::storage::BitVector getStates(storm::logic::Formula const& propositionalFormula, bool formulaInverted, PomdpType const& pomdp) { + storm::modelchecker::SparsePropositionalModelChecker mc(pomdp); + auto checkResult = mc.check(propositionalFormula); + storm::storage::BitVector resultBitVector(checkResult->asExplicitQualitativeCheckResult().getTruthValuesVector()); + if (formulaInverted) { + resultBitVector.complement(); + } + return resultBitVector; + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::ProbabilityOperatorFormula const& formula) { + STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); + STORM_LOG_WARN_COND(!formula.hasBound(), "The probability threshold for the given property will be ignored."); + auto const& subformula = formula.getSubformula(); + std::shared_ptr targetStatesFormula, constraintsStatesFormula; + if (subformula.isEventuallyFormula()) { + targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); + constraintsStatesFormula = 
storm::logic::Formula::getTrueFormula()->asSharedPointer(); + } else if (subformula.isUntilFormula()) { + storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); + targetStatesFormula = untilFormula.getRightSubformula().asSharedPointer(); + constraintsStatesFormula = untilFormula.getLeftSubformula().asSharedPointer(); + } + if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional()) && constraintsStatesFormula && constraintsStatesFormula->isInFragment(storm::logic::propositional())) { + FormulaInformation result(FormulaInformation::Type::NonNestedReachabilityProbability, formula.getOptimalityType()); + result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); + result.updateSinkStates(pomdp, getStates(*constraintsStatesFormula, true, pomdp)); + return result; + } + return FormulaInformation(); + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::RewardOperatorFormula const& formula) { + STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); + STORM_LOG_WARN_COND(formula.hasBound(), "The reward threshold for the given property will be ignored."); + std::string rewardModelName = ""; + if (formula.hasRewardModelName()) { + rewardModelName = formula.getRewardModelName(); + STORM_LOG_THROW(pomdp.hasRewardModel(rewardModelName), storm::exceptions::InvalidPropertyException, "Selected reward model with name '" << rewardModelName << "' does not exist."); + } else { + STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::InvalidPropertyException, "Reward operator formula does not specify a reward model and the reward model is not unique."); + rewardModelName = pomdp.getUniqueRewardModelName(); + } + auto const& subformula = formula.getSubformula(); + std::shared_ptr targetStatesFormula; + if (subformula.isEventuallyFormula()) { + 
targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); + } + if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional())) { + FormulaInformation result(FormulaInformation::Type::NonNestedReachabilityProbability, formula.getOptimalityType(), rewardModelName); + result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); + return result; + } + return FormulaInformation(); + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula) { + if (formula.isProbabilityOperatorFormula()) { + return getFormulaInformation(pomdp, formula.asProbabilityOperatorFormula()); + } else if (formula.isRewardOperatorFormula()) { + return getFormulaInformation(pomdp, formula.asRewardOperatorFormula()); + } + return FormulaInformation(); + } + + template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); + template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); + + } + } +} diff --git a/src/storm-pomdp/analysis/FormulaInformation.h b/src/storm-pomdp/analysis/FormulaInformation.h new file mode 100644 index 000000000..10a92ab02 --- /dev/null +++ b/src/storm-pomdp/analysis/FormulaInformation.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include + +#include "storm/storage/BitVector.h" +#include "storm/solver/OptimizationDirection.h" + +namespace storm { + + namespace logic { + class Formula; + } + + namespace pomdp { + namespace analysis { + class FormulaInformation { + public: + /// Characterizes a certain set of states + struct StateSet { + storm::storage::BitVector states; // The set of states + std::set observations; // The set of the observations that are assigned to at least one state of the set + bool observationClosed; // True iff this state set can be uniquely characterized by the 
observations + bool empty() const; + }; + + /// Possible supported formula types + enum class Type { + NonNestedReachabilityProbability, // e.g. 'Pmax=? [F "target"]' or 'Pmin=? [!"sink" U "target"]' + NonNestedExpectedRewardFormula, // e.g. 'Rmin=? [F x>0 ]' + Unsupported // The formula type is unsupported + }; + + FormulaInformation(); // Unsupported + FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional const& rewardModelName = boost::none); + + Type const& getType() const; + bool isNonNestedReachabilityProbability() const; + bool isNonNestedExpectedRewardFormula() const; + bool isUnsupported() const; + StateSet const& getTargetStates() const; + StateSet const& getSinkStates() const; // Shall not be called for reward formulas + std::string const& getRewardModelName() const; // Shall not be called for probability formulas + storm::solver::OptimizationDirection const& getOptimizationDirection() const; + bool minimize() const; + bool maximize() const; + + template + void updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates); + + template + void updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates); + + private: + Type type; + storm::solver::OptimizationDirection optimizationDirection; + boost::optional targetStates; + boost::optional sinkStates; + boost::optional rewardModelName; + }; + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula); + + + } + } +} From c2582058c97252e0e3f2e474504b66fe9a5f8be8 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 17 Mar 2020 19:33:51 +0100 Subject: [PATCH 053/155] Added first version of refinement with reuse of previous results --- .../ApproximatePOMDPModelchecker.cpp | 390 ++++++++++++++++-- .../ApproximatePOMDPModelchecker.h | 12 +- 2 files changed, 366 insertions(+), 36 deletions(-) diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index e88daf09b..63ea67ae3 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -53,7 +53,8 @@ namespace storm { std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); STORM_PRINT("Underlying MDP" << std::endl) underlyingMdp.printModelInformationToStream(std::cout); - std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + std::unique_ptr underlyingRes( + storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); @@ -90,25 +91,26 @@ namespace storm { std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); auto overRes = storm::utility::one(); auto underRes = storm::utility::zero(); - uint64_t underApproxModelSize = 50; + std::set changedObservations; + uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; - std::unique_ptr> res; - while (refinementCounter < 30) { - res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, - initialOverApproxMap, underApproxMap, underApproxModelSize); + STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) + std::shared_ptr> res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, + explorationThreshold, 
initialOverApproxMap, underApproxMap, underApproxModelSize); + ValueType lastMinScore = storm::utility::infinity(); + while (refinementCounter < 1000) { // TODO the actual refinement // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); std::vector beliefCount(pomdp.getNrObservations(), 0); - bsmap_type::right_map::const_iterator iter = res->underApproxBeliefStateMap.right.begin(); - while (iter != res->underApproxBeliefStateMap.right.end()) { - auto currentBelief = res->beliefList[iter->second]; + bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); + while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { + auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; beliefCount[currentBelief.observation] += 1; - //TODO rename, this is getting confusing - bsmap_type::left_const_iterator it = res->overApproxBeliefStateMap.left.find(iter->second); - if (it != res->overApproxBeliefStateMap.left.end()) { + bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); + if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { // If there is an over-approximate value for the belief, use it - auto diff = res->overApproxMap[it->second] - res->underApproxMap[iter->first]; + auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; obsAccumulator[currentBelief.observation] += diff; } else { //otherwise, we approximate a value TODO this is critical, we have to think about it @@ -127,26 +129,52 @@ namespace storm { } } } - obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[iter->first]; + obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; } - ++iter; + 
++underApproxStateBeliefIter; } - for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - obsAccumulator[i] /= beliefCount[i]; - } + /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { + obsAccumulator[i] /= beliefCount[i]; + }*/ + changedObservations.clear(); //TODO think about some other scoring methods auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); - STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - STORM_PRINT(" Obs | Score " << std::endl << "---------|---------" << std::endl) + //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { + lastMinScore = maxAvgDifference; + auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + STORM_PRINT("Set all to " << maxRes + 1 << std::endl) for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - STORM_PRINT(i << " |" << obsAccumulator[i] << std::endl) - if (obsAccumulator[i] == maxAvgDifference) { - observationResolutionVector[i] *= 2; + observationResolutionVector[i] = maxRes + 1; + changedObservations.insert(i); + } + /*} else { + lastMinScore = std::min(maxAvgDifference, lastMinScore); + STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) + STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) + //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) + for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { + //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) + if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { + //STORM_PRINT(" *** ") + observationResolutionVector[i] += 1; + changedObservations.insert(i); + } + //STORM_PRINT(std::endl) } + }*/ + if (underApproxModelSize < std::numeric_limits::max() - 101) { + underApproxModelSize += 100; + } + STORM_PRINT( + "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << 
"------------------------------" << std::endl) + res = computeRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); + //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); + if (cc.isEqual(res->overApproxValue, res->underApproxValue)) { + break; } - underApproxModelSize += 10; ++refinementCounter; } @@ -170,7 +198,7 @@ namespace storm { template - std::unique_ptr> + std::shared_ptr> ApproximatePOMDPModelchecker::computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, @@ -348,7 +376,8 @@ namespace storm { for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { + auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]); + if (approxId == uint64_t(-1)) { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); @@ -380,8 +409,7 @@ namespace storm { ++nextId; ++mdpStateId; } else { - transitionInActionBelief[beliefStateMap.left.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]))] = - iter->second * lambdas[j]; + transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } } } @@ -466,14 +494,308 @@ namespace storm { return std::make_unique>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, beliefList, beliefIsTarget, beliefStateMap, - underApproxComponents->underApproxBeliefStateMap}); + underApproxComponents->underApproxMap, 
beliefList, beliefGrid, beliefIsTarget, beliefStateMap, + underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); } -/* template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(){}*/ + std::shared_ptr> + ApproximatePOMDPModelchecker::computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + std::vector &observationResolutionVector, + bool computeRewards, double explorationThreshold, + std::shared_ptr> refinementComponents, + std::set changedObservations, + boost::optional> overApproximationMap, + boost::optional> underApproximationMap, + uint64_t maxUaModelSize) { + // Note that a persistent cache is not support by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively + std::map>> subSimplexCache; + std::map> lambdaCache; + + uint64_t nextBeliefId = refinementComponents->beliefList.size(); + uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); + std::set relevantStates; + for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { + auto currentBelief = refinementComponents->beliefList[iter.first]; + if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { + relevantStates.insert(iter.second); + } + } + + std::set> statesAndActionsToCheck; + for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { + for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { + for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( + state, row).begin(); + itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { + if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { 
+ statesAndActionsToCheck.insert(std::make_pair(state, row)); + break; + } + } + } + } + + std::deque beliefsToBeExpanded; + + std::map, std::map> transitionsStateActionPair; + for (auto const &stateActionPair : statesAndActionsToCheck) { + auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); + auto action = stateActionPair.second; + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, refinementComponents->beliefList[currId], + action); + std::map transitionInActionBelief; + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); + nextBeliefId = refinementComponents->beliefList.size(); + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + //TODO add caching + if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { + auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, + observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], + pomdp.getNumberOfStates()); + subSimplex = temp.first; + lambdas = temp.second; + if (cacheSubsimplices) { + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + } + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); + if (approxId == uint64_t(-1)) { + // if the triangulated belief was not 
found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; + refinementComponents->beliefList.push_back(gridBelief); + refinementComponents->beliefGrid.push_back(gridBelief); + refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); + // compute overapproximate value using MDP result map + //TODO do this + /* + if (boundMapsSet) { + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + } + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; + } */ + beliefsToBeExpanded.push_back(nextBeliefId); + refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); + transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; + ++nextBeliefId; + ++nextStateId; + } else { + transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; + } + } + } + } + /* TODO + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + refinementComponents->beliefList[currId]); + }*/ + if (!transitionInActionBelief.empty()) { + transitionsStateActionPair[stateActionPair] = transitionInActionBelief; + } + } + // Expand newly added beliefs + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = refinementComponents->beliefIsTarget[currId]; + + /* TODO + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - 
weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); + continue; + }*/ + + if (isTarget) { + // Depending on whether we compute rewards, we select the right initial result + // MDP stuff + transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = + {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; + } else { + uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + std::vector actionRewardsInState(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, + refinementComponents->beliefList[currId], + action); + std::map transitionInActionBelief; + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + targetObservations, refinementComponents->beliefList[currId], action, observation, + nextBeliefId); + nextBeliefId = refinementComponents->beliefList.size(); + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + /* TODO Caching + if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { */ + auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, + 
observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], + pomdp.getNumberOfStates()); + subSimplex = temp.first; + lambdas = temp.second; + /*if (cacheSubsimplices) { + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + }*/ + + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); + if (approxId == uint64_t(-1)) { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; + refinementComponents->beliefList.push_back(gridBelief); + refinementComponents->beliefGrid.push_back(gridBelief); + refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); + // compute overapproximate value using MDP result map + /* + if (boundMapsSet) { + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + } + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; + } */ + beliefsToBeExpanded.push_back(nextBeliefId); + refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); + transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; + ++nextBeliefId; + ++nextStateId; + } else { + transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; + } + } + } + } + /* + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, 
pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + }*/ + if (!transitionInActionBelief.empty()) { + transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; + } + } + /* + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + } + + + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief);*/ + } + } + + storm::models::sparse::StateLabeling mdpLabeling(nextStateId); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); + + uint_fast64_t currentRow = 0; + uint_fast64_t currentRowGroup = 0; + storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); + auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 0, storm::utility::one()); + ++currentRow; + ++currentRowGroup; + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 1, storm::utility::one()); + ++currentRow; + ++currentRowGroup; + for (uint64_t state = 2; state < nextStateId; ++state) { + smb.newRowGroup(currentRow); + //STORM_PRINT("Loop State: " << state << std::endl) + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); + bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; + for (uint64_t action = 0; action < numChoices; ++action) { + if 
(transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { + for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { + smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); + } + } else { + for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { + smb.addNextValue(currentRow, iter.first, iter.second); + } + } + ++currentRow; + if (isTarget) { + // If the state is a target, we only have one action, thus we add the target label and stop the iteration + mdpLabeling.addLabelToState("target", state); + break; + } + } + ++currentRowGroup; + } + storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + overApproxMdp.printModelInformationToStream(std::cout); + + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + auto task = storm::api::createTask(property, false); + storm::utility::Stopwatch overApproxTimer(true); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproxTimer.stop(); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); + auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; + + STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) + //auto underApprox = weightedSumUnderMap[initialBelief.id]; + auto underApproxComponents = computeUnderapproximation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, + refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); + + return std::make_shared>( + RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, + underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, + refinementComponents->beliefIsTarget, + refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap}); + } template ValueType @@ -835,8 +1157,6 @@ namespace storm { model->printModelInformationToStream(std::cout); - storm::api::exportSparseModelAsDot(model, "ua_model.dot"); - std::string propertyString; if (computeRewards) { propertyString = min ? "Rmin=? 
[F \"target\"]" : "Rmax=? [F \"target\"]"; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 27e8f6bb7..da6451db5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -30,9 +30,11 @@ namespace storm { std::map overApproxMap; std::map underApproxMap; std::vector> beliefList; + std::vector> beliefGrid; std::vector beliefIsTarget; bsmap_type overApproxBeliefStateMap; bsmap_type underApproxBeliefStateMap; + uint64_t initialBeliefId; }; template> @@ -137,12 +139,20 @@ namespace storm { * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return struct containing components generated during the computation to be used in later refinement iterations */ - std::unique_ptr> + std::shared_ptr> computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + std::shared_ptr> + computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, double explorationThreshold, std::shared_ptr> refinementComponents, + std::set changedObservations, + boost::optional> overApproximationMap = boost::none, + boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + /** * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * From 6c32b645c4321834a5475d14829a84a1aa85c70a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 17 Mar 2020 09:31:00 +0100 Subject: 
[PATCH 054/155] Fixed compilation with mathsat. --- src/storm/adapters/MathsatExpressionAdapter.h | 4 +++ src/storm/solver/MathsatSmtSolver.cpp | 35 +++++++++++++++++++ src/storm/solver/MathsatSmtSolver.h | 6 ++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/storm/adapters/MathsatExpressionAdapter.h b/src/storm/adapters/MathsatExpressionAdapter.h index 88ed84dcd..ca6de5d8c 100644 --- a/src/storm/adapters/MathsatExpressionAdapter.h +++ b/src/storm/adapters/MathsatExpressionAdapter.h @@ -106,6 +106,10 @@ namespace storm { STORM_LOG_ASSERT(declarationVariablePair != declarationToVariableMapping.end(), "Unknown variable declaration."); return declarationVariablePair->second; } + + std::unordered_map const& getAllDeclaredVariables() const { + return variableToDeclarationMapping; + } virtual boost::any visit(storm::expressions::BinaryBooleanFunctionExpression const& expression, boost::any const& data) override { msat_term leftResult = boost::any_cast(expression.getFirstOperand()->accept(*this, data)); diff --git a/src/storm/solver/MathsatSmtSolver.cpp b/src/storm/solver/MathsatSmtSolver.cpp index 219efe530..f8c1d6f86 100644 --- a/src/storm/solver/MathsatSmtSolver.cpp +++ b/src/storm/solver/MathsatSmtSolver.cpp @@ -32,6 +32,22 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unable to retrieve double value from model that only contains boolean values."); } + std::string MathsatSmtSolver::MathsatAllsatModelReference::toString() const { + std::stringstream str; + bool first = true; + str << "["; + for (auto const& varSlot : variableToSlotMapping) { + if (first) { + first = false; + } else { + str << ", "; + } + str << varSlot.first.getName() << "=" << std::boolalpha << getBooleanValue(varSlot.first); + } + str << "]"; + return str.str(); + } + MathsatSmtSolver::MathsatModelReference::MathsatModelReference(storm::expressions::ExpressionManager const& manager, msat_env const& env, 
storm::adapters::MathsatExpressionAdapter& expressionAdapter) : ModelReference(manager), env(env), expressionAdapter(expressionAdapter) { // Intentionally left empty. } @@ -62,6 +78,25 @@ namespace storm { storm::expressions::Expression value = expressionAdapter.translateExpression(msatValue); return value.evaluateAsDouble(); } + + std::string MathsatSmtSolver::MathsatModelReference::toString() const { + std::stringstream str; + bool first = true; + str << "["; + for (auto const& varDecl : expressionAdapter.getAllDeclaredVariables()) { + if (first) { + first = false; + } else { + str << ", "; + } + msat_term msatValue = msat_get_model_value(env, expressionAdapter.translateExpression(varDecl.first)); + STORM_LOG_ASSERT(!MSAT_ERROR_TERM(msatValue), "Unable to retrieve value of variable in model. This could be caused by calls to the solver between checking for satisfiability and model retrieval."); + str << varDecl.first.getName() << "=" << expressionAdapter.translateExpression(msatValue); + } + str << "]"; + return str.str(); + } + #endif MathsatSmtSolver::MathsatSmtSolver(storm::expressions::ExpressionManager& manager, Options const& options) : SmtSolver(manager) diff --git a/src/storm/solver/MathsatSmtSolver.h b/src/storm/solver/MathsatSmtSolver.h index 0d27c71af..851c00d82 100644 --- a/src/storm/solver/MathsatSmtSolver.h +++ b/src/storm/solver/MathsatSmtSolver.h @@ -38,7 +38,8 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; + private: msat_env const& env; msat_term* model; @@ -54,7 +55,8 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable 
const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; + private: msat_env const& env; storm::adapters::MathsatExpressionAdapter& expressionAdapter; From 12f498356a3089e79260240d5828a638744eb806 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 06:53:15 +0100 Subject: [PATCH 055/155] Fixed help message --- src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index ccd72779a..37addcbab 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -42,7 +42,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); - this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether to early in the belief space exploration if upper and lower bound are close").addArgument( + this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether the belief space exploration is stopped if upper and lower bound are close").addArgument( 
storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); } From 24faf636d77259c867d413f9fbe836479f61e588 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 07:01:39 +0100 Subject: [PATCH 056/155] removed unused variables. --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index e740ec7b6..cb68794de 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -90,8 +90,6 @@ namespace storm { // This can probably be improved (i.e. 
resolutions for single belief states) STORM_PRINT("Initial Resolution: " << gridResolution << std::endl) std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); - auto overRes = storm::utility::one(); - auto underRes = storm::utility::zero(); std::set changedObservations; uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; From 5bdcb66fcb4ad96c7e7b612a72326752381d9a18 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 09:53:03 +0100 Subject: [PATCH 057/155] Fixes for reward formulas --- src/storm-pomdp/analysis/FormulaInformation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/analysis/FormulaInformation.cpp b/src/storm-pomdp/analysis/FormulaInformation.cpp index 836abf881..8bb00450c 100644 --- a/src/storm-pomdp/analysis/FormulaInformation.cpp +++ b/src/storm-pomdp/analysis/FormulaInformation.cpp @@ -137,7 +137,7 @@ namespace storm { template FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::RewardOperatorFormula const& formula) { STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); - STORM_LOG_WARN_COND(formula.hasBound(), "The reward threshold for the given property will be ignored."); + STORM_LOG_WARN_COND(!formula.hasBound(), "The reward threshold for the given property will be ignored."); std::string rewardModelName = ""; if (formula.hasRewardModelName()) { rewardModelName = formula.getRewardModelName(); @@ -152,7 +152,7 @@ namespace storm { targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); } if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional())) { - FormulaInformation result(FormulaInformation::Type::NonNestedReachabilityProbability, formula.getOptimalityType(), rewardModelName); + FormulaInformation 
result(FormulaInformation::Type::NonNestedExpectedRewardFormula, formula.getOptimalityType(), rewardModelName); result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); return result; } From bf7f84f796d48a2886e50665bf638e0021c3b1a1 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 09:53:48 +0100 Subject: [PATCH 058/155] Added --check-fully-observable option to easily check the underlying MDP --- .../settings/modules/POMDPSettings.cpp | 9 ++++++++- .../settings/modules/POMDPSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 18 ++++++++++++++---- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 37addcbab..dc5539b43 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -29,6 +29,7 @@ namespace storm { const std::string transformSimpleOption = "transformsimple"; const std::string memlessSearchOption = "memlesssearch"; std::vector memlessSearchMethods = {"none", "ccdmemless", "ccdmemory", "iterative"}; + const std::string checkFullyObservableOption = "check-fully-observable"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); @@ -41,10 +42,12 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); 
this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); - this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).makeOptional().addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether the belief space exploration is stopped if upper and lower bound are close").addArgument( storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); + 
this->addOption(storm::settings::OptionBuilder(moduleName, checkFullyObservableOption, false, "Performs standard model checking on the underlying MDP").build()); + } bool POMDPSettings::isExportToParametricSet() const { @@ -83,6 +86,10 @@ namespace storm { return this->getOption(memlessSearchOption).getHasOptionBeenSet(); } + bool POMDPSettings::isCheckFullyObservableSet() const { + return this->getOption(checkFullyObservableOption).getHasOptionBeenSet(); + } + std::string POMDPSettings::getMemlessSearchMethod() const { return this->getOption(memlessSearchOption).getArgumentByName("method").getValueAsString(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 5af7ea589..891cca9e0 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -34,6 +34,7 @@ namespace storm { bool isTransformSimpleSet() const; bool isTransformBinarySet() const; bool isMemlessSearchSet() const; + bool isCheckFullyObservableSet() const; std::string getMemlessSearchMethod() const; std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 634316e33..ee1877531 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -27,6 +27,8 @@ #include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" #include "storm/api/storm.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/utility/Stopwatch.h" #include "storm/exceptions/UnexpectedException.h" @@ -93,10 +95,11 @@ namespace storm { } template - bool performAnalysis(std::shared_ptr> const& pomdp, storm::pomdp::analysis::FormulaInformation const& formulaInfo) { + bool performAnalysis(std::shared_ptr> const& pomdp, 
storm::pomdp::analysis::FormulaInformation const& formulaInfo, storm::logic::Formula const& formula) { auto const& pomdpSettings = storm::settings::getModule(); bool analysisPerformed = false; if (pomdpSettings.isGridApproximationSet()) { + STORM_PRINT_AND_LOG("Applying grid approximation... "); STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "Unsupported formula type for Grid approximation."); STORM_LOG_THROW(!formulaInfo.getTargetStates().empty(), storm::exceptions::UnexpectedException, "The set of target states is empty."); STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::UnexpectedException, "Observations on target states also occur on non-target states. This is unexpected at this point."); @@ -111,8 +114,8 @@ namespace storm { ValueType overRes = result->overApproxValue; ValueType underRes = result->underApproxValue; if (overRes != underRes) { - STORM_PRINT("Overapproximation Result: " << overRes << std::endl) - STORM_PRINT("Underapproximation Result: " << underRes << std::endl) + STORM_PRINT("Overapproximation Result: " << overRes << std::endl); + STORM_PRINT("Underapproximation Result: " << underRes << std::endl); } else { STORM_PRINT("Result: " << overRes << std::endl) } @@ -138,6 +141,13 @@ namespace storm { } analysisPerformed = true; } + if (pomdpSettings.isCheckFullyObservableSet()) { + STORM_PRINT_AND_LOG("Analyzing the formula on the fully observable MDP ... 
"); + auto result = storm::api::verifyWithSparseEngine(pomdp->template as>(), storm::api::createTask(formula.asSharedPointer(), true))->template asExplicitQuantitativeCheckResult(); + result.filter(storm::modelchecker::ExplicitQualitativeCheckResult(pomdp->getInitialStates())); + STORM_PRINT_AND_LOG("Result: " << result.getMax() << std::endl); + analysisPerformed = true; + } return analysisPerformed; } @@ -259,7 +269,7 @@ namespace storm { } sw.restart(); - if (performAnalysis(pomdp, formulaInfo)) { + if (performAnalysis(pomdp, formulaInfo, *formula)) { sw.stop(); STORM_PRINT_AND_LOG("Time for POMDP analysis: " << sw << "s." << std::endl); } From a11ec691a9a52d54c3d9e5aeebe6062b3eb83f31 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 13:44:32 +0100 Subject: [PATCH 059/155] Introduced options in the ApproximatePOMDPModelChecker. --- .../ApproximatePOMDPModelchecker.cpp | 181 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 95 ++++----- 2 files changed, 147 insertions(+), 129 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index cb68794de..90fa96039 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -2,7 +2,13 @@ #include +#include "storm-pomdp/analysis/FormulaInformation.h" + #include "storm/utility/ConstantsComparator.h" +#include "storm/utility/NumberTraits.h" +#include "storm/utility/graph.h" +#include "storm/logic/Formulas.h" + #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" @@ -15,23 +21,65 @@ #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" +#include "storm/utility/macros.h" +#include "storm/exceptions/NotSupportedException.h" + namespace storm { namespace pomdp { namespace modelchecker { template - 
ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - precision = 0.000000001; - cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); + ApproximatePOMDPModelchecker::Options::Options() { + initialGridResolution = 10; + explorationThreshold = storm::utility::zero(); + doRefinement = true; + refinementPrecision = storm::utility::convertNumber(1e-4); + numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); + } + + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); useMdp = true; maxIterations = 1000; cacheSubsimplices = false; } + template + std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); + if (formulaInfo.isNonNestedReachabilityProbability()) { + // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. 
This is currently not supported"); + } + if (options.doRefinement) { + return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } else { + return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } + } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); + if (options.doRefinement) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Rewards with refinement not implemented yet"); + //return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } else { + // FIXME: pick the non-unique reward model here + STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); + return computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } + } else { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); + } + } + + template std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution, - double explorationThreshold) { + ApproximatePOMDPModelchecker::refineReachabilityProbability(std::set const &targetObservations, bool min) { std::srand(time(NULL)); // Compute easy upper and lower bounds storm::utility::Stopwatch underlyingWatch(true); @@ -88,14 +136,13 @@ namespace storm { // Initialize the resolution mapping. 
For now, we always give all beliefs with the same observation the same resolution. // This can probably be improved (i.e. resolutions for single belief states) - STORM_PRINT("Initial Resolution: " << gridResolution << std::endl) - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); + STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl) + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); std::set changedObservations; uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, - explorationThreshold, initialOverApproxMap, underApproxMap, underApproxModelSize); + std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, underApproxMap, underApproxModelSize); ValueType lastMinScore = storm::utility::infinity(); while (refinementCounter < 1000) { // TODO the actual refinement @@ -169,7 +216,7 @@ namespace storm { } STORM_PRINT( "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); if (cc.isEqual(res->overApproxValue, res->underApproxValue)) { @@ -183,15 +230,14 @@ namespace storm { template std::unique_ptr> - 
ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, + auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, underApproximationMap, maxUaModelSize); return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -199,10 +245,9 @@ namespace storm { template std::shared_ptr> - ApproximatePOMDPModelchecker::computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { @@ -229,7 +274,7 @@ namespace storm { uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); // Initial belief always has belief ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); @@ -323,15 +368,16 @@ namespace storm { 
//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting // Expand the beliefs to generate the grid on-the-fly - if (explorationThreshold > 0) { - STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { + // TODO: with rewards whe would have to assign the corresponding reward to this transition mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; } @@ -348,13 +394,13 @@ namespace storm { std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, 
beliefIsTarget, targetObservations, beliefList[currId], action, observation, nextId); nextId = beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid @@ -415,7 +461,7 @@ namespace storm { } } if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); } if (!transitionInActionBelief.empty()) { @@ -456,7 +502,7 @@ namespace storm { for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { // Add the reward mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief)); } } @@ -487,7 +533,7 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, + auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -500,10 +546,9 @@ namespace storm { template std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + 
ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, std::shared_ptr> refinementComponents, std::set changedObservations, boost::optional> overApproximationMap, @@ -543,12 +588,12 @@ namespace storm { for (auto const &stateActionPair : statesAndActionsToCheck) { auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); auto action = stateActionPair.second; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, refinementComponents->beliefList[currId], + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid @@ -604,7 +649,7 @@ namespace storm { } /* TODO if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), refinementComponents->beliefList[currId]); }*/ if (!transitionInActionBelief.empty()) { @@ -618,7 +663,7 @@ namespace storm { bool 
isTarget = refinementComponents->beliefIsTarget[currId]; /* TODO - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; }*/ @@ -634,15 +679,13 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, - refinementComponents->beliefList[currId], - action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); @@ -699,7 +742,7 @@ namespace storm { } /* if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); }*/ if (!transitionInActionBelief.empty()) { 
@@ -785,7 +828,7 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, + auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -799,8 +842,7 @@ namespace storm { template ValueType - ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + ApproximatePOMDPModelchecker::overApproximationValueIteration(std::vector> &beliefList, std::vector> &beliefGrid, std::vector &beliefIsTarget, std::map>> &observationProbabilities, @@ -915,20 +957,16 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); - return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, true, 0); + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); } template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const 
&targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold) { - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); - return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold); + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min) { + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } template @@ -942,13 +980,13 @@ namespace storm { std::vector beliefIsTarget; uint64_t nextId = 0; // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); + constructBeliefGrid(targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); @@ -993,20 +1031,20 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(currentBelief, action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = 
getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, currentBelief, action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief); } } @@ -1022,14 +1060,14 @@ namespace storm { STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + auto overApprox = overApproximationValueIteration(beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache, result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - /*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + /*ValueType underApprox = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ underApproxTimer.stop(); auto underApprox = storm::utility::zero(); @@ -1062,8 +1100,7 @@ namespace storm { template std::unique_ptr> - 
ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, uint64_t initialBeliefId, bool min, @@ -1101,10 +1138,10 @@ namespace storm { //TODO add a way to extract the actions from the over-approx and use them here? for (uint64_t action = 0; action < numChoices; ++action) { std::map transitionsInStateWithAction; - std::map observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action); + std::map observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action); for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], + uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], action, observation, nextId); nextId = beliefList.size(); @@ -1146,7 +1183,7 @@ namespace storm { for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { // Add the reward rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief)); } } @@ -1231,8 +1268,7 @@ namespace storm { } template - storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief( - storm::models::sparse::Pomdp const &pomdp, uint64_t id) { + storm::pomdp::Belief 
ApproximatePOMDPModelchecker::getInitialBelief(uint64_t id) { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, @@ -1251,7 +1287,6 @@ namespace storm { template void ApproximatePOMDPModelchecker::constructBeliefGrid( - storm::models::sparse::Pomdp const &pomdp, std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsTarget, @@ -1399,7 +1434,6 @@ namespace storm { template std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex) { std::map res; @@ -1428,8 +1462,7 @@ namespace storm { template storm::pomdp::Belief - ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { + ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { std::map distributionAfter; uint32_t observation = 0; for (auto const &probEntry : belief.probabilities) { @@ -1446,8 +1479,7 @@ namespace storm { } template - uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { std::map distributionAfter; @@ -1480,8 +1512,7 @@ namespace storm { } template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, - uint64_t action, storm::pomdp::Belief &belief) { + ValueType 
ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); for (size_t i = 0; i < belief.probabilities.size(); ++i) { for (auto const &probEntry : belief.probabilities) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index da6451db5..3cd5f06ba 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -8,6 +8,10 @@ #include "storm/storage/jani/Property.h" namespace storm { + namespace logic { + class Formula; + } + namespace pomdp { namespace modelchecker { typedef boost::bimap bsmap_type; @@ -47,53 +51,54 @@ namespace storm { template> class ApproximatePOMDPModelchecker { public: - explicit ApproximatePOMDPModelchecker(); + + struct Options { + Options(); + uint64_t initialGridResolution; /// Decides how precise the bounds are + ValueType explorationThreshold; /// the threshold for exploration stopping. 
If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + bool doRefinement; /// Sets whether the bounds should be refined automatically until the refinement precision is reached + ValueType refinementPrecision; /// Used to decide when the refinement should terminate + ValueType numericPrecision; /// Used to decide whether two values are equal + }; + + ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); + + std::unique_ptr> check(storm::logic::Formula const& formula); + private: /** * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @param gridResolution the initial grid resolution - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold); + refineReachabilityProbability(std::set const &targetObservations, bool min); /** * Compute the reachability probability of given target observations on a POMDP for the given resolution only. 
* On-the-fly state space generation is used for the overapproximation * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @param gridResolution the grid resolution - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold); + computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min); /** * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. * On-the-fly state space generation is used for the overapproximation * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum rewards are to be computed - * @param gridResolution the initial grid resolution - * @param explorationThreshold the threshold for exploration stopping. 
If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution); + computeReachabilityRewardOTF(std::set const &targetObservations, bool min); + // TODO: Check if this is obsolete /** * Compute the reachability probability for given target observations on a POMDP for the given resolution only. * Static state space generation is used for the overapproximation, i.e. the whole grid is generated @@ -109,11 +114,11 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution); + // TODO: Check if this is obsolete /** * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. * Static state space generation is used for the overapproximation, i.e. the whole grid is generated * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if the minimum rewards are to be computed * @param gridResolution the initial grid resolution @@ -128,27 +133,23 @@ namespace storm { /** * Helper method to compute the inital step of the refinement loop * - * @param pomdp the pomdp to be checked * @param targetObservations set of target observations * @param min true if minimum value is to be computed * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param explorationThreshold the threshold for exploration stopping. 
If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return struct containing components generated during the computation to be used in later refinement iterations */ std::shared_ptr> - computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, + computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); std::shared_ptr> - computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, std::shared_ptr> refinementComponents, + computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, std::shared_ptr> refinementComponents, std::set changedObservations, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); @@ -156,28 +157,25 @@ namespace storm { /** * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * - * @param pomdp the pomdp to be checked * @param targetObservations set of 
target observations * @param min true if minimum value is to be computed * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, + computeReachabilityOTF(std::set const &targetObservations, bool min, + std::vector &observationResolutionVector, bool computeRewards, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + // TODO: Check if this is obsolete /** * Helper method to compute reachability properties using static state space generation * - * @param pomdp the POMDP to be checked * @param targetObservations set of target observations * @param min true if minimum value is to be computed * @param gridResolution the resolution of the grid to be used @@ -193,7 +191,6 @@ namespace storm { * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. 
* - * @param pomdp the POMDP to be checked * @param beliefList vector containing already generated beliefs * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target * @param targetObservations set of target observations @@ -203,8 +200,7 @@ namespace storm { * @param maxModelSize number of states up until which the belief support should be unrolled * @return struct containing the components generated during the under approximation */ - std::unique_ptr> computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, @@ -218,7 +214,7 @@ namespace storm { * @return a belief representing the initial belief */ storm::pomdp::Belief - getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); + getInitialBelief(uint64_t id); /** @@ -236,7 +232,6 @@ namespace storm { /** * Helper method to construct the static belief grid for the POMDP overapproximation * - * @param pomdp the POMDP to be approximated * @param target_observations set of target observations * @param gridResolution the resolution of the grid to be constructed * @param beliefList data structure to store all generated beliefs @@ -244,8 +239,7 @@ namespace storm { * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief * @param nextId the ID to be used for the next generated belief */ - void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, - std::set const &target_observations, uint64_t gridResolution, + void constructBeliefGrid(std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsTarget, uint64_t nextId); @@ -254,21 +248,17 @@ namespace storm { /** * Helper method to get the probabilities 
to be in a state with each observation after performing an action * - * @param pomdp the POMDP * @param belief the belief in which the action is performed * @param actionIndex the index of the action to be performed * @return mapping from each observation to the probability to be in a state with that observation after performing the action */ - std::map computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief &belief, + std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex); /** * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. * If the belief does not exist yet, it is created and added to the list of all beliefs * - * @param pomdp the POMDP on which the evaluation should be performed * @param beliefList data structure to store all generated beliefs * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief * @param targetObservations set of target observations @@ -277,9 +267,7 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - uint64_t getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, @@ -288,15 +276,13 @@ namespace storm { /** * Helper method to generate the next belief that results from a belief by performing an action * - * @param pomdp the POMDP * @param belief the starting belief * @param actionIndex the index of the action to be performed * @param id the ID for the generated belief * @return a belief object representing the belief after performing the action in the starting belief */ 
storm::pomdp::Belief - getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, - uint64_t id); + getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id); /** * Helper to get the id of a Belief stored in a given vector structure @@ -320,12 +306,11 @@ namespace storm { /** * Get the reward for performing an action in a given belief * - * @param pomdp the POMDP * @param action the index of the action to be performed * @param belief the belief in which the action is performed * @return the reward earned by performing the action in the belief */ - ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief &belief); + ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); /** @@ -349,7 +334,7 @@ namespace storm { * @return the resulting probability/reward in the initial state */ ValueType - overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + overApproximationValueIteration(std::vector> &beliefList, std::vector> &beliefGrid, std::vector &beliefIsTarget, std::map>> &observationProbabilities, std::map>> &nextBelieves, @@ -359,8 +344,10 @@ namespace storm { std::map> &chosenActions, uint64_t gridResolution, bool min, bool computeRewards); + storm::models::sparse::Pomdp const& pomdp; + Options options; storm::utility::ConstantsComparator cc; - double precision; + // TODO: these should be obsolete, right? 
bool useMdp; bool cacheSubsimplices; uint64_t maxIterations; From 3aaea1eb0a3d3a846d050eb6df56155eaf7ef6ce Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 13:45:32 +0100 Subject: [PATCH 060/155] Added new CLI settings for GridApproximation --- .../settings/PomdpSettings.cpp | 2 + .../modules/GridApproximationSettings.cpp | 63 +++++++++++++++++++ .../modules/GridApproximationSettings.h | 40 ++++++++++++ .../settings/modules/POMDPSettings.cpp | 17 +---- .../settings/modules/POMDPSettings.h | 3 - src/storm-pomdp-cli/storm-pomdp.cpp | 45 ++++++------- 6 files changed, 124 insertions(+), 46 deletions(-) create mode 100644 src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp create mode 100644 src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.cpp b/src/storm-pomdp-cli/settings/PomdpSettings.cpp index f08dbe932..1181bb2ff 100644 --- a/src/storm-pomdp-cli/settings/PomdpSettings.cpp +++ b/src/storm-pomdp-cli/settings/PomdpSettings.cpp @@ -31,6 +31,7 @@ #include "storm/settings/modules/HintSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" +#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" namespace storm { namespace settings { @@ -44,6 +45,7 @@ namespace storm { storm::settings::addModule(); storm::settings::addModule(); + storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp new file mode 100644 index 000000000..2aaf3239c --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp @@ -0,0 +1,63 @@ +#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" + +#include "storm/settings/SettingsManager.h" +#include "storm/settings/SettingMemento.h" +#include "storm/settings/Option.h" +#include 
"storm/settings/OptionBuilder.h" +#include "storm/settings/ArgumentBuilder.h" + +#include "storm/exceptions/InvalidArgumentException.h" + +namespace storm { + namespace settings { + namespace modules { + + const std::string GridApproximationSettings::moduleName = "grid"; + + const std::string refineOption = "refine"; + const std::string resolutionOption = "resolution"; + const std::string limitBeliefExplorationOption = "limit-exploration"; + const std::string numericPrecisionOption = "numeric-precision"; + + GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { + + this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Enables automatic refinement of the grid until the goal precision is reached").addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("precision","Allowed difference between upper and lower bound of the result.").setDefaultValueDouble(1e-6).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the (initial-) resolution of the grid (higher means more precise results)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the resolution").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether the belief space exploration is stopped if upper and lower bound are close").addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).build()); + + 
this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); + + } + + bool GridApproximationSettings::isRefineSet() const { + return this->getOption(refineOption).getHasOptionBeenSet(); + } + + double GridApproximationSettings::getRefinementPrecision() const { + return this->getOption(refineOption).getArgumentByName("precision").getValueAsDouble(); + } + + uint64_t GridApproximationSettings::getGridResolution() const { + return this->getOption(resolutionOption).getArgumentByName("value").getValueAsUnsignedInteger(); + } + + double GridApproximationSettings::getExplorationThreshold() const { + return this->getOption(limitBeliefExplorationOption).getArgumentByName("threshold").getValueAsDouble(); + } + + bool GridApproximationSettings::isNumericPrecisionSetFromDefault() const { + return !this->getOption(numericPrecisionOption).getHasOptionBeenSet() || this->getOption(numericPrecisionOption).getArgumentByName("value").wasSetFromDefaultValue(); + } + + double GridApproximationSettings::getNumericPrecision() const { + return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); + } + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h new file mode 100644 index 000000000..5c1e281d4 --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h @@ -0,0 +1,40 @@ +#pragma once + +#include "storm-config.h" +#include "storm/settings/modules/ModuleSettings.h" + +namespace storm { + namespace 
settings { + namespace modules { + + /*! + * This class represents the settings for POMDP model checking. + */ + class GridApproximationSettings : public ModuleSettings { + public: + + /*! + * Creates a new set of POMDP settings. + */ + GridApproximationSettings(); + + virtual ~GridApproximationSettings() = default; + + bool isRefineSet() const; + double getRefinementPrecision() const; + uint64_t getGridResolution() const; + double getExplorationThreshold() const; + bool isNumericPrecisionSetFromDefault() const; + double getNumericPrecision() const; + + // The name of the module. + static const std::string moduleName; + + private: + + + }; + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index dc5539b43..4ba3f8148 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -15,7 +15,6 @@ namespace storm { const std::string POMDPSettings::moduleName = "pomdp"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; - const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string mecReductionOption = "mecreduction"; @@ -42,9 +41,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary 
pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); - this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).makeOptional().addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); - this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether the belief space exploration is stopped if upper and lower bound are close").addArgument( - storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, checkFullyObservableOption, false, "Performs standard model checking on the underlying MDP").build()); @@ -78,10 +75,6 @@ namespace storm { return this->getOption(gridApproximationOption).getHasOptionBeenSet(); } - uint64_t POMDPSettings::getGridResolution() const { - return 
this->getOption(gridApproximationOption).getArgumentByName("resolution").getValueAsUnsignedInteger(); - } - bool POMDPSettings::isMemlessSearchSet() const { return this->getOption(memlessSearchOption).getHasOptionBeenSet(); } @@ -94,14 +87,6 @@ namespace storm { return this->getOption(memlessSearchOption).getArgumentByName("method").getValueAsString(); } - bool POMDPSettings::isLimitExplorationSet() const { - return this->getOption(limitBeliefExplorationOption).getHasOptionBeenSet(); - } - - double POMDPSettings::getExplorationThreshold() const { - return this->getOption(limitBeliefExplorationOption).getArgumentByName("threshold").getValueAsDouble(); - } - uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 891cca9e0..768766536 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -27,7 +27,6 @@ namespace storm { bool isQualitativeReductionSet() const; bool isGridApproximationSet() const; - bool isLimitExplorationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; @@ -39,8 +38,6 @@ namespace storm { std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; - uint64_t getGridResolution() const; - double getExplorationThreshold() const; storm::storage::PomdpMemoryPattern getMemoryPattern() const; bool check() const override; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ee1877531..e62b93f4b 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -5,6 +5,7 @@ #include "storm/settings/modules/GeneralSettings.h" #include "storm/settings/modules/DebugSettings.h" #include 
"storm-pomdp-cli/settings/modules/POMDPSettings.h" +#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" #include "storm-pomdp-cli/settings/PomdpSettings.h" #include "storm/analysis/GraphConditions.h" @@ -29,6 +30,7 @@ #include "storm/api/storm.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/utility/NumberTraits.h" #include "storm/utility/Stopwatch.h" #include "storm/exceptions/UnexpectedException.h" @@ -73,23 +75,6 @@ namespace storm { formulaInfo.updateTargetStates(*pomdp, std::move(prob1States)); formulaInfo.updateSinkStates(*pomdp, std::move(prob0States)); preprocessingPerformed = true; - } else if (pomdpSettings.isGridApproximationSet()) { - // We still might need to apply the KnownProbabilityTransformer, to ensure that the grid approximation works properly - if (formulaInfo.isNonNestedReachabilityProbability()) { - if (!formulaInfo.getTargetStates().observationClosed || !formulaInfo.getSinkStates().states.empty()) { - // Make target states observation closed and/or sink states absorbing - storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); - auto prob0States = formulaInfo.getSinkStates().states; - auto prob1States = formulaInfo.getTargetStates().states; - pomdp = kpt.transform(*pomdp, prob0States, prob1States); - // Update formulaInfo to changes from Preprocessing - formulaInfo.updateTargetStates(*pomdp, std::move(prob1States)); - formulaInfo.updateSinkStates(*pomdp, std::move(prob0States)); - preprocessingPerformed = true; - } - } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { - STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "Target states of reward property are not observation closed. 
This case is not yet implemented."); - } } return preprocessingPerformed; } @@ -100,17 +85,23 @@ namespace storm { bool analysisPerformed = false; if (pomdpSettings.isGridApproximationSet()) { STORM_PRINT_AND_LOG("Applying grid approximation... "); - STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "Unsupported formula type for Grid approximation."); - STORM_LOG_THROW(!formulaInfo.getTargetStates().empty(), storm::exceptions::UnexpectedException, "The set of target states is empty."); - STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::UnexpectedException, "Observations on target states also occur on non-target states. This is unexpected at this point."); - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); - std::unique_ptr> result; - if (formulaInfo.isNonNestedReachabilityProbability()) { - result = checker.refineReachabilityProbability(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.minimize(), pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); - } else { - // TODO: no exploration threshold? 
- result = checker.computeReachabilityReward(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.minimize(), pomdpSettings.getGridResolution()); + auto const& gridSettings = storm::settings::getModule(); + typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker::Options options; + options.initialGridResolution = gridSettings.getGridResolution(); + options.explorationThreshold = gridSettings.getExplorationThreshold(); + options.doRefinement = gridSettings.isRefineSet(); + options.refinementPrecision = gridSettings.getRefinementPrecision(); + options.numericPrecision = gridSettings.getNumericPrecision(); + if (storm::NumberTraits::IsExact) { + if (gridSettings.isNumericPrecisionSetFromDefault()) { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); + options.numericPrecision = storm::utility::zero(); + } else { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. Results might be inexact."); + } } + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); + std::unique_ptr> result = checker.check(formula); ValueType overRes = result->overApproxValue; ValueType underRes = result->underApproxValue; if (overRes != underRes) { From b3493b5888c75d96214ee329523ad1e724cf41b7 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 13:49:42 +0100 Subject: [PATCH 061/155] Grid: Added cli setting to cache subsimplices. 
--- .../modules/GridApproximationSettings.cpp | 7 ++++++ .../modules/GridApproximationSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 1 + .../ApproximatePOMDPModelchecker.cpp | 24 +++++++++---------- .../ApproximatePOMDPModelchecker.h | 2 +- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp index 2aaf3239c..8006b3851 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp @@ -18,6 +18,7 @@ namespace storm { const std::string resolutionOption = "resolution"; const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string numericPrecisionOption = "numeric-precision"; + const std::string cacheSimplicesOption = "cache-simplices"; GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { @@ -32,6 +33,8 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").addArgument( storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build()); + } bool GridApproximationSettings::isRefineSet() const { @@ -58,6 +61,10 @@ namespace storm { return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); } + bool GridApproximationSettings::isCacheSimplicesSet() const { + return this->getOption(cacheSimplicesOption).getHasOptionBeenSet(); + } + } // namespace modules } // 
namespace settings } // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h index 5c1e281d4..a01fdbd77 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h @@ -26,6 +26,7 @@ namespace storm { double getExplorationThreshold() const; bool isNumericPrecisionSetFromDefault() const; double getNumericPrecision() const; + bool isCacheSimplicesSet() const; // The name of the module. static const std::string moduleName; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index e62b93f4b..e0000c0d6 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -92,6 +92,7 @@ namespace storm { options.doRefinement = gridSettings.isRefineSet(); options.refinementPrecision = gridSettings.getRefinementPrecision(); options.numericPrecision = gridSettings.getNumericPrecision(); + options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 90fa96039..1c40e96f6 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -34,6 +34,7 @@ namespace storm { doRefinement = true; refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? 
storm::utility::zero() : storm::utility::convertNumber(1e-9); + cacheSubsimplices = false; } template @@ -41,7 +42,6 @@ namespace storm { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); useMdp = true; maxIterations = 1000; - cacheSubsimplices = false; } template @@ -299,7 +299,7 @@ namespace storm { auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates()); auto initSubSimplex = initTemp.first; auto initLambdas = initTemp.second; - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; } @@ -406,7 +406,7 @@ namespace storm { //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; - if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { + if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { @@ -414,7 +414,7 @@ namespace storm { observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } @@ -600,7 +600,7 @@ namespace storm { std::vector> subSimplex; std::vector lambdas; //TODO add caching - if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { + if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { @@ -609,7 +609,7 @@ namespace storm { pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { subSimplexCache[idNextBelief] = 
subSimplex; lambdaCache[idNextBelief] = lambdas; } @@ -693,7 +693,7 @@ namespace storm { std::vector> subSimplex; std::vector lambdas; /* TODO Caching - if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { + if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { */ @@ -702,7 +702,7 @@ namespace storm { pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - /*if (cacheSubsimplices) { + /*if (options.cacheSubsimplices) { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } @@ -882,14 +882,14 @@ namespace storm { // cache the values to not always re-calculate std::vector> subSimplex; std::vector lambdas; - if (cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { + if (options.cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { subSimplexCache[nextBelief.id] = subSimplex; lambdaCache[nextBelief.id] = lambdas; } @@ -937,7 +937,7 @@ namespace storm { std::vector initialLambda; std::vector> initialSubsimplex; - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { initialLambda = lambdaCache[0]; initialSubsimplex = subSimplexCache[0]; } else { @@ -1007,7 +1007,7 @@ namespace storm { std::map> lambdaCache; auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - if (cacheSubsimplices) { + if (options.cacheSubsimplices) { subSimplexCache[0] = temp.first; lambdaCache[0] = temp.second; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 3cd5f06ba..e5cb6ea1b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -59,6 +59,7 @@ namespace storm { bool doRefinement; /// Sets whether the bounds should be refined automatically until the refinement precision is reached ValueType refinementPrecision; /// Used to decide when the refinement should terminate ValueType numericPrecision; /// Used to decide whether two values are equal + bool cacheSubsimplices; /// Enables caching of subsimplices }; ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); @@ -349,7 +350,6 @@ namespace storm { storm::utility::ConstantsComparator cc; // TODO: these should be obsolete, right? bool useMdp; - bool cacheSubsimplices; uint64_t maxIterations; }; From de483cd3c16182cbfda5554189dae9d429ea0ccf Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 13:59:29 +0100 Subject: [PATCH 062/155] Added missing number conversion. 
--- src/storm-pomdp-cli/storm-pomdp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index e0000c0d6..ac48327b5 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -88,10 +88,10 @@ namespace storm { auto const& gridSettings = storm::settings::getModule(); typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker::Options options; options.initialGridResolution = gridSettings.getGridResolution(); - options.explorationThreshold = gridSettings.getExplorationThreshold(); + options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); options.doRefinement = gridSettings.isRefineSet(); - options.refinementPrecision = gridSettings.getRefinementPrecision(); - options.numericPrecision = gridSettings.getNumericPrecision(); + options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); + options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { From 581e165fb91b1d16658fd362eb74d7b711cc7221 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 18 Mar 2020 13:59:46 +0100 Subject: [PATCH 063/155] Actually use the refinement precision.... 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1c40e96f6..eb5ac7048 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -219,7 +219,8 @@ namespace storm { res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - if (cc.isEqual(res->overApproxValue, res->underApproxValue)) { + STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); + if (res->overApproxValue - res->underApproxValue <= options.refinementPrecision) { break; } ++refinementCounter; From 8e30e27eb96cfc8761c877be3d512218fa167fed Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 14:34:56 +0100 Subject: [PATCH 064/155] Removal of obsolete code --- .../ApproximatePOMDPModelchecker.cpp | 246 ------------------ .../ApproximatePOMDPModelchecker.h | 80 ------ 2 files changed, 326 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1c40e96f6..7a51609ba 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -40,8 +40,6 @@ namespace storm { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = 
storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); - useMdp = true; - maxIterations = 1000; } template @@ -840,121 +838,6 @@ namespace storm { refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap}); } - template - ValueType - ApproximatePOMDPModelchecker::overApproximationValueIteration(std::vector> &beliefList, - std::vector> &beliefGrid, - std::vector &beliefIsTarget, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map> &beliefActionRewards, - std::map>> &subSimplexCache, - std::map> &lambdaCache, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, bool min, bool computeRewards) { - std::map result_backup = result; - uint64_t iteration = 0; - bool finished = false; - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = computeRewards ? 
beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (options.cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (!cc.isEqual(result_backup[currentBelief.id], 
result[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - std::vector initialLambda; - std::vector> initialSubsimplex; - if (options.cacheSubsimplices) { - initialLambda = lambdaCache[0]; - initialSubsimplex = subSimplexCache[0]; - } else { - auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution, pomdp.getNumberOfStates()); - initialSubsimplex = temp.first; - initialLambda = temp.second; - } - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initialLambda.size(); ++j) { - if (initialLambda[j] != storm::utility::zero()) { - overApprox += initialLambda[j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, initialSubsimplex[j])]; - } - } - return overApprox; - } - template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { @@ -969,135 +852,6 @@ namespace storm { return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution, - bool computeRewards) { - storm::utility::Stopwatch beliefGridTimer(true); - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::vector> beliefGrid; - constructBeliefGrid(targetObservations, 
gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - // Belief ID -> ActionIndex - std::map> chosenActions; - - // Belief ID -> action -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> beliefActionRewards; - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - - auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - if (options.cacheSubsimplices) { - subSimplexCache[0] = temp.first; - lambdaCache[0] = temp.second; - } - - storm::utility::Stopwatch nextBeliefGeneration(true); - for (size_t i = 0; i < beliefGrid.size(); ++i) { - auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (isTarget) { - result.emplace(std::make_pair(currentBelief.id, computeRewards ? 
storm::utility::zero() : storm::utility::one())); - } else { - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - //TODO put this in extra function - - // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative - uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, currentBelief, - action, observation, nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); - } - } - observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - if (computeRewards) { - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - } - } - - } - nextBeliefGeneration.stop(); - - STORM_PRINT("Time generation of next believes: " << 
nextBeliefGeneration << std::endl) - // Value Iteration - auto overApprox = overApproximationValueIteration(beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, - subSimplexCache, lambdaCache, - result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop(); - - // Now onto the under-approximation - storm::utility::Stopwatch underApproxTimer(true); - /*ValueType underApprox = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ - underApproxTimer.stop(); - auto underApprox = storm::utility::zero(); - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachability(pomdp, targetObservations, min, gridResolution, false); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachability(pomdp, targetObservations, min, gridResolution, true); - } - template std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index e5cb6ea1b..5805449f4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -99,37 +99,6 @@ namespace storm { std::unique_ptr> computeReachabilityRewardOTF(std::set const &targetObservations, bool min); - // TODO: Check if this is obsolete - /** - * Compute the reachability probability for given target observations on a POMDP for the given resolution only. - * Static state space generation is used for the overapproximation, i.e. the whole grid is generated - * - * @param pomdp the POMDP to be checked - * @param targetObservations the set of observations to be reached - * @param min true if the minimum probability is to be computed - * @param gridResolution the initial grid resolution - * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution); - - // TODO: Check if this is obsolete - /** - * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. - * Static state space generation is used for the overapproximation, i.e. 
the whole grid is generated - * - * @param targetObservations the set of observations to be reached - * @param min true if the minimum rewards are to be computed - * @param gridResolution the initial grid resolution - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution); - private: /** * Helper method to compute the inital step of the refinement loop @@ -173,21 +142,6 @@ namespace storm { boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); - // TODO: Check if this is obsolete - /** - * Helper method to compute reachability properties using static state space generation - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param gridResolution the resolution of the grid to be used - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution, bool computeRewards); - /** * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. 
@@ -314,43 +268,9 @@ namespace storm { ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); - /** - * Helper method for value iteration on data structures representing the belief grid - * This is very close to the method implemented in PRISM POMDP - * - * @param pomdp The POMDP - * @param beliefList data structure to store all generated beliefs - * @param beliefGrid data structure to store references to the grid beliefs specifically - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param observationProbabilities data structure containing for each belief and possible action the probability to go to a state with a given observation - * @param nextBelieves data structure containing for each belief the successor belief after performing an action and observing a given observation - * @param beliefActionRewards data structure containing for each belief and possible action the reward for performing the action - * @param subSimplexCache caching data structure to store already computed subsimplices - * @param lambdaCache caching data structure to store already computed lambda values - * @param result data structure to store result values for each grid state - * @param chosenActions data structure to store the action(s) that lead to the computed result value - * @param gridResolution the resolution of the grid - * @param min true if minimal values are to be computed - * @param computeRewards true if rewards are to be computed - * @return the resulting probability/reward in the initial state - */ - ValueType - overApproximationValueIteration(std::vector> &beliefList, - std::vector> &beliefGrid, std::vector &beliefIsTarget, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map> &beliefActionRewards, - std::map>> &subSimplexCache, - std::map> &lambdaCache, std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, bool min, bool computeRewards); 
- storm::models::sparse::Pomdp const& pomdp; Options options; storm::utility::ConstantsComparator cc; - // TODO: these should be obsolete, right? - bool useMdp; - uint64_t maxIterations; }; } From d28c982fbdf71bb1973885b9da23d7620009660a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 14:37:15 +0100 Subject: [PATCH 065/155] Fix for missing initial belief ID in return struct --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 9f6a64ec0..16d7f8b71 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -835,8 +835,8 @@ namespace storm { return std::make_shared>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, - refinementComponents->beliefIsTarget, - refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap}); + refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, + underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); } template From 054c2a906eceec65280ff6f258eeb002aba8fcd3 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 14:45:38 +0100 Subject: [PATCH 066/155] Fixed wrong error when over- and under-approximation values are equal --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 16d7f8b71..ce37e0f0c 100644 --- 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -217,7 +217,8 @@ namespace storm { res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); + STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue), + "The value for the under-approximation is larger than the value for the over-approximation."); if (res->overApproxValue - res->underApproxValue <= options.refinementPrecision) { break; } From 02a325ba75574e586282141a200003977a825ba3 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 15:29:37 +0100 Subject: [PATCH 067/155] Fixed error that refinement did not stop if initial computation already yields same values for over- and under-approximation --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index ce37e0f0c..936db1c38 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -142,7 +142,7 @@ namespace storm { STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, 
underApproxMap, underApproxModelSize); ValueType lastMinScore = storm::utility::infinity(); - while (refinementCounter < 1000) { + while (refinementCounter < 1000 && res->overApproxValue - res->underApproxValue > options.refinementPrecision) { // TODO the actual refinement // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); @@ -219,9 +219,6 @@ namespace storm { //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); - if (res->overApproxValue - res->underApproxValue <= options.refinementPrecision) { - break; - } ++refinementCounter; } From 77b1de510f55a746eb50f3fb37950ecffe80c4a8 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 16:29:34 +0100 Subject: [PATCH 068/155] Renaming of naive underapproximation value map --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 936db1c38..9a88e5d8e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -125,11 +125,11 @@ namespace storm { storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underapproxRes, "Result not exist."); underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); - auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto 
initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); positionalWatch.stop(); STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " - << underApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) + << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. @@ -140,7 +140,8 @@ namespace storm { uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, underApproxMap, underApproxModelSize); + std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, + initialUnderApproxMap, underApproxModelSize); ValueType lastMinScore = storm::utility::infinity(); while (refinementCounter < 1000 && res->overApproxValue - res->underApproxValue > options.refinementPrecision) { // TODO the actual refinement @@ -215,7 +216,7 @@ namespace storm { STORM_PRINT( "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, - res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); + res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, 
"oa_model_" + std::to_string(refinementCounter +1) + ".dot"); STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); From 44fd26bd13fccf7bec4d5a9562371193e45593dd Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 16:30:30 +0100 Subject: [PATCH 069/155] Implementation of exploration stopping in refinement procedure for newly added states --- .../ApproximatePOMDPModelchecker.cpp | 91 +++++++++++-------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 9a88e5d8e..91971def3 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -552,10 +552,21 @@ namespace storm { boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { + bool initialBoundMapsSet = overApproximationMap && underApproximationMap; + std::map initialOverMap; + std::map initialUnderMap; + if (initialBoundMapsSet) { + initialOverMap = overApproximationMap.value(); + initialUnderMap = underApproximationMap.value(); + } // Note that a persistent cache is not support by the current data structure. 
The resolution for the given belief also has to be stored somewhere to cache effectively std::map>> subSimplexCache; std::map> lambdaCache; + // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace + std::map weightedSumOverMap; + std::map weightedSumUnderMap; + uint64_t nextBeliefId = refinementComponents->beliefList.size(); uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); std::set relevantStates; @@ -622,18 +633,16 @@ namespace storm { refinementComponents->beliefGrid.push_back(gridBelief); refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); // compute overapproximate value using MDP result map - //TODO do this - /* - if (boundMapsSet) { + if (initialBoundMapsSet) { auto tempWeightedSumOver = storm::utility::zero(); auto tempWeightedSumUnder = storm::utility::zero(); for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); } - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } */ + weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; + weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + } beliefsToBeExpanded.push_back(nextBeliefId); refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; @@ -654,17 +663,25 @@ namespace storm { transitionsStateActionPair[stateActionPair] = transitionInActionBelief; } } + + std::set stoppedExplorationStateSet; + // Expand newly 
added beliefs while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = refinementComponents->beliefIsTarget[currId]; - /* TODO - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { - mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); + if (initialBoundMapsSet && + cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { + STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] + << std::endl) + transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, + {0, storm::utility::one() - + weightedSumOverMap[currId]}}; + stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); continue; - }*/ + } if (isTarget) { // Depending on whether we compute rewards, we select the right initial result @@ -690,21 +707,21 @@ namespace storm { //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; - /* TODO Caching + if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; - } else { */ - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - /*if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + } else { 
+ auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, + observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], + pomdp.getNumberOfStates()); + subSimplex = temp.first; + lambdas = temp.second; + if (options.cacheSubsimplices) { + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } } - }*/ for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { @@ -716,17 +733,16 @@ namespace storm { refinementComponents->beliefGrid.push_back(gridBelief); refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); // compute overapproximate value using MDP result map - /* - if (boundMapsSet) { + if (initialBoundMapsSet) { auto tempWeightedSumOver = storm::utility::zero(); auto tempWeightedSumUnder = storm::utility::zero(); for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); } - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } */ + weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; + weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + } beliefsToBeExpanded.push_back(nextBeliefId); refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; @@ -750,15 +766,7 @@ namespace storm { /* if (computeRewards) { beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); - } - - - if (transitionsInBelief.empty()) { - 
std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - } - mdpTransitions.push_back(transitionsInBelief);*/ + }*/ } } @@ -766,7 +774,7 @@ namespace storm { mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); - + mdpLabeling.addLabelToState("target", 1); uint_fast64_t currentRow = 0; uint_fast64_t currentRowGroup = 0; storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); @@ -801,6 +809,9 @@ namespace storm { mdpLabeling.addLabelToState("target", state); break; } + if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { + break; + } } ++currentRowGroup; } From 62e3a626860dc52397a166fa79a387122fef4a3f Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 17:41:12 +0100 Subject: [PATCH 070/155] Fix for belief reward computation --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 91971def3..594d213b8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1279,9 +1279,8 @@ namespace storm { template ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); - for (size_t i = 0; i < belief.probabilities.size(); ++i) { - for (auto const &probEntry : belief.probabilities) - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); + for (auto const &probEntry : belief.probabilities) { + result 
+= probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); } return result; } From 0507da4ffa9d7af8c007b41bb60eca8261fad769 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 19:15:27 +0100 Subject: [PATCH 071/155] Adjusted Refinement Procedure for rewards --- .../ApproximatePOMDPModelchecker.cpp | 69 ++++++++++--------- .../ApproximatePOMDPModelchecker.h | 2 +- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 594d213b8..9b1c24a70 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -54,7 +54,7 @@ namespace storm { STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); } if (options.doRefinement) { - return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); } else { return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); } @@ -62,8 +62,7 @@ namespace storm { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. 
This is currently not supported"); if (options.doRefinement) { - STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Rewards with refinement not implemented yet"); - //return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); } else { // FIXME: pick the non-unique reward model here STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); @@ -77,7 +76,7 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachabilityProbability(std::set const &targetObservations, bool min) { + ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, bool computeRewards) { std::srand(time(NULL)); // Compute easy upper and lower bounds storm::utility::Stopwatch underlyingWatch(true); @@ -94,11 +93,15 @@ namespace storm { storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); auto underlyingModel = std::static_pointer_cast>( std::make_shared>(underlyingMdp)); - std::string initPropString = min ? "Pmin" : "Pmax"; + std::string initPropString = computeRewards ? "R" : "P"; + initPropString += min ? "min" : "max"; initPropString += "=? 
[F \"__goal__\"]"; std::vector propVector = storm::api::parseProperties(initPropString); std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); STORM_PRINT("Underlying MDP" << std::endl) + if (computeRewards) { + underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel()); + } underlyingMdp.printModelInformationToStream(std::cout); std::unique_ptr underlyingRes( storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); @@ -119,6 +122,9 @@ namespace storm { } } auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + if (computeRewards) { + underApproxModel->restrictRewardModels({"std"}); + } STORM_PRINT("Random Positional Scheduler" << std::endl) underApproxModel->printModelInformationToStream(std::cout); std::unique_ptr underapproxRes( @@ -140,10 +146,12 @@ namespace storm { uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, + std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, + initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); ValueType lastMinScore = storm::utility::infinity(); - while (refinementCounter < 1000 && res->overApproxValue - res->underApproxValue > options.refinementPrecision) { + while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) || + (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) { // TODO the actual refinement // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); @@ -215,10 
+223,11 @@ namespace storm { } STORM_PRINT( "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, + res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue), + STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || + cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); ++refinementCounter; } @@ -373,8 +382,8 @@ namespace storm { uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { + + if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { // TODO: with rewards whe would have to assign the corresponding reward to this transition mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; @@ -458,19 +467,10 @@ namespace storm { } } } - if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - beliefList[currId]); - } if (!transitionInActionBelief.empty()) 
{ transitionsInBelief.push_back(transitionInActionBelief); } } - if (computeRewards) { - beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); - } - - if (transitionsInBelief.empty()) { std::map transitionInActionBelief; transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); @@ -654,11 +654,6 @@ namespace storm { } } } - /* TODO - if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - refinementComponents->beliefList[currId]); - }*/ if (!transitionInActionBelief.empty()) { transitionsStateActionPair[stateActionPair] = transitionInActionBelief; } @@ -754,19 +749,10 @@ namespace storm { } } } - /* - if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - beliefList[currId]); - }*/ if (!transitionInActionBelief.empty()) { transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; } } - /* - if (computeRewards) { - beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); - }*/ } } @@ -817,6 +803,21 @@ namespace storm { } storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { + auto currentBelief = refinementComponents->beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + 
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } + } + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); + } overApproxMdp.printModelInformationToStream(std::cout); auto model = std::make_shared>(overApproxMdp); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5805449f4..26877d062 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -75,7 +75,7 @@ namespace storm { * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - refineReachabilityProbability(std::set const &targetObservations, bool min); + refineReachability(std::set const &targetObservations, bool min, bool computeRewards); /** * Compute the reachability probability of given target observations on a POMDP for the given resolution only. 
From 311362d99554a9add4733d5340976806601cc4d2 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 18 Mar 2020 19:16:46 +0100 Subject: [PATCH 072/155] Removal of some more obsolete code --- .../ApproximatePOMDPModelchecker.cpp | 97 ------------------- .../ApproximatePOMDPModelchecker.h | 27 ------ 2 files changed, 124 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 9b1c24a70..d8b56e74d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1051,85 +1051,6 @@ namespace storm { return storm::pomdp::Belief{id, observation, distribution}; } - template - void ApproximatePOMDPModelchecker::constructBeliefGrid( - std::set const &target_observations, uint64_t gridResolution, - std::vector> &beliefList, - std::vector> &grid, std::vector &beliefIsTarget, - uint64_t nextId) { - bool isTarget; - uint64_t newId = nextId; - - for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { - std::vector statesWithObservation = pomdp.getStatesWithObservation(observation); - isTarget = target_observations.find(observation) != target_observations.end(); - - // TODO this can probably be condensed - if (statesWithObservation.size() == 1) { - // If there is only one state with the observation, we can directly add the corresponding belief - std::map distribution; - distribution[statesWithObservation.front()] = storm::utility::one(); - storm::pomdp::Belief belief = {newId, observation, distribution}; - STORM_LOG_TRACE( - "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," - << distribution << "]"); - beliefList.push_back(belief); - grid.push_back(belief); - beliefIsTarget.push_back(isTarget); - ++newId; - } else { - // Otherwise we have to enumerate all possible distributions with regards to the grid - // helper is used to 
derive the distribution of the belief - std::vector helper(statesWithObservation.size(), ValueType(0)); - helper[0] = storm::utility::convertNumber(gridResolution); - bool done = false; - uint64_t index = 0; - - while (!done) { - std::map distribution; - for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { - if (helper[i] - helper[i + 1] > ValueType(0)) { - distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) / - storm::utility::convertNumber( - gridResolution); - } - } - if (helper[statesWithObservation.size() - 1] > ValueType(0)) { - distribution[statesWithObservation.back()] = - helper[statesWithObservation.size() - 1] / - storm::utility::convertNumber(gridResolution); - } - storm::pomdp::Belief belief = {newId, observation, distribution}; - STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); - beliefList.push_back(belief); - grid.push_back(belief); - beliefIsTarget.push_back(isTarget); - if (helper[statesWithObservation.size() - 1] == - storm::utility::convertNumber(gridResolution)) { - // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions - done = true; - } else { - // Update helper by finding the index to increment - index = statesWithObservation.size() - 1; - while (helper[index] == helper[index - 1]) { - --index; - } - STORM_LOG_ASSERT(index > 0, "Error in BeliefGrid generation - index wrong"); - // Increment the value at the index - ++helper[index]; - // Reset all indices greater than the changed one to 0 - ++index; - while (index < statesWithObservation.size()) { - helper[index] = 0; - ++index; - } - } - ++newId; - } - } - } - } - template std::pair>, std::vector> ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( @@ -1226,24 +1147,6 @@ namespace storm { return res; } - template - storm::pomdp::Belief - ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t 
actionIndex, uint64_t id) { - std::map distributionAfter; - uint32_t observation = 0; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (entry.getValue() > 0) { - observation = pomdp.getObservation(entry.getColumn()); - distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - } - return storm::pomdp::Belief{id, observation, distributionAfter}; - } - template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 26877d062..6837151cd 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -184,22 +184,6 @@ namespace storm { computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); - /** - * Helper method to construct the static belief grid for the POMDP overapproximation - * - * @param target_observations set of target observations - * @param gridResolution the resolution of the grid to be constructed - * @param beliefList data structure to store all generated beliefs - * @param grid data structure to store references to the grid beliefs specifically - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param nextId the ID to be used for the next generated belief - */ - void constructBeliefGrid(std::set const &target_observations, uint64_t gridResolution, - std::vector> &beliefList, - std::vector> &grid, - std::vector &beliefIsTarget, 
uint64_t nextId); - - /** * Helper method to get the probabilities to be in a state with each observation after performing an action * @@ -228,17 +212,6 @@ namespace storm { storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id); - /** - * Helper method to generate the next belief that results from a belief by performing an action - * - * @param belief the starting belief - * @param actionIndex the index of the action to be performed - * @param id the ID for the generated belief - * @return a belief object representing the belief after performing the action in the starting belief - */ - storm::pomdp::Belief - getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id); - /** * Helper to get the id of a Belief stored in a given vector structure * From 0b3945ca123b3f94590d7588ab753c90239052f9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 12:38:26 +0100 Subject: [PATCH 073/155] Pomdp/FormulaInformation: Added template instantiations which apparently are needed with LTO --- src/storm-pomdp/analysis/FormulaInformation.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/storm-pomdp/analysis/FormulaInformation.cpp b/src/storm-pomdp/analysis/FormulaInformation.cpp index 8bb00450c..d648c6ee8 100644 --- a/src/storm-pomdp/analysis/FormulaInformation.cpp +++ b/src/storm-pomdp/analysis/FormulaInformation.cpp @@ -169,7 +169,11 @@ namespace storm { return FormulaInformation(); } + template void FormulaInformation::updateTargetStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newTargetStates); + template void FormulaInformation::updateSinkStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newSinkStates); template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); + template void FormulaInformation::updateTargetStates>(storm::models::sparse::Pomdp const& pomdp, 
storm::storage::BitVector&& newTargetStates); + template void FormulaInformation::updateSinkStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newSinkStates); template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); } From 6f3fab8e80ad2b57d78460c7e069cca59a6ccca0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 14:07:23 +0100 Subject: [PATCH 074/155] Added a statistics struct to the approximatePOMDP model checker --- .../ApproximatePOMDPModelchecker.cpp | 34 +++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 13 ++++++- src/storm/models/sparse/Pomdp.cpp | 17 ++++++++++ src/storm/models/sparse/Pomdp.h | 8 +++-- 4 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index d8b56e74d..3945acb29 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -73,6 +73,40 @@ namespace storm { } } + template + void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { + stream << "##### Grid Approximation Statistics ######" << std::endl; + stream << "# Input model: " << std::endl; + pomdp.printModelInformationToStream(stream); + stream << "# Max. 
Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl; + + if (statistics.aborted) { + stream << "# Computation aborted early"; + } + + // Refinement information: + if (statistics.refinementSteps) { + stream << "# Number of refinement steps " << statistics.refinementSteps.get(); + } + + // The overapproximation MDP: + if (statistics.overApproximationStates) { + stream << "# Number of states in the "; + if (options.doRefinement) { + stream << "final "; + } + stream << "grid MDP for the over-approximation: "; + if (statistics.overApproximationBuildAborted) { + stream << ">="; + } + stream << statistics.overApproximationStates.get() << std::endl; + stream << "# Time spend for building the grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; + stream << "# Time spend for checking the grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; + } + + stream << "##########################################" << std::endl; + } + template std::unique_ptr> diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 6837151cd..df47c721a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -66,6 +66,8 @@ namespace storm { std::unique_ptr> check(storm::logic::Formula const& formula); + void printStatisticsToStream(std::ostream& stream) const; + private: /** * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop @@ -240,7 +242,16 @@ namespace storm { */ ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); - + struct Statistics { + boost::optional overApproximationStates; + bool overApproximationBuildAborted; + storm::utility::Stopwatch overApproximationBuildTime; + storm::utility::Stopwatch overApproximationCheckTime; + boost::optional refinementSteps; + 
bool aborted; + }; + Statistics statistics; + storm::models::sparse::Pomdp const& pomdp; Options options; storm::utility::ConstantsComparator cc; diff --git a/src/storm/models/sparse/Pomdp.cpp b/src/storm/models/sparse/Pomdp.cpp index aaad3a9e8..5b2e2a8a6 100644 --- a/src/storm/models/sparse/Pomdp.cpp +++ b/src/storm/models/sparse/Pomdp.cpp @@ -54,10 +54,27 @@ namespace storm { return nrObservations; } + template + uint64_t Pomdp::getMaxNrStatesWithSameObservation() const { + std::map counts; + for (auto const& obs : observations) { + auto insertionRes = counts.emplace(obs, 1ull); + if (!insertionRes.second) { + ++insertionRes.first->second; + } + } + uint64_t result = 0; + for (auto const& count : counts) { + result = std::max(result, count.second); + } + return result; + } + template std::vector const& Pomdp::getObservations() const { return observations; } + template std::string Pomdp::additionalDotStateInfo(uint64_t state) const { diff --git a/src/storm/models/sparse/Pomdp.h b/src/storm/models/sparse/Pomdp.h index a9e6c7e95..b09f87886 100644 --- a/src/storm/models/sparse/Pomdp.h +++ b/src/storm/models/sparse/Pomdp.h @@ -59,13 +59,17 @@ namespace storm { uint64_t getNrObservations() const; + /*! + * Returns the number of hidden values, i.e. the maximum number of states with the same observation + */ + uint64_t getMaxNrStatesWithSameObservation() const; + std::vector const& getObservations() const; std::vector getStatesWithObservation(uint32_t observation) const; + bool isCanonic() const; - - protected: /*! * Return a string that is additonally added to the state information in the dot stream. From 7d4e8cf2137e02e0609446a69babbead18d9f030 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 14:07:53 +0100 Subject: [PATCH 075/155] POMDP: Print the statistics from the new statistics struct. 
--- src/storm-pomdp-cli/storm-pomdp.cpp | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ac48327b5..6cca313e8 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -32,6 +32,8 @@ #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/utility/NumberTraits.h" #include "storm/utility/Stopwatch.h" +#include "storm/utility/SignalHandler.h" +#include "storm/utility/NumberTraits.h" #include "storm/exceptions/UnexpectedException.h" #include "storm/exceptions/NotSupportedException.h" @@ -79,6 +81,20 @@ namespace storm { return preprocessingPerformed; } + template + void printResult(ValueType const& lowerBound, ValueType const& upperBound) { + if (lowerBound == upperBound) { + STORM_PRINT_AND_LOG(lowerBound); + } else { + STORM_PRINT_AND_LOG("[" << lowerBound << ", " << upperBound << "] (width=" << ValueType(upperBound - lowerBound) << ")"); + } + if (storm::NumberTraits::IsExact) { + STORM_PRINT_AND_LOG(" (approx. 
"); + printResult(storm::utility::convertNumber(lowerBound), storm::utility::convertNumber(upperBound)); + STORM_PRINT_AND_LOG(")"); + } + } + template bool performAnalysis(std::shared_ptr> const& pomdp, storm::pomdp::analysis::FormulaInformation const& formulaInfo, storm::logic::Formula const& formula) { auto const& pomdpSettings = storm::settings::getModule(); @@ -103,14 +119,13 @@ namespace storm { } storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); std::unique_ptr> result = checker.check(formula); - ValueType overRes = result->overApproxValue; - ValueType underRes = result->underApproxValue; - if (overRes != underRes) { - STORM_PRINT("Overapproximation Result: " << overRes << std::endl); - STORM_PRINT("Underapproximation Result: " << underRes << std::endl); + checker.printStatisticsToStream(std::cout); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("Result till abort: ") } else { - STORM_PRINT("Result: " << overRes << std::endl) + STORM_PRINT_AND_LOG("Result: ") } + printResult(result->underApproxValue, result->overApproxValue); analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { From 9d7b447b56f779b0e8fdc2b55464103973129c45 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 15:19:03 +0100 Subject: [PATCH 076/155] Storm-pomdp: Print if a result is not available. 
--- src/storm-pomdp-cli/storm-pomdp.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 6cca313e8..1cad9014a 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -120,13 +120,17 @@ namespace storm { storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); std::unique_ptr> result = checker.check(formula); checker.printStatisticsToStream(std::cout); - if (storm::utility::resources::isTerminate()) { - STORM_PRINT_AND_LOG("Result till abort: ") + if (result) { + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("Result till abort: ") + } else { + STORM_PRINT_AND_LOG("Result: ") + } + printResult(result->underApproxValue, result->overApproxValue); + analysisPerformed = true; } else { - STORM_PRINT_AND_LOG("Result: ") + STORM_PRINT_AND_LOG("Result: Not available."); } - printResult(result->underApproxValue, result->overApproxValue); - analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability(), storm::exceptions::NotSupportedException, "Qualitative memoryless scheduler search is not implemented for this property type."); @@ -241,7 +245,7 @@ namespace storm { auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); if (!model) { - STORM_PRINT_AND_LOG("No input model given."); + STORM_PRINT_AND_LOG("No input model given." 
<< std::endl); return; } STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); From e76efd14d56acdca41aafde634146c7f3b18891f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 15:20:02 +0100 Subject: [PATCH 077/155] POMDP: Filling the statistics struct with information. Also incorporated aborting (SIGTERM, i.e. CTRL+C) --- .../ApproximatePOMDPModelchecker.cpp | 134 +++++++++++++++--- .../ApproximatePOMDPModelchecker.h | 10 +- 2 files changed, 122 insertions(+), 22 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 3945acb29..7c3926db1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,6 +22,7 @@ #include "storm-parsers/api/storm-parsers.h" #include "storm/utility/macros.h" +#include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" namespace storm { @@ -36,6 +37,10 @@ namespace storm { numericPrecision = storm::NumberTraits::IsExact ? 
storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; } + template + ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { + // intentionally left empty; + } template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { @@ -44,6 +49,10 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + // Reset all collected statistics + statistics = Statistics(); + std::unique_ptr> result; + // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); if (formulaInfo.isNonNestedReachabilityProbability()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. @@ -54,23 +63,27 @@ namespace storm { STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); } if (options.doRefinement) { - return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); + result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); } else { - return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); } } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. 
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); if (options.doRefinement) { - return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); + result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); } else { // FIXME: pick the non-unique reward model here STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - return computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } + if (storm::utility::resources::isTerminate()) { + statistics.aborted = true; + } + return result; } template @@ -100,8 +113,23 @@ namespace storm { stream << ">="; } stream << statistics.overApproximationStates.get() << std::endl; - stream << "# Time spend for building the grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; - stream << "# Time spend for checking the grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; + stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; + stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; + } + + // The underapproximation MDP: + if (statistics.underApproximationStates) { + stream << "# Number of states in the "; + if (options.doRefinement) { + stream << "final "; + } + stream << "grid MDP for the under-approximation: "; + if (statistics.underApproximationBuildAborted) { + stream << ">="; 
+ } + stream << statistics.underApproximationStates.get() << std::endl; + stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; + stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } stream << "##########################################" << std::endl; @@ -183,9 +211,16 @@ namespace storm { std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); + if (res == nullptr) { + statistics.refinementSteps = 0; + return nullptr; + } ValueType lastMinScore = storm::utility::infinity(); while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) || (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) { + if (storm::utility::resources::isTerminate()) { + break; + } // TODO the actual refinement // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); @@ -265,7 +300,7 @@ namespace storm { "The value for the under-approximation is larger than the value for the over-approximation."); ++refinementCounter; } - + statistics.refinementSteps = refinementCounter; return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); } @@ -280,6 +315,9 @@ namespace storm { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, underApproximationMap, maxUaModelSize); + if (result == nullptr) { + return nullptr; + } return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -313,7 +351,7 @@ namespace storm { // current ID -> action -> reward std::map> beliefActionRewards; 
uint64_t nextId = 0; - storm::utility::Stopwatch expansionTimer(true); + statistics.overApproximationBuildTime.start(); // Initial belief always has belief ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; @@ -512,11 +550,19 @@ namespace storm { } mdpTransitions.push_back(transitionsInBelief); } + if (storm::utility::resources::isTerminate()) { + statistics.overApproximationBuildAborted = true; + break; + } } - expansionTimer.stop(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) - STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - + statistics.overApproximationStates = mdpTransitions.size(); + STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl); + STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); + if (storm::utility::resources::isTerminate()) { + statistics.overApproximationBuildTime.stop(); + return nullptr; + } + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -541,6 +587,8 @@ namespace storm { overApproxMdp.addRewardModel("std", mdpRewardModel); overApproxMdp.restrictRewardModels(std::set({"std"})); } + statistics.overApproximationBuildTime.stop(); + STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); overApproxMdp.printModelInformationToStream(std::cout); auto model = std::make_shared>(overApproxMdp); @@ -555,19 +603,26 @@ namespace storm { hint.setResultHint(hintVector); auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); - storm::utility::Stopwatch overApproxTimer(true); + statistics.overApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); - overApproxTimer.stop(); - STORM_LOG_ASSERT(res, "Result not exist."); + statistics.overApproximationCheckTime.stop(); + if (storm::utility::resources::isTerminate() && !res) { + return nullptr; + } + STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; - STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) + STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." 
<< std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + if (storm::utility::resources::isTerminate() && !underApproxComponents) { + return std::make_unique>( + RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); + } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); return std::make_unique>( @@ -601,6 +656,8 @@ namespace storm { std::map weightedSumOverMap; std::map weightedSumUnderMap; + statistics.overApproximationBuildTime.start(); + uint64_t nextBeliefId = refinementComponents->beliefList.size(); uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); std::set relevantStates; @@ -788,8 +845,18 @@ namespace storm { } } } + if (storm::utility::resources::isTerminate()) { + statistics.overApproximationBuildAborted = true; + break; + } } + statistics.overApproximationStates = nextStateId; + if (storm::utility::resources::isTerminate()) { + statistics.overApproximationBuildTime.stop(); + // Return the result from the old refinement step + return refinementComponents; + } storm::models::sparse::StateLabeling mdpLabeling(nextStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -853,7 +920,9 @@ namespace storm { overApproxMdp.restrictRewardModels(std::set({"std"})); } overApproxMdp.printModelInformationToStream(std::cout); - + statistics.overApproximationBuildTime.stop(); + STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); + auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); std::string propertyString = computeRewards ? "R" : "P"; @@ -862,19 +931,26 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); auto task = storm::api::createTask(property, false); - storm::utility::Stopwatch overApproxTimer(true); + statistics.overApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); - overApproxTimer.stop(); + statistics.overApproximationCheckTime.stop(); + if (storm::utility::resources::isTerminate() && !res) { + return refinementComponents; // Return the result from the previous iteration + } + STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; - STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + if (storm::utility::resources::isTerminate() && !underApproxComponents) { + return std::make_unique>( + RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, 
refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); + } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); return std::make_shared>( @@ -918,6 +994,7 @@ namespace storm { uint64_t nextId = beliefList.size(); uint64_t counter = 0; + statistics.underApproximationBuildTime.start(); // Expand the believes visitedBelieves.insert(initialBeliefId); believesToBeExpanded.push_back(initialBeliefId); @@ -958,8 +1035,17 @@ namespace storm { transitions.push_back(actionTransitionStorage); } believesToBeExpanded.pop_front(); + if (storm::utility::resources::isTerminate()) { + statistics.underApproximationBuildAborted = true; + break; + } } - + statistics.underApproximationStates = transitions.size(); + if (storm::utility::resources::isTerminate()) { + statistics.underApproximationBuildTime.stop(); + return nullptr; + } + storm::models::sparse::StateLabeling labeling(transitions.size()); labeling.addLabel("init"); labeling.addLabel("target"); @@ -993,6 +1079,7 @@ namespace storm { model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); + statistics.underApproximationBuildTime.stop(); std::string propertyString; if (computeRewards) { @@ -1003,7 +1090,12 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + statistics.underApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); + statistics.underApproximationCheckTime.stop(); + if (storm::utility::resources::isTerminate() && !res) { + return nullptr; + } STORM_LOG_ASSERT(res, "Result does not exist."); 
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index df47c721a..782220358 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -243,11 +243,19 @@ namespace storm { ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); struct Statistics { + Statistics(); + boost::optional refinementSteps; + boost::optional overApproximationStates; bool overApproximationBuildAborted; storm::utility::Stopwatch overApproximationBuildTime; storm::utility::Stopwatch overApproximationCheckTime; - boost::optional refinementSteps; + + boost::optional underApproximationStates; + bool underApproximationBuildAborted; + storm::utility::Stopwatch underApproximationBuildTime; + storm::utility::Stopwatch underApproximationCheckTime; + bool aborted; }; Statistics statistics; From 54b912d350e9d23cf69fa3a53958458769a00384 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 20 Mar 2020 16:14:40 +0100 Subject: [PATCH 078/155] storm-pomdp: better output. --- src/storm-pomdp-cli/storm-pomdp.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 1cad9014a..fa23bbb77 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -127,10 +127,11 @@ namespace storm { STORM_PRINT_AND_LOG("Result: ") } printResult(result->underApproxValue, result->overApproxValue); - analysisPerformed = true; + STORM_PRINT_AND_LOG(std::endl); } else { - STORM_PRINT_AND_LOG("Result: Not available."); + STORM_PRINT_AND_LOG("Result: Not available." 
<< std::endl); } + analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability(), storm::exceptions::NotSupportedException, "Qualitative memoryless scheduler search is not implemented for this property type."); From 558078b6e9d26b15606dc5fb213554c1de506e7e Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 24 Mar 2020 14:49:12 +0100 Subject: [PATCH 079/155] MakePOMDPCanonic: Improved output of error message --- .../transformer/MakePOMDPCanonic.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp b/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp index da03d47ca..62314426c 100644 --- a/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp +++ b/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp @@ -95,7 +95,13 @@ namespace storm { void actionIdentifiersToStream(std::ostream& stream, std::vector const& actionIdentifiers, ChoiceLabelIdStorage const& labelStorage) { stream << "actions: {"; + bool first = true; for (auto ai : actionIdentifiers) { + if (first) { + first = false; + } else { + stream << " "; + } stream << "[" << ai.choiceLabelId << " (" << labelStorage.getLabel(ai.choiceLabelId) << ")"; stream << ", " << ai.choiceOriginId << "]"; } @@ -105,8 +111,14 @@ namespace storm { template void actionIdentifiersToStream(std::ostream& stream, std::map const& actionIdentifiers, ChoiceLabelIdStorage const& labelStorage) { stream << "actions: {"; + bool first = true; for (auto ai : actionIdentifiers) { - stream << "[" << ai.first.choiceLabelId << "('" << labelStorage.getLabel(ai.first.choiceLabelId) << "')"; + if (first) { + first = false; + } else { + stream << " "; + } + stream << "[" << ai.first.choiceLabelId << " (" << labelStorage.getLabel(ai.first.choiceLabelId) << ")"; stream << ", " << ai.first.choiceOriginId << "]"; } stream << "}"; @@ -144,7 +156,7 @@ namespace storm { template std::string 
MakePOMDPCanonic::getStateInformation(uint64_t state) const { if(pomdp.hasStateValuations()) { - return std::to_string(state) + "[" + pomdp.getStateValuations().getStateInfo(state) + "]"; + return std::to_string(state) + " " + pomdp.getStateValuations().getStateInfo(state); } else { return std::to_string(state); } @@ -242,7 +254,7 @@ namespace storm { detail::actionIdentifiersToStream(std::cout, actionIdentifiers, labelStorage); std::cout << " according to state " << state << "." << std::endl; - STORM_LOG_THROW(false, storm::exceptions::AmbiguousModelException, "Actions identifiers do not align between states '" << getStateInformation(state) << "' and '" << getStateInformation(actionIdentifierDefinition[observation]) << "', both having observation " << observation << ". See output above for more information."); + STORM_LOG_THROW(false, storm::exceptions::AmbiguousModelException, "Actions identifiers do not align between states \n\t" << getStateInformation(state) << "\nand\n\t" << getStateInformation(actionIdentifierDefinition[observation]) << "\nboth having observation " << observation << ". 
See output above for more information."); } } From a8f3205d969070fbc5d4b5e121509085d383c7bb Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 25 Mar 2020 08:52:08 +0100 Subject: [PATCH 080/155] minor clean-up of includes --- src/storm/settings/SettingsManager.cpp | 1 - src/storm/utility/vector.h | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/storm/settings/SettingsManager.cpp b/src/storm/settings/SettingsManager.cpp index 6a694d09d..bff3aa37d 100644 --- a/src/storm/settings/SettingsManager.cpp +++ b/src/storm/settings/SettingsManager.cpp @@ -11,7 +11,6 @@ #include "storm/exceptions/IllegalFunctionCallException.h" #include "storm/exceptions/OptionParserException.h" -#include "storm/utility/storm-version.h" #include "storm/settings/modules/GeneralSettings.h" #include "storm/settings/modules/CoreSettings.h" #include "storm/settings/modules/IOSettings.h" diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h index e5fda444e..dd562a8e1 100644 --- a/src/storm/utility/vector.h +++ b/src/storm/utility/vector.h @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include "storm/adapters/RationalFunctionAdapter.h" +#include "storm/adapters/IntelTbbAdapter.h" #include From 7f102c915bca0bffd2e5f1572b1e993046d2c2f7 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 25 Mar 2020 10:58:19 +0100 Subject: [PATCH 081/155] Improved some output --- src/storm-pomdp-cli/storm-pomdp.cpp | 10 ++++++---- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index fa23bbb77..b18f77933 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -122,14 +122,14 @@ namespace storm { checker.printStatisticsToStream(std::cout); if (result) { if (storm::utility::resources::isTerminate()) { - STORM_PRINT_AND_LOG("Result till abort: ") + STORM_PRINT_AND_LOG("\nResult 
till abort: ") } else { - STORM_PRINT_AND_LOG("Result: ") + STORM_PRINT_AND_LOG("\nResult: ") } printResult(result->underApproxValue, result->overApproxValue); STORM_PRINT_AND_LOG(std::endl); } else { - STORM_PRINT_AND_LOG("Result: Not available." << std::endl); + STORM_PRINT_AND_LOG("\nResult: Not available." << std::endl); } analysisPerformed = true; } @@ -157,7 +157,9 @@ namespace storm { STORM_PRINT_AND_LOG("Analyzing the formula on the fully observable MDP ... "); auto result = storm::api::verifyWithSparseEngine(pomdp->template as>(), storm::api::createTask(formula.asSharedPointer(), true))->template asExplicitQuantitativeCheckResult(); result.filter(storm::modelchecker::ExplicitQualitativeCheckResult(pomdp->getInitialStates())); - STORM_PRINT_AND_LOG("Result: " << result.getMax() << std::endl); + STORM_PRINT_AND_LOG("\nResult: "); + printResult(result.getMin(), result.getMax()); + STORM_PRINT_AND_LOG(std::endl); analysisPerformed = true; } return analysisPerformed; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 7c3926db1..b511b386d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -99,7 +99,7 @@ namespace storm { // Refinement information: if (statistics.refinementSteps) { - stream << "# Number of refinement steps " << statistics.refinementSteps.get(); + stream << "# Number of refinement steps: " << statistics.refinementSteps.get(); } // The overapproximation MDP: From b600498d0ebab1ff8418036cea26e9b8ada21970 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 25 Mar 2020 14:06:19 +0100 Subject: [PATCH 082/155] Better output for checking the fully observable model. 
--- src/storm-pomdp-cli/storm-pomdp.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index b18f77933..58637c6e7 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -155,11 +155,20 @@ namespace storm { } if (pomdpSettings.isCheckFullyObservableSet()) { STORM_PRINT_AND_LOG("Analyzing the formula on the fully observable MDP ... "); - auto result = storm::api::verifyWithSparseEngine(pomdp->template as>(), storm::api::createTask(formula.asSharedPointer(), true))->template asExplicitQuantitativeCheckResult(); - result.filter(storm::modelchecker::ExplicitQualitativeCheckResult(pomdp->getInitialStates())); - STORM_PRINT_AND_LOG("\nResult: "); - printResult(result.getMin(), result.getMax()); - STORM_PRINT_AND_LOG(std::endl); + auto resultPtr = storm::api::verifyWithSparseEngine(pomdp->template as>(), storm::api::createTask(formula.asSharedPointer(), true)); + if (resultPtr) { + auto result = resultPtr->template asExplicitQuantitativeCheckResult(); + result.filter(storm::modelchecker::ExplicitQualitativeCheckResult(pomdp->getInitialStates())); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") + } else { + STORM_PRINT_AND_LOG("\nResult: ") + } + printResult(result.getMin(), result.getMax()); + STORM_PRINT_AND_LOG(std::endl); + } else { + STORM_PRINT_AND_LOG("\nResult: Not available." 
<< std::endl); + } analysisPerformed = true; } return analysisPerformed; From b53b6ab27533c9b7189a54eac0d8d877660f51ee Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 27 Mar 2020 06:31:11 +0100 Subject: [PATCH 083/155] Added missing line breaks --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index b511b386d..1ff8d07ca 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -94,12 +94,12 @@ namespace storm { stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl; if (statistics.aborted) { - stream << "# Computation aborted early"; + stream << "# Computation aborted early" << std::endl; } // Refinement information: if (statistics.refinementSteps) { - stream << "# Number of refinement steps: " << statistics.refinementSteps.get(); + stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl; } // The overapproximation MDP: From 6fee61feb1628846d93617194246ba07604432fd Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 27 Mar 2020 15:11:52 +0100 Subject: [PATCH 084/155] POMDP: Started to split belief logic from exploration logic. 
--- .../ApproximatePOMDPModelchecker.cpp | 343 ++++++++---------- .../ApproximatePOMDPModelchecker.h | 3 +- src/storm-pomdp/storage/BeliefGrid.h | 296 +++++++++++++++ 3 files changed, 453 insertions(+), 189 deletions(-) create mode 100644 src/storm-pomdp/storage/BeliefGrid.h diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1ff8d07ca..16704e4a4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1,5 +1,7 @@ #include "ApproximatePOMDPModelchecker.h" +#include + #include #include "storm-pomdp/analysis/FormulaInformation.h" @@ -21,6 +23,8 @@ #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" +#include "storm-pomdp/storage/BeliefGrid.h" + #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" @@ -322,6 +326,16 @@ namespace storm { } + + template + ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : belief) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + template std::shared_ptr> ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, @@ -338,15 +352,14 @@ namespace storm { underMap = underApproximationMap.value(); } - std::vector> beliefList; - std::vector beliefIsTarget; - std::vector> beliefGrid; + storm::storage::BeliefGrid, ValueType> newBeliefGrid(pomdp, options.numericPrecision); //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; + storm::storage::BitVector expandedBeliefs; // current ID -> action -> reward 
std::map> beliefActionRewards; @@ -355,244 +368,185 @@ namespace storm { // Initial belief always has belief ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states - std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}}, - {{{1, storm::utility::one()}}}}; + storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); + uint64_t extraBottomState = 0; + uint64_t extraTargetState = computeRewards ? 0 : 1; + uint64_t nextMdpStateId = extraTargetState + 1; + uint64_t mdpMatrixRow = 0; + for (uint64_t state = 0; state < nextMdpStateId; ++state) { + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); + ++mdpMatrixRow; + } // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector hintVector = {storm::utility::zero(), storm::utility::one()}; - std::vector targetStates = {1}; - uint64_t mdpStateId = 2; - - beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId)); - ++mdpStateId; + std::vector hintVector(nextMdpStateId, storm::utility::zero()); + if (!computeRewards) { + hintVector[extraTargetState] = storm::utility::one(); + } + std::vector targetStates = {extraTargetState}; // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace std::map weightedSumOverMap; std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates()); - auto initSubSimplex = initTemp.first; - auto initLambdas = initTemp.second; + 
auto triangulation = newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]); if (options.cacheSubsimplices) { - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; + //subSimplexCache[0] = initSubSimplex; + //lambdaCache[0] = initLambdas; } std::vector> initTransitionsInBelief; - std::map initTransitionInActionBelief; - bool initInserted = false; - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { - uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); - if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { - if (searchResult == 0) { - // the initial belief is on the grid itself - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; - weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder; - } - initInserted = true; - beliefGrid.push_back(initialBelief); - beliefsToBeExpanded.push_back(0); - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? 
storm::utility::one() - : storm::utility::zero()); - } else { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } - - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - ++nextId; - - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + uint64_t initialMdpState = nextMdpStateId; + ++nextMdpStateId; + if (triangulation.size() == 1) { + // The initial belief is on the grid itself + auto initBeliefId = triangulation.gridPoints.front(); + if (boundMapsSet) { + auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId); + weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); + weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); + } + beliefsToBeExpanded.push_back(initBeliefId); + beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? 
storm::utility::one() : storm::utility::zero()); - - beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); - initTransitionInActionBelief[mdpStateId] = initLambdas[j]; - ++nextId; - ++mdpStateId; - } + } else { + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + for (uint64_t i = 0; i < triangulation.size(); ++i) { + beliefsToBeExpanded.push_back(triangulation.gridPoints[i]); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]); + beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); + ++nextMdpStateId; + if (boundMapsSet) { + auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]); + weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); + weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); } + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); } + //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + ++mdpMatrixRow; } - - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - if (!initTransitionInActionBelief.empty()) { - initTransitionsInBelief.push_back(initTransitionInActionBelief); - mdpTransitions.push_back(initTransitionsInBelief); - } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting - + // Expand the beliefs to generate the grid on-the-fly if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } + expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { - // TODO: with rewards whe would have to assign the corresponding reward to this transition - mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - targetStates.push_back(beliefStateMap.left.at(currId)); - mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one()}}}); + beliefsToBeExpanded.pop_front(); + expandedBeliefs.set(currId, true); // Do not expand this belief again. 
+ assert(currId < expandedBeliefs.size()); + uint64_t currMdpState = beliefStateMap.left.at(currId); + auto const& currBelief = newBeliefGrid.getGridPoint(currId); + uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first); + + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + + if (targetObservations.count(currObservation) != 0) { + // Make this state absorbing + targetStates.push_back(currMdpState); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); + ++mdpMatrixRow; + } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { + // TODO: with rewards we would have to assign the corresponding reward to this transition + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); + ++mdpMatrixRow; } else { - uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + auto const& currBelief = newBeliefGrid.getGridPoint(currId); + uint64_t someState = currBelief.begin()->first; + uint64_t numChoices = pomdp.getNumberOfChoices(someState); std::vector actionRewardsInState(numChoices); std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, - 
observation, nextId); - nextId = beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, - observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { - hintVector.push_back(tempWeightedSumOver); - } else { - hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? 
storm::utility::one() - : storm::utility::zero()); - } - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } else { - hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - beliefsToBeExpanded.push_back(nextId); - beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); - transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; - ++nextId; - ++mdpStateId; + auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector); + // Check for newly found grid points + expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + for (auto const& successor : successorGridPoints) { + auto successorId = successor.first; + auto successorBelief = newBeliefGrid.getGridPoint(successorId); + auto successorObservation = pomdp.getObservation(successorBelief.begin()->first); + if (!expandedBeliefs.get(successorId)) { + beliefsToBeExpanded.push_back(successorId); + beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); + ++nextMdpStateId; + + if (boundMapsSet) { + ValueType upperBound = getWeightedSum(successorBelief, overMap); + ValueType lowerBound = getWeightedSum(successorBelief, underMap); + if (cc.isEqual(upperBound, lowerBound)) { + hintVector.push_back(lowerBound); } else { - transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; + hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); } + weightedSumOverMap[successorId] = upperBound; + weightedSumUnderMap[successorId] = lowerBound; + } else { + hintVector.push_back(targetObservations.count(successorObservation) == 1 ? 
storm::utility::one() : storm::utility::zero()); } } + auto successorMdpState = beliefStateMap.left.at(successorId); + // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - if (!transitionInActionBelief.empty()) { - transitionsInBelief.push_back(transitionInActionBelief); - } + ++mdpMatrixRow; } - if (transitionsInBelief.empty()) { - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - } - mdpTransitions.push_back(transitionsInBelief); } if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildAborted = true; break; } } - statistics.overApproximationStates = mdpTransitions.size(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl); + statistics.overApproximationStates = nextMdpStateId; STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); return nullptr; } - storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id)); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState)); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); + storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); + for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) { + if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) { + std::cout << "Row " << row << " does not sum up to one. 
" << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl; + } + } + auto overApproxMdp = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + auto currentBelief = newBeliefGrid.getGridPoint(iter.first); + auto representativeState = currentBelief.begin()->first; + for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) { // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); + uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); + mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); } } - overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); + overApproxMdp->addRewardModel("default", mdpRewardModel); + overApproxMdp->restrictRewardModels(std::set({"default"})); } statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); - overApproxMdp.printModelInformationToStream(std::cout); + overApproxMdp->printModelInformationToStream(std::cout); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); + auto modelPtr = std::static_pointer_cast>(overApproxMdp); std::string propertyString = computeRewards ? "R" : "P"; propertyString += min ? "min" : "max"; propertyString += "=? [F \"target\"]"; @@ -604,31 +558,34 @@ namespace storm { auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); statistics.overApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { return nullptr; } STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." 
<< std::endl); - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, - maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //auto underApprox = weightedSumUnderMap[initialBelief.id]; + /* TODO: Enable under approx again: + auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { return std::make_unique>( RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - return std::make_unique>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); + */ + return std::make_unique>(RefinementComponents{modelPtr, overApprox, storm::utility::zero(), overApproxResultMap, + {}, {}, {}, {}, beliefStateMap, {}, initialBelief.id}); + } template @@ -973,7 +930,8 @@ namespace storm { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } - + + template std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, @@ -1307,7 +1265,16 @@ namespace storm { } template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief) { + ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t 
action, std::map const& belief) { + auto result = storm::utility::zero(); + for (auto const &probEntry : belief) { + result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); + } + return result; + } + + template + ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief) { auto result = storm::utility::zero(); for (auto const &probEntry : belief.probabilities) { result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 782220358..4820ea380 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -240,7 +240,8 @@ namespace storm { * @param belief the belief in which the action is performed * @return the reward earned by performing the action in the belief */ - ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); + ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); + ValueType getRewardAfterAction(uint64_t action, std::map const& belief); struct Statistics { Statistics(); diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h new file mode 100644 index 000000000..a75872ee4 --- /dev/null +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -0,0 +1,296 @@ +#pragma once + +#include +#include +//#include + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { + namespace storage { + + template + class BeliefGrid { + public: + + typedef typename PomdpType::ValueType ValueType; + //typedef boost::container::flat_map BeliefType + typedef std::map BeliefType; + typedef uint64_t BeliefId; + + 
BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + // Intentionally left empty + } + + struct Triangulation { + std::vector gridPoints; + std::vector weights; + uint64_t size() const { + return weights.size(); + } + }; + + BeliefType const& getGridPoint(BeliefId const& id) const { + return gridPoints[id]; + } + + BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const { + auto idIt = gridPointToIdMap.find(gridPoint); + STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state."); + return idIt->second; + } + + bool isEqual(BeliefType const& first, BeliefType const& second) const { + if (first.size() != second.size()) { + return false; + } + auto secondIt = second.begin(); + for (auto const& firstEntry : first) { + if (firstEntry.first != secondIt->first) { + return false; + } + if (!cc.isEqual(firstEntry.second, secondIt->second)) { + return false; + } + ++secondIt; + } + return true; + } + + bool assertBelief(BeliefType const& belief) const { + BeliefValueType sum = storm::utility::zero(); + boost::optional observation; + for (auto const& entry : belief) { + uintmax_t entryObservation = pomdp.getObservation(entry.first); + if (observation) { + if (observation.get() != entryObservation) { + STORM_LOG_ERROR("Beliefsupport contains different observations."); + return false; + } + } else { + observation = entryObservation; + } + if (cc.isZero(entry.second)) { + // We assume that beliefs only consider their support. 
+ STORM_LOG_ERROR("Zero belief probability."); + return false; + } + if (cc.isLess(entry.second, storm::utility::zero())) { + STORM_LOG_ERROR("Negative belief probability."); + return false; + } + if (cc.isLess(storm::utility::one(), entry.second)) { + STORM_LOG_ERROR("Belief probability greater than one."); + return false; + } + sum += entry.second; + } + if (!cc.isOne(sum)) { + STORM_LOG_ERROR("Belief does not sum up to one."); + return false; + } + return true; + } + + bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const { + if (triangulation.weights.size() != triangulation.gridPoints.size()) { + STORM_LOG_ERROR("Number of weights and points in triangulation does not match."); + return false; + } + if (triangulation.size() == 0) { + STORM_LOG_ERROR("Empty triangulation."); + return false; + } + BeliefType triangulatedBelief; + BeliefValueType weightSum = storm::utility::zero(); + for (uint64_t i = 0; i < triangulation.weights.size(); ++i) { + if (cc.isZero(triangulation.weights[i])) { + STORM_LOG_ERROR("Zero weight in triangulation."); + return false; + } + if (cc.isLess(triangulation.weights[i], storm::utility::zero())) { + STORM_LOG_ERROR("Negative weight in triangulation."); + return false; + } + if (cc.isLess(storm::utility::one(), triangulation.weights[i])) { + STORM_LOG_ERROR("Weight greater than one in triangulation."); + } + weightSum += triangulation.weights[i]; + BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]); + for (auto const& pointEntry : gridPoint) { + BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; + triangulatedValue += triangulation.weights[i] * pointEntry.second; + } + } + if (!cc.isOne(weightSum)) { + STORM_LOG_ERROR("Triangulation weights do not sum up to one."); + return false; + } + if (!assertBelief(triangulatedBelief)) { + STORM_LOG_ERROR("Triangulated belief is not a belief."); + } + if 
(!isEqual(belief, triangulatedBelief)) { + STORM_LOG_ERROR("Belief does not match triangulated belief."); + return false; + } + return true; + } + + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { + //TODO this can also be simplified using the sparse vector interpretation + //TODO Enable chaching for this method? + STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); + + auto nrStates = pomdp.getNumberOfStates(); + + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are based on the paper + // TODO avoid reallocations for these vectors + std::vector x(nrStates); + std::vector v(nrStates); + std::vector d(nrStates); + auto convResolution = storm::utility::convertNumber(resolution); + + for (size_t i = 0; i < nrStates; ++i) { + for (auto const &probEntry : belief) { + if (probEntry.first >= i) { + x[i] += convResolution * probEntry.second; + } + } + v[i] = storm::utility::floor(x[i]); + d[i] = x[i] - v[i]; + } + + auto p = storm::utility::vector::getSortedIndices(d); + + std::vector> qs(nrStates, std::vector(nrStates)); + for (size_t i = 0; i < nrStates; ++i) { + if (i == 0) { + for (size_t j = 0; j < nrStates; ++j) { + qs[i][j] = v[j]; + } + } else { + for (size_t j = 0; j < nrStates; ++j) { + if (j == p[i - 1]) { + qs[i][j] = qs[i - 1][j] + storm::utility::one(); + } else { + qs[i][j] = qs[i - 1][j]; + } + } + } + } + + Triangulation result; + // The first weight is 1-sum(other weights). We therefore process the js in reverse order + BeliefValueType firstWeight = storm::utility::one(); + for (size_t j = nrStates; j > 0;) { + --j; + // First create the weights. The weights vector will be reversed at the end. 
+ ValueType weight; + if (j == 0) { + weight = firstWeight; + } else { + weight = d[p[j - 1]] - d[p[j]]; + firstWeight -= weight; + } + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + BeliefType gridPoint; + auto const& qsj = qs[j]; + for (size_t i = 0; i < nrStates - 1; ++i) { + BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[i] = gridPointEntry / convResolution; + } + } + if (!cc.isZero(qsj[nrStates - 1])) { + gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; + } + result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); + } + } + std::reverse(result.weights.begin(), result.weights.end()); + + STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation."); + + return result; + } + + template + void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { + auto insertionRes = distr.emplace(state, value); + if (!insertionRes.second) { + insertionRes.first->second += value; + } + } + + BeliefId getNumberOfGridPointIds() const { + return gridPoints.size(); + } + + std::map expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { + + std::map destinations; // The belief ids should be ordered + + BeliefType gridPoint = getGridPoint(gridPointId); + + // Find the probability we go to each observation + BeliefType successorObs; // This is actually not a belief but has the same type + for (auto const& pointEntry : gridPoint) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (!storm::utility::isZero(pomdpTransition.getValue())) { + auto obs = pomdp.getObservation(pomdpTransition.getColumn()); + addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue()); + } + } + } + + // Now for each successor observation we find and triangulate the successor belief + for (auto 
const& successor : successorObs) { + BeliefType successorBelief; + for (auto const& pointEntry : gridPoint) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { + ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second; + addToDistribution(successorBelief, pomdpTransition.getColumn(), prob); + } + } + } + STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); + + Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]); + for (size_t j = 0; j < triangulation.size(); ++j) { + addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + } + } + + return destinations; + + } + + private: + + BeliefId getOrAddGridPointId(BeliefType const& gridPoint) { + auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size()); + if (insertioRes.second) { + // There actually was an insertion, so add the new grid state + gridPoints.push_back(gridPoint); + } + // Return the id + return insertioRes.first->second; + } + + PomdpType const& pomdp; + uint64_t resolution; + + std::vector gridPoints; + std::map gridPointToIdMap; + storm::utility::ConstantsComparator cc; + + + }; + } +} \ No newline at end of file From b3796d740fb6f8c8e191213df9f6bcd5611adc61 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 05:20:06 +0100 Subject: [PATCH 085/155] Fixed confusing lower and upper result bounds for minimizing properties. 
--- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 16704e4a4..04b9ea196 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -305,7 +305,11 @@ namespace storm { ++refinementCounter; } statistics.refinementSteps = refinementCounter; - return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); + if (min) { + return std::make_unique>(POMDPCheckResult{res->underApproxValue, res->overApproxValue}); + } else { + return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); + } } template @@ -322,7 +326,11 @@ namespace storm { if (result == nullptr) { return nullptr; } - return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); + if (min) { + return std::make_unique>(POMDPCheckResult{result->underApproxValue, result->overApproxValue}); + } else { + return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); + } } From 97842f356dfb7e0423cf217a63c5c801cf6e8728 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 05:20:59 +0100 Subject: [PATCH 086/155] Fixed beliefgrid exploration. 
--- .../ApproximatePOMDPModelchecker.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 04b9ea196..146d1ff3e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -367,7 +367,7 @@ namespace storm { bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; - storm::storage::BitVector expandedBeliefs; + storm::storage::BitVector foundBeliefs; // current ID -> action -> reward std::map> beliefActionRewards; @@ -445,13 +445,14 @@ namespace storm { if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + for (auto const& belId : beliefsToBeExpanded) { + foundBeliefs.set(belId, true); + } while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - expandedBeliefs.set(currId, true); // Do not expand this belief again. 
- assert(currId < expandedBeliefs.size()); + uint64_t currMdpState = beliefStateMap.left.at(currId); auto const& currBelief = newBeliefGrid.getGridPoint(currId); uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first); @@ -478,12 +479,13 @@ namespace storm { for (uint64_t action = 0; action < numChoices; ++action) { auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector); // Check for newly found grid points - expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); for (auto const& successor : successorGridPoints) { auto successorId = successor.first; auto successorBelief = newBeliefGrid.getGridPoint(successorId); auto successorObservation = pomdp.getObservation(successorBelief.begin()->first); - if (!expandedBeliefs.get(successorId)) { + if (!foundBeliefs.get(successorId)) { + foundBeliefs.set(successorId); beliefsToBeExpanded.push_back(successorId); beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); ++nextMdpStateId; From d184d67b53e2c2a7b6ef5af67880b5e0e9746f52 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 07:24:42 +0100 Subject: [PATCH 087/155] Refactored under-approximation code a bit. 
--- .../ApproximatePOMDPModelchecker.cpp | 155 +++++++++++++++++- .../ApproximatePOMDPModelchecker.h | 4 + src/storm-pomdp/storage/BeliefGrid.h | 50 +++++- 3 files changed, 199 insertions(+), 10 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 146d1ff3e..4976f3404 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -23,7 +23,6 @@ #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" -#include "storm-pomdp/storage/BeliefGrid.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" @@ -477,7 +476,7 @@ namespace storm { std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { - auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector); + auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector); // Check for newly found grid points foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); for (auto const& successor : successorGridPoints) { @@ -1072,6 +1071,158 @@ namespace storm { return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + std::set const &targetObservations, bool min, + bool computeRewards, uint64_t maxModelSize) { + // Build the belief MDP until enough states are explored. + //TODO think of other ways to stop exploration besides model size + + statistics.underApproximationBuildTime.start(); + + // Reserve states 0 and 1 as always sink/goal states + storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); + uint64_t extraBottomState = 0; + uint64_t extraTargetState = computeRewards ? 
0 : 1; + uint64_t nextMdpStateId = extraTargetState + 1; + uint64_t mdpMatrixRow = 0; + for (uint64_t state = 0; state < nextMdpStateId; ++state) { + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); + ++mdpMatrixRow; + } + std::vector targetStates = {extraTargetState}; + + bsmap_type beliefStateMap; + std::deque beliefsToBeExpanded; + + auto initialBeliefId = beliefGrid.getInitialBelief(); + beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId)); + beliefsToBeExpanded.push_back(initialBeliefId); + ++nextMdpStateId; + + // Expand the believes + storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); + for (auto const& belId : beliefsToBeExpanded) { + foundBeliefs.set(belId, true); + } + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + + uint64_t currMdpState = beliefStateMap.left.at(currId); + auto const& currBelief = beliefGrid.getGridPoint(currId); + uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); + + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + + if (targetObservations.count(currObservation) != 0) { + // Make this state absorbing + targetStates.push_back(currMdpState); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); + ++mdpMatrixRow; + } else if (currMdpState > maxModelSize) { + // In other cases, this could be helpflull as well. + if (min) { + // Get an upper bound here + if (computeRewards) { + // TODO: With minimizing rewards we need an upper bound! + // For now, add a selfloop to "generate" infinite reward + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); + } else { + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one()); + } + } else { + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? 
extraTargetState : extraBottomState, storm::utility::one()); + } + ++mdpMatrixRow; + } else { + // Iterate over all actions and add the corresponding transitions + uint64_t someState = currBelief.begin()->first; + uint64_t numChoices = pomdp.getNumberOfChoices(someState); + for (uint64_t action = 0; action < numChoices; ++action) { + auto successorBeliefs = beliefGrid.expand(currId, action); + // Check for newly found beliefs + foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); + for (auto const& successor : successorBeliefs) { + auto successorId = successor.first; + if (!foundBeliefs.get(successorId)) { + foundBeliefs.set(successorId); + beliefsToBeExpanded.push_back(successorId); + beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); + ++nextMdpStateId; + } + auto successorMdpState = beliefStateMap.left.at(successorId); + // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. 
+ mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); + } + ++mdpMatrixRow; + } + } + if (storm::utility::resources::isTerminate()) { + statistics.underApproximationBuildAborted = true; + break; + } + } + statistics.underApproximationStates = nextMdpStateId; + if (storm::utility::resources::isTerminate()) { + statistics.underApproximationBuildTime.stop(); + return nullptr; + } + + storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId)); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); + auto model = std::make_shared>(std::move(modelComponents)); + if (computeRewards) { + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); + for (auto const &iter : beliefStateMap.left) { + auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto representativeState = currentBelief.begin()->first; + for (uint64_t action = 0; action < model->getNumberOfChoices(representativeState); ++action) { + // Add the reward + uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); + uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); + mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + } + } + model->addRewardModel("default", mdpRewardModel); + model->restrictRewardModels(std::set({"default"})); + } + + model->printModelInformationToStream(std::cout); + statistics.underApproximationBuildTime.stop(); + + std::string propertyString; + if (computeRewards) { + propertyString = min ? "Rmin=? 
[F \"target\"]" : "Rmax=? [F \"target\"]"; + } else { + propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; + } + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + statistics.underApproximationCheckTime.start(); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); + statistics.underApproximationCheckTime.stop(); + if (storm::utility::resources::isTerminate() && !res) { + return nullptr; + } + STORM_LOG_ASSERT(res, "Result does not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); + auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; + + return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); + } + template storm::storage::SparseMatrix diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 4820ea380..32daa0876 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -3,6 +3,7 @@ #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" +#include "storm-pomdp/storage/BeliefGrid.h" #include #include "storm/storage/jani/Property.h" @@ -162,6 +163,9 @@ namespace storm { std::set const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); + std::unique_ptr> computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + std::set const &targetObservations, bool min, bool computeReward, + uint64_t maxModelSize); /** * Constructs the initial belief for the 
given POMDP diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h index a75872ee4..48da1b85b 100644 --- a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -10,7 +10,8 @@ namespace storm { namespace storage { - template + template + // TODO: Change name. This actually does not only manage grid points. class BeliefGrid { public: @@ -137,6 +138,28 @@ namespace storm { return true; } + BeliefId getInitialBelief() { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, + "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); + BeliefType belief; + belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); + + STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); + return getOrAddGridPointId(belief); + } + + uint32_t getBeliefObservation(BeliefType belief) { + STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); + return pomdp.getObservation(belief.begin()->first); + } + + uint32_t getBeliefObservation(BeliefId beliefId) { + return getBeliefObservation(getGridPoint(beliefId)); + } + + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? @@ -229,9 +252,9 @@ namespace storm { return gridPoints.size(); } - std::map expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { - + std::map expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::map destinations; // The belief ids should be ordered + // TODO: Does this make sense? 
It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds BeliefType gridPoint = getGridPoint(gridPointId); @@ -247,7 +270,7 @@ namespace storm { } } - // Now for each successor observation we find and triangulate the successor belief + // Now for each successor observation we find and potentially triangulate the successor belief for (auto const& successor : successorObs) { BeliefType successorBelief; for (auto const& pointEntry : gridPoint) { @@ -261,9 +284,13 @@ namespace storm { } STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); - Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]); - for (size_t j = 0; j < triangulation.size(); ++j) { - addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + if (observationTriangulationResolutions) { + Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]); + for (size_t j = 0; j < triangulation.size(); ++j) { + addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + } + } else { + addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second); } } @@ -271,6 +298,14 @@ namespace storm { } + std::map expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(gridPointId, actionIndex, observationResolutions); + } + + std::map expand(BeliefId const& gridPointId, uint64_t actionIndex) { + return expandInternal(gridPointId, actionIndex); + } + private: BeliefId getOrAddGridPointId(BeliefType const& gridPoint) { @@ -284,7 +319,6 @@ namespace storm { } PomdpType const& pomdp; - uint64_t resolution; std::vector gridPoints; std::map gridPointToIdMap; From 
b3115e9395a943832975f6ad63c0f6fcbd6abea9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 07:27:53 +0100 Subject: [PATCH 088/155] Code polishing and re-enabled the under-approximation. Refinement should still not be possible right now. --- .../ApproximatePOMDPModelchecker.cpp | 83 ++++++++----------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4976f3404..c18c59961 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -359,23 +359,16 @@ namespace storm { underMap = underApproximationMap.value(); } - storm::storage::BeliefGrid, ValueType> newBeliefGrid(pomdp, options.numericPrecision); - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; + storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; - storm::storage::BitVector foundBeliefs; - // current ID -> action -> reward - std::map> beliefActionRewards; - uint64_t nextId = 0; statistics.overApproximationBuildTime.start(); // Initial belief always has belief ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(nextId); - ++nextId; - + auto initialBeliefId = beliefGrid.getInitialBelief(); + auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId); + auto initialObservation = beliefGrid.getBeliefObservation(initialBelief); // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); @@ -400,25 +393,20 @@ namespace storm { std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - auto triangulation = 
newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]); - if (options.cacheSubsimplices) { - //subSimplexCache[0] = initSubSimplex; - //lambdaCache[0] = initLambdas; - } - std::vector> initTransitionsInBelief; + auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]); uint64_t initialMdpState = nextMdpStateId; ++nextMdpStateId; if (triangulation.size() == 1) { // The initial belief is on the grid itself auto initBeliefId = triangulation.gridPoints.front(); if (boundMapsSet) { - auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId); + auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId); weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); } beliefsToBeExpanded.push_back(initBeliefId); beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? 
storm::utility::one() : storm::utility::zero()); } else { // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs @@ -429,11 +417,11 @@ namespace storm { beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); ++nextMdpStateId; if (boundMapsSet) { - auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]); + auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]); weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); } - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one() : storm::utility::zero()); } //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting @@ -444,7 +432,7 @@ namespace storm { if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); for (auto const& belId : beliefsToBeExpanded) { foundBeliefs.set(belId, true); } @@ -453,8 +441,8 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = newBeliefGrid.getGridPoint(currId); - uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first); + auto const& currBelief = beliefGrid.getGridPoint(currId); + uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); @@ -469,20 +457,18 @@ namespace storm { mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); ++mdpMatrixRow; } else { - auto const& currBelief = newBeliefGrid.getGridPoint(currId); + auto const& currBelief = beliefGrid.getGridPoint(currId); uint64_t someState = currBelief.begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); - std::vector actionRewardsInState(numChoices); - std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector); // Check for newly found grid points - foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); for (auto const& successor : successorGridPoints) { auto successorId = successor.first; - auto successorBelief = newBeliefGrid.getGridPoint(successorId); - auto successorObservation = 
pomdp.getObservation(successorBelief.begin()->first); + auto const& successorBelief = beliefGrid.getGridPoint(successorId); + auto successorObservation = beliefGrid.getBeliefObservation(successorBelief); if (!foundBeliefs.get(successorId)) { foundBeliefs.set(successorId); beliefsToBeExpanded.push_back(successorId); @@ -525,21 +511,16 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState)); + mdpLabeling.addLabelToState("init", initialMdpState); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) { - if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) { - std::cout << "Row " << row << " does not sum up to one. 
" << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl; - } - } auto overApproxMdp = std::make_shared>(std::move(modelComponents)); if (computeRewards) { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); for (auto const &iter : beliefStateMap.left) { - auto currentBelief = newBeliefGrid.getGridPoint(iter.first); + auto currentBelief = beliefGrid.getGridPoint(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) { // Add the reward @@ -575,25 +556,27 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; + auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; - /* TODO: Enable under approx again: - auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); + // TODO: return other components needed for refinement. 
+ //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); + return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId}); } + STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); + /* TODO: return other components needed for refinement. return std::make_unique>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */ - return std::make_unique>(RefinementComponents{modelPtr, overApprox, storm::utility::zero(), overApproxResultMap, - {}, {}, {}, {}, beliefStateMap, {}, initialBelief.id}); + return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, + underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId}); } @@ -949,7 +932,7 @@ namespace storm { uint64_t initialBeliefId, bool min, bool computeRewards, uint64_t maxModelSize) { std::set visitedBelieves; - std::deque believesToBeExpanded; + std::deque beliefsToBeExpanded; bsmap_type beliefStateMap; std::vector>> transitions = {{{{0, storm::utility::one()}}}, {{{1, storm::utility::one()}}}}; @@ -964,10 +947,10 @@ namespace storm { statistics.underApproximationBuildTime.start(); // Expand the believes visitedBelieves.insert(initialBeliefId); - believesToBeExpanded.push_back(initialBeliefId); - while (!believesToBeExpanded.empty()) { + beliefsToBeExpanded.push_back(initialBeliefId); + while (!beliefsToBeExpanded.empty()) { //TODO think of other ways to stop exploration besides model size - auto currentBeliefId = 
believesToBeExpanded.front(); + auto currentBeliefId = beliefsToBeExpanded.front(); uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { @@ -992,7 +975,7 @@ namespace storm { if (visitedBelieves.insert(nextBeliefId).second) { beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); ++stateId; - believesToBeExpanded.push_back(nextBeliefId); + beliefsToBeExpanded.push_back(nextBeliefId); ++counter; } transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; @@ -1001,7 +984,7 @@ namespace storm { } transitions.push_back(actionTransitionStorage); } - believesToBeExpanded.pop_front(); + beliefsToBeExpanded.pop_front(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildAborted = true; break; From 3887e8a979b651569234cc3748e458abd45c4cd4 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 20:28:15 +0100 Subject: [PATCH 089/155] Fix for belief triangulation. More descriptive output for belief triangulation asserts. 
--- src/storm-pomdp/storage/BeliefGrid.h | 32 ++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h index 48da1b85b..b0661fcc5 100644 --- a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -42,6 +42,32 @@ namespace storm { return idIt->second; } + std::string toString(BeliefType const& belief) const { + std::stringstream str; + str << "{ "; + bool first = true; + for (auto const& entry : belief) { + if (first) { + first = false; + } else { + str << ", "; + } + str << entry.first << ": " << entry.second; + } + str << " }"; + return str.str(); + } + + std::string toString(Triangulation const& t) const { + std::stringstream str; + str << "(\n"; + for (uint64_t i = 0; i < t.size(); ++i) { + str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n"; + } + str <<")\n"; + return str.str(); + } + bool isEqual(BeliefType const& first, BeliefType const& second) const { if (first.size() != second.size()) { return false; @@ -132,7 +158,7 @@ namespace storm { STORM_LOG_ERROR("Triangulated belief is not a belief."); } if (!isEqual(belief, triangulatedBelief)) { - STORM_LOG_ERROR("Belief does not match triangulated belief."); + STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << "."); return false; } return true; @@ -233,10 +259,8 @@ namespace storm { result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); } } - std::reverse(result.weights.begin(), result.weights.end()); - - STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation."); + STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); return result; } From 110453146d78b7ef77f2b87ab8457628c1b58b11 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sat, 28 Mar 2020 20:29:18 +0100 Subject: [PATCH 
090/155] Various fixes for under/over approximation with rewards. --- .../ApproximatePOMDPModelchecker.cpp | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c18c59961..fb5264ca8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -387,6 +387,7 @@ namespace storm { hintVector[extraTargetState] = storm::utility::one(); } std::vector targetStates = {extraTargetState}; + storm::storage::BitVector fullyExpandedStates; // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace std::map weightedSumOverMap; @@ -441,9 +442,8 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefGrid.getGridPoint(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); - + uint32_t currObservation = beliefGrid.getBeliefObservation(currId); + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); if (targetObservations.count(currObservation) != 0) { @@ -457,8 +457,9 @@ namespace storm { mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); ++mdpMatrixRow; } else { - auto const& currBelief = beliefGrid.getGridPoint(currId); - uint64_t someState = currBelief.begin()->first; + fullyExpandedStates.grow(nextMdpStateId, false); + fullyExpandedStates.set(currMdpState, true); + uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); for (uint64_t action = 0; action < numChoices; ++action) { @@ -507,6 +508,7 @@ namespace storm { statistics.overApproximationBuildTime.stop(); return nullptr; } + fullyExpandedStates.resize(nextMdpStateId, false); 
storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); @@ -520,13 +522,15 @@ namespace storm { if (computeRewards) { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) { - // Add the reward - uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + if (fullyExpandedStates.get(iter.second)) { + auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto representativeState = currentBelief.begin()->first; + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { + // Add the reward + uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); + uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); + mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + } } } overApproxMdp->addRewardModel("default", mdpRewardModel); @@ -1076,7 +1080,8 @@ namespace storm { ++mdpMatrixRow; } std::vector targetStates = {extraTargetState}; - + storm::storage::BitVector fullyExpandedStates; + bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; @@ -1106,11 +1111,11 @@ namespace storm { mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); ++mdpMatrixRow; } else if (currMdpState > maxModelSize) { - // In other cases, this could be helpflull as well. 
if (min) { // Get an upper bound here if (computeRewards) { // TODO: With minimizing rewards we need an upper bound! + // In other cases, this could be helpflull as well. // For now, add a selfloop to "generate" infinite reward mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); } else { @@ -1121,6 +1126,8 @@ namespace storm { } ++mdpMatrixRow; } else { + fullyExpandedStates.grow(nextMdpStateId, false); + fullyExpandedStates.set(currMdpState, true); // Iterate over all actions and add the corresponding transitions uint64_t someState = currBelief.begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); @@ -1153,7 +1160,7 @@ namespace storm { statistics.underApproximationBuildTime.stop(); return nullptr; } - + fullyExpandedStates.resize(nextMdpStateId, false); storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -1167,13 +1174,15 @@ namespace storm { if (computeRewards) { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < model->getNumberOfChoices(representativeState); ++action) { - // Add the reward - uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + if (fullyExpandedStates.get(iter.second)) { + auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto representativeState = currentBelief.begin()->first; + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { + // Add the reward + uint64_t 
mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); + uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); + mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + } } } model->addRewardModel("default", mdpRewardModel); From 98bb48d3c54e2d35b607c00f07036ea1caf5f9b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:07:49 +0200 Subject: [PATCH 091/155] BeliefGrid: Adding support for rewards. --- src/storm-pomdp/storage/BeliefGrid.h | 33 +++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h index b0661fcc5..61fe3b370 100644 --- a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -24,6 +24,19 @@ namespace storm { // Intentionally left empty } + void setRewardModel(boost::optional rewardModelName = boost::none) { + if (rewardModelName) { + auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get()); + pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix()); + } else { + setRewardModel(pomdp.getUniqueRewardModelName()); + } + } + + void unsetRewardModel() { + pomdpActionRewardVector.clear(); + } + struct Triangulation { std::vector gridPoints; std::vector weights; @@ -89,7 +102,11 @@ namespace storm { BeliefValueType sum = storm::utility::zero(); boost::optional observation; for (auto const& entry : belief) { - uintmax_t entryObservation = pomdp.getObservation(entry.first); + if (entry.first >= pomdp.getNumberOfStates()) { + STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << "."); + return false; + } + uint64_t entryObservation = pomdp.getObservation(entry.first); if (observation) { if (observation.get() != entryObservation) { STORM_LOG_ERROR("Beliefsupport contains different observations."); @@ -176,6 
+193,19 @@ namespace storm { return getOrAddGridPointId(belief); } + ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { + STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); + auto result = storm::utility::zero(); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (auto const &entry : belief) { + uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); + STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); + result += entry.second * pomdpActionRewardVector[choiceIndex]; + } + return result; + } + uint32_t getBeliefObservation(BeliefType belief) { STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); return pomdp.getObservation(belief.begin()->first); @@ -343,6 +373,7 @@ namespace storm { } PomdpType const& pomdp; + std::vector pomdpActionRewardVector; std::vector gridPoints; std::map gridPointToIdMap; From a3e92d2f72952e759effe1cf765f7d7cad87a2a9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:18:06 +0200 Subject: [PATCH 092/155] Using the new reward functionalities of BliefGrid. This also fixes setting rewards in a wrong way (previously, the same reward was assigned to states with the same observation). 
--- .../ApproximatePOMDPModelchecker.cpp | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fb5264ca8..3aeb81d18 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -137,7 +137,28 @@ namespace storm { stream << "##########################################" << std::endl; } - + + std::shared_ptr createStandardProperty(bool min, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + template + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property, std::vector&& hintVector) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference. 
+ // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + if (!hintVector.empty()) { + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(std::move(hintVector)); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + } + return task; + } template std::unique_ptr> @@ -360,6 +381,10 @@ namespace storm { } storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); + if (computeRewards) { + beliefGrid.setRewardModel(); + } + bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; @@ -520,37 +545,27 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); auto overApproxMdp = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefGrid.getGridPoint(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); } } } overApproxMdp->addRewardModel("default", 
mdpRewardModel); - overApproxMdp->restrictRewardModels(std::set({"default"})); } statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); overApproxMdp->printModelInformationToStream(std::cout); auto modelPtr = std::static_pointer_cast>(overApproxMdp); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(hintVector); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); + auto property = createStandardProperty(min, computeRewards); + auto task = createStandardCheckTask(property, std::move(hintVector)); + statistics.overApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); statistics.overApproximationCheckTime.stop(); @@ -1172,16 +1187,14 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); auto model = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefGrid.getGridPoint(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t 
action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); } } } @@ -1192,17 +1205,11 @@ namespace storm { model->printModelInformationToStream(std::cout); statistics.underApproximationBuildTime.stop(); - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - + auto property = createStandardProperty(min, computeRewards); + auto task = createStandardCheckTask(property, std::vector()); + statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { return nullptr; From 87c855531219755df5920538dca0c7755462a1b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:20:13 +0200 Subject: [PATCH 093/155] Using the new reward functionalities of BliefGrid. This also fixes setting rewards in a wrong way (previously, the same reward was assigned to states with the same observation). Added auxiliary functions for creating properties. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 3aeb81d18..eac2dad1b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1207,8 +1207,8 @@ namespace storm { auto property = createStandardProperty(min, computeRewards); auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { From 0b552e68132ed8028d9eb3f27e87f41ead80edfc Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 13:13:40 +0200 Subject: [PATCH 094/155] Renamed BeliefGrid to BeliefManager --- .../ApproximatePOMDPModelchecker.cpp | 69 +++++++------- .../ApproximatePOMDPModelchecker.h | 4 +- .../storage/{BeliefGrid.h => BeliefManager.h} | 91 ++++++++++--------- 3 files changed, 85 insertions(+), 79 deletions(-) rename src/storm-pomdp/storage/{BeliefGrid.h => BeliefManager.h} (87%) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index eac2dad1b..6728535f5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -380,9 +380,9 @@ namespace storm { underMap = underApproximationMap.value(); } - storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); + auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); if (computeRewards) { - beliefGrid.setRewardModel(); + beliefManager->setRewardModel(); // TODO: get actual name } bsmap_type beliefStateMap; @@ -391,9 
+391,7 @@ namespace storm { statistics.overApproximationBuildTime.start(); // Initial belief always has belief ID 0 - auto initialBeliefId = beliefGrid.getInitialBelief(); - auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId); - auto initialObservation = beliefGrid.getBeliefObservation(initialBelief); + auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief()); // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); @@ -419,14 +417,14 @@ namespace storm { std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]); + auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]); uint64_t initialMdpState = nextMdpStateId; ++nextMdpStateId; if (triangulation.size() == 1) { // The initial belief is on the grid itself auto initBeliefId = triangulation.gridPoints.front(); if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId); + auto const& gridPoint = beliefManager->getBelief(initBeliefId); weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); } @@ -443,7 +441,7 @@ namespace storm { beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); ++nextMdpStateId; if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]); + auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]); weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); } @@ 
-458,7 +456,7 @@ namespace storm { if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); + storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); for (auto const& belId : beliefsToBeExpanded) { foundBeliefs.set(belId, true); } @@ -467,7 +465,7 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currId); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); @@ -484,17 +482,17 @@ namespace storm { } else { fullyExpandedStates.grow(nextMdpStateId, false); fullyExpandedStates.set(currMdpState, true); - uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first; + uint64_t someState = beliefManager->getBelief(currId).begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); for (uint64_t action = 0; action < numChoices; ++action) { - auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); // Check for newly found grid points - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorGridPoints) { auto successorId = successor.first; - auto const& successorBelief = beliefGrid.getGridPoint(successorId); - auto successorObservation = beliefGrid.getBeliefObservation(successorBelief); + auto const& successorBelief = beliefManager->getBelief(successorId); + auto successorObservation = beliefManager->getBeliefObservation(successorBelief); if (!foundBeliefs.get(successorId)) { 
foundBeliefs.set(successorId); beliefsToBeExpanded.push_back(successorId); @@ -548,11 +546,11 @@ namespace storm { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefManager->getBelief(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); } } } @@ -575,16 +573,16 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; + auto overApprox = overApproxResultMap[initialMdpState]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." 
<< std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId}); + return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -595,7 +593,7 @@ namespace storm { underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */ return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId}); + underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); } @@ -1075,7 +1073,7 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeRewards, uint64_t 
maxModelSize) { // Build the belief MDP until enough states are explored. @@ -1100,13 +1098,12 @@ namespace storm { bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; - auto initialBeliefId = beliefGrid.getInitialBelief(); - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId)); - beliefsToBeExpanded.push_back(initialBeliefId); + beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId)); + beliefsToBeExpanded.push_back(beliefManager->getInitialBelief()); ++nextMdpStateId; // Expand the believes - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); + storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); for (auto const& belId : beliefsToBeExpanded) { foundBeliefs.set(belId, true); } @@ -1115,8 +1112,8 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefGrid.getGridPoint(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); + auto const& currBelief = beliefManager->getBelief(currId); + uint32_t currObservation = beliefManager->getBeliefObservation(currBelief); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); @@ -1147,9 +1144,9 @@ namespace storm { uint64_t someState = currBelief.begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); for (uint64_t action = 0; action < numChoices; ++action) { - auto successorBeliefs = beliefGrid.expand(currId, action); + auto successorBeliefs = beliefManager->expand(currId, action); // Check for newly found beliefs - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorBeliefs) { auto successorId = successor.first; if (!foundBeliefs.get(successorId)) { @@ -1159,7 +1156,7 @@ namespace storm { ++nextMdpStateId; } auto successorMdpState = 
beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. + // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted. mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } ++mdpMatrixRow; @@ -1179,7 +1176,7 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId)); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief())); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } @@ -1190,11 +1187,11 @@ namespace storm { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefManager->getBelief(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); } } } @@ -1207,8 +1204,8 @@ namespace storm { auto property = createStandardProperty(min, computeRewards); auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); + statistics.underApproximationCheckTime.start(); 
std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { @@ -1217,7 +1214,7 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; + auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())]; return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 32daa0876..a97d36cb2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -3,7 +3,7 @@ #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" -#include "storm-pomdp/storage/BeliefGrid.h" +#include "storm-pomdp/storage/BeliefManager.h" #include #include "storm/storage/jani/Property.h" @@ -163,7 +163,7 @@ namespace storm { std::set const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeReward, uint64_t maxModelSize); diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefManager.h similarity index 87% rename from src/storm-pomdp/storage/BeliefGrid.h rename to src/storm-pomdp/storage/BeliefManager.h index 61fe3b370..efc6bee2a 100644 --- 
a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -11,8 +11,7 @@ namespace storm { namespace storage { template - // TODO: Change name. This actually does not only manage grid points. - class BeliefGrid { + class BeliefManager { public: typedef typename PomdpType::ValueType ValueType; @@ -20,8 +19,8 @@ namespace storm { typedef std::map BeliefType; typedef uint64_t BeliefId; - BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { - // Intentionally left empty + BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + initialBeliefId = computeInitialBelief(); } void setRewardModel(boost::optional rewardModelName = boost::none) { @@ -45,13 +44,13 @@ namespace storm { } }; - BeliefType const& getGridPoint(BeliefId const& id) const { - return gridPoints[id]; + BeliefType const& getBelief(BeliefId const& id) const { + return beliefs[id]; } - BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const { - auto idIt = gridPointToIdMap.find(gridPoint); - STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state."); + BeliefId getId(BeliefType const& belief) const { + auto idIt = beliefToIdMap.find(belief); + STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief."); return idIt->second; } @@ -75,7 +74,7 @@ namespace storm { std::stringstream str; str << "(\n"; for (uint64_t i = 0; i < t.size(); ++i) { - str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n"; + str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; } str <<")\n"; return str.str(); @@ -161,7 +160,7 @@ namespace storm { STORM_LOG_ERROR("Weight greater than one in triangulation."); } weightSum += triangulation.weights[i]; - BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]); + 
BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]); for (auto const& pointEntry : gridPoint) { BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; triangulatedValue += triangulation.weights[i] * pointEntry.second; @@ -181,16 +180,8 @@ namespace storm { return true; } - BeliefId getInitialBelief() { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial state"); - BeliefType belief; - belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); - - STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); - return getOrAddGridPointId(belief); + BeliefId const& getInitialBelief() const { + return initialBeliefId; } ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { @@ -212,10 +203,10 @@ namespace storm { } uint32_t getBeliefObservation(BeliefId beliefId) { - return getBeliefObservation(getGridPoint(beliefId)); + return getBeliefObservation(getBelief(beliefId)); } - + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? 
@@ -286,7 +277,7 @@ namespace storm { if (!cc.isZero(qsj[nrStates - 1])) { gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; } - result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } } @@ -294,6 +285,10 @@ namespace storm { return result; } + Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + return triangulateBelief(getBelief(beliefId), resolution); + } + template void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { auto insertionRes = distr.emplace(state, value); @@ -302,19 +297,19 @@ namespace storm { } } - BeliefId getNumberOfGridPointIds() const { - return gridPoints.size(); + BeliefId getNumberOfBeliefIds() const { + return beliefs.size(); } - std::map expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { + std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::map destinations; // The belief ids should be ordered // TODO: Does this make sense? 
It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds - BeliefType gridPoint = getGridPoint(gridPointId); + BeliefType belief = getBelief(beliefId); // Find the probability we go to each observation BeliefType successorObs; // This is actually not a belief but has the same type - for (auto const& pointEntry : gridPoint) { + for (auto const& pointEntry : belief) { uint64_t state = pointEntry.first; for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { if (!storm::utility::isZero(pomdpTransition.getValue())) { @@ -327,7 +322,7 @@ namespace storm { // Now for each successor observation we find and potentially triangulate the successor belief for (auto const& successor : successorObs) { BeliefType successorBelief; - for (auto const& pointEntry : gridPoint) { + for (auto const& pointEntry : belief) { uint64_t state = pointEntry.first; for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { @@ -344,7 +339,7 @@ namespace storm { addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); } } else { - addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second); + addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second); } } @@ -352,21 +347,33 @@ namespace storm { } - std::map expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { - return expandInternal(gridPointId, actionIndex, observationResolutions); + std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(beliefId, actionIndex, observationResolutions); } - std::map expand(BeliefId const& gridPointId, uint64_t actionIndex) { - 
return expandInternal(gridPointId, actionIndex); + std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + return expandInternal(beliefId, actionIndex); } private: - BeliefId getOrAddGridPointId(BeliefType const& gridPoint) { - auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size()); + BeliefId computeInitialBelief() { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, + "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); + BeliefType belief; + belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); + + STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); + return getOrAddBeliefId(belief); + } + + BeliefId getOrAddBeliefId(BeliefType const& belief) { + auto insertioRes = beliefToIdMap.emplace(belief, beliefs.size()); if (insertioRes.second) { - // There actually was an insertion, so add the new grid state - gridPoints.push_back(gridPoint); + // There actually was an insertion, so add the new belief + beliefs.push_back(belief); } // Return the id return insertioRes.first->second; @@ -375,8 +382,10 @@ namespace storm { PomdpType const& pomdp; std::vector pomdpActionRewardVector; - std::vector gridPoints; - std::map gridPointToIdMap; + std::vector beliefs; + std::map beliefToIdMap; + BeliefId initialBeliefId; + storm::utility::ConstantsComparator cc; From 37da2b4e1fae34993c05c6153735fc433ad65dfa Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 13:58:45 +0200 Subject: [PATCH 095/155] Added a new model checker that allows to compute trivial (but sound) bounds on the value of POMDP states --- .../TrivialPomdpValueBoundsModelChecker.h | 115 ++++++++++++++++++ src/storm/storage/Distribution.cpp | 12 +- src/storm/storage/Distribution.h | 5 + 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 
src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h new file mode 100644 index 000000000..862a82a05 --- /dev/null +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -0,0 +1,115 @@ +#pragma once + +#include "storm-pomdp/analysis/FormulaInformation.h" + +#include "storm/api/verification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/storage/Scheduler.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + class TrivialPomdpValueBoundsModelChecker { + public: + typedef typename PomdpType::ValueType ValueType; + TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { + // Intentionally left empty + } + + struct ValueBounds { + std::vector lower; + std::vector upper; + }; + ValueBounds getValueBounds(storm::logic::Formula const& formula) { + return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { + STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + // Compute the values on the fully observable MDP + // We need an actual MDP here so that the apply scheduler method below will work. + // Also, the api call in the next line will require a copy anyway. 
+ auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. + std::vector fullyObservableChoiceValues(pomdp.getNumberOfChoices()); + if (info.isNonNestedExpectedRewardFormula()) { + std::vector actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); + } else { + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); + } + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + storm::storage::Distribution choiceDistribution; + for (auto const &state : obsStates) { + ValueType const& stateValue = fullyObservableResult[state]; + assert(stateValue >= storm::utility::zero()); + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = fullyObservableChoiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + // Rate this choice by considering the relative difference between the 
choice value and the (optimal) state value + ValueType choiceRating; + if (stateValue < choiceValue) { + choiceRating = choiceValue - stateValue; + if (!storm::utility::isZero(choiceValue)) { + choiceRating /= choiceValue; + } + } else { + choiceRating = stateValue - choiceValue; + if (!storm::utility::isZero(stateValue)) { + choiceRating /= stateValue; + } + } + assert(choiceRating <= storm::utility::one()); + assert(choiceRating >= storm::utility::zero()); + // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice + if (choiceRating <= storm::utility::convertNumber(0.5)) { + choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one() - choiceRating); + } + } + } + choiceDistribution.normalize(); + for (auto const& state : obsStates) { + pomdpScheduler.setChoice(choiceDistribution, state); + } + } + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Finally prepare the result + ValueBounds result; + if (info.minimize()) { + result.lower = std::move(fullyObservableResult); + result.upper = std::move(pomdpSchedulerResult); + } else { + result.lower = std::move(pomdpSchedulerResult); + result.upper = std::move(fullyObservableResult); + } + return result; + } + + private: + PomdpType const& pomdp; + }; + } + } +} \ No newline at end of file diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp index f40afb402..2290c611c 100644 --- a/src/storm/storage/Distribution.cpp +++ 
b/src/storm/storage/Distribution.cpp @@ -166,7 +166,17 @@ namespace storm { } } - + template + void Distribution::normalize() { + ValueType sum = storm::utility::zero(); + for (auto const& entry: distribution) { + sum += entry.second; + } + for (auto& entry: distribution) { + entry.second /= sum; + } + } + template class Distribution; template std::ostream& operator<<(std::ostream& out, Distribution const& distribution); diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h index d7e0bd2fb..c3ac58dcc 100644 --- a/src/storm/storage/Distribution.h +++ b/src/storm/storage/Distribution.h @@ -144,6 +144,11 @@ namespace storm { */ ValueType getProbability(StateType const& state) const; + /*! + * Normalizes the distribution such that the values sum up to one. + */ + void normalize(); + private: // A list of states and the probabilities that are assigned to them. container_type distribution; From ab26b6943558e711bb5a7e2c064908914ca444ff Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:01:00 +0200 Subject: [PATCH 096/155] Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 354 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.cpp | 248 ++++-------- .../ApproximatePOMDPModelchecker.h | 9 +- src/storm-pomdp/storage/BeliefManager.h | 24 +- 4 files changed, 443 insertions(+), 192 deletions(-) create mode 100644 src/storm-pomdp/builder/BeliefMdpExplorer.h diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h new file mode 100644 index 000000000..107f699ae --- /dev/null +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -0,0 +1,354 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "storm/api/properties.h" +#include "storm/api/verification.h" + +#include "storm/storage/BitVector.h" +#include "storm/utility/macros.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm/utility/SignalHandler.h" + +namespace storm { + namespace builder { + template + class BeliefMdpExplorer { + public: + typedef typename PomdpType::ValueType ValueType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef typename BeliefManagerType::BeliefId BeliefId; + typedef uint64_t MdpStateType; + + BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) { + // Intentionally left empty + } + + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + // Reset data from potential previous explorations + mdpStateToBeliefIdMap.clear(); + beliefIdToMdpStateMap.clear(); + beliefIdsWithMdpState.clear(); + beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false); + lowerValueBounds.clear(); + upperValueBounds.clear(); + values.clear(); + mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder(0, 0, 0, true, true); + 
currentRowCount = 0; + startOfCurrentRowGroup = 0; + mdpActionRewards.clear(); + exploredMdp = nullptr; + + // Add some states with special treatment (if requested) + if (extraBottomStateValue) { + extraBottomState = getCurrentNumberOfMdpStates(); + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); + + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one()); + ++currentRowCount; + } else { + extraBottomState = boost::none; + } + if (extraTargetStateValue) { + extraTargetState = getCurrentNumberOfMdpStates(); + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); + + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one()); + ++currentRowCount; + + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(extraTargetState.get(), true); + } else { + extraTargetState = boost::none; + } + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + bool hasUnexploredState() const { + return !beliefIdsToExplore.empty(); + } + + BeliefId exploreNextState() { + // Set up the matrix builder + finishCurrentRow(); + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + ++currentRowCount; + + // Pop from the queue. 
+ auto result = beliefIdsToExplore.front(); + beliefIdsToExplore.pop_front(); + return result; + } + + void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + // We first insert the entries of the current row in a separate map. + // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + + uint64_t row = startOfCurrentRowGroup + localActionIndex; + if (!storm::utility::isZero(bottomStateValue)) { + STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); + internalAddTransition(row, extraBottomState.get(), bottomStateValue); + } + if (!storm::utility::isZero(targetStateValue)) { + STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); + internalAddTransition(row, extraTargetState.get(), targetStateValue); + } + } + + void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + uint64_t row = startOfCurrentRowGroup + localActionIndex; + internalAddTransition(row, getCurrentMdpState(), value); + } + + /*! + * Adds the next transition to the given successor belief + * @param localActionIndex + * @param transitionTarget + * @param value + * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. + * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. + */ + bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + // We first insert the entries of the current row in a separate map. 
+ // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + MdpStateType column; + if (ignoreNewBeliefs) { + column = getMdpState(transitionTarget); + if (column == noState()) { + return false; + } + } else { + column = getOrAddMdpState(transitionTarget); + } + uint64_t row = startOfCurrentRowGroup + localActionIndex; + internalAddTransition(row, column, value); + return true; + } + + void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + if (currentRowCount >= mdpActionRewards.size()) { + mdpActionRewards.resize(currentRowCount, storm::utility::zero()); + } + uint64_t row = startOfCurrentRowGroup + localActionIndex; + mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; + } + + void setCurrentStateIsTarget() { + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(getCurrentMdpState(), true); + } + + void setCurrentStateIsTruncated() { + truncatedStates.grow(getCurrentNumberOfMdpStates(), false); + truncatedStates.set(getCurrentMdpState(), true); + } + + void finishExploration() { + // Create the tranistion matrix + finishCurrentRow(); + auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); + + // Create a standard labeling + storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabelToState("init", initialMdpState); + targetStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("target", std::move(targetStates)); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("truncated", std::move(truncatedStates)); + + // Create a standard reward model (if rewards are available) + std::unordered_map> mdpRewardModels; + if (!mdpActionRewards.empty()) { 
+ mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + } + + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + } + + std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); + return exploredMdp; + } + + MdpStateType getCurrentNumberOfMdpStates() const { + return mdpStateToBeliefIdMap.size(); + } + + MdpStateType getCurrentNumberOfMdpChoices() const { + return currentRowCount; + } + + ValueType getLowerValueBoundAtCurrentState() const { + return lowerValueBounds[getCurrentMdpState()]; + } + + ValueType getUpperValueBoundAtCurrentState() const { + return upperValueBounds[getCurrentMdpState()]; + } + + ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); + } + + ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); + } + + std::vector const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); + auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); + auto task = createStandardCheckTask(property); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); + if (res) { + values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + } else { + STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); + STORM_LOG_ERROR("No result obtained while checking."); + } + return values; + } + + 
ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); + return values[exploredMdp->getInitialStates().getNextSetIndex(0)]; + } + + private: + MdpStateType noState() const { + return std::numeric_limits::max(); + } + + std::shared_ptr createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += storm::solver::minimize(dir) ? "min" : "max"; + propertyString += "=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference. + // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(values); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + return task; + } + + MdpStateType getCurrentMdpState() const { + return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1; + } + + MdpStateType getCurrentBeliefId() const { + return mdpStateToBeliefIdMap[getCurrentMdpState()]; + } + + void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { + // We first insert the entries of the current row in a separate map. 
+ // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row."); + if (row >= currentRowCount) { + // We are going to start a new row, so insert the entries of the old one + finishCurrentRow(); + currentRowCount = row + 1; + } + STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state."); + mdpTransitionsBuilderCurrentRowEntries[column] = value; + } + + void finishCurrentRow() { + for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) { + mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second); + } + mdpTransitionsBuilderCurrentRowEntries.clear(); + } + + MdpStateType getMdpState(BeliefId const& beliefId) const { + if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) { + return beliefIdToMdpStateMap.at(beliefId); + } else { + return noState(); + } + } + + void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { + lowerValueBounds.push_back(lowerBound); + upperValueBounds.push_back(upperBound); + // Take the middle value as a hint + values.push_back((lowerBound + upperBound) / storm::utility::convertNumber(2)); + STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); + STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); + } + + MdpStateType getOrAddMdpState(BeliefId const& beliefId) { + beliefIdsWithMdpState.grow(beliefId + 1, false); + if (beliefIdsWithMdpState.get(beliefId)) { + return beliefIdToMdpStateMap[beliefId]; + } else { + // Add a new MDP state + beliefIdsWithMdpState.set(beliefId, true); + MdpStateType result = getCurrentNumberOfMdpStates(); + 
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); + mdpStateToBeliefIdMap.push_back(beliefId); + beliefIdToMdpStateMap[beliefId] = result; + // This new belief needs exploration + beliefIdsToExplore.push_back(beliefId); + + insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + return result; + } + } + + // Belief state related information + std::shared_ptr beliefManager; + std::vector mdpStateToBeliefIdMap; + std::map beliefIdToMdpStateMap; + storm::storage::BitVector beliefIdsWithMdpState; + + // Exploration information + std::deque beliefIdsToExplore; + storm::storage::SparseMatrixBuilder mdpTransitionsBuilder; + std::map mdpTransitionsBuilderCurrentRowEntries; + std::vector mdpActionRewards; + uint64_t startOfCurrentRowGroup; + uint64_t currentRowCount; + + // Special states during exploration + boost::optional extraTargetState; + boost::optional extraBottomState; + storm::storage::BitVector targetStates; + storm::storage::BitVector truncatedStates; + MdpStateType initialMdpState; + + // Final Mdp + std::shared_ptr> exploredMdp; + + // Value related information + std::vector const& pomdpLowerValueBounds; + std::vector const& pomdpUpperValueBounds; + std::vector lowerValueBounds; + std::vector upperValueBounds; + std::vector values; // Contains an estimate during building and the actual result after a check has been performed + + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 6728535f5..42f1872f7 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,7 +22,8 @@ #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" - +#include "storm-pomdp/builder/BeliefMdpExplorer.h" +#include 
"storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" @@ -57,6 +58,10 @@ namespace storm { std::unique_ptr> result; // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); + + // Compute some initial bounds on the values for each state of the pomdp + auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + if (formulaInfo.isNonNestedReachabilityProbability()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); @@ -68,7 +73,7 @@ namespace storm { if (options.doRefinement) { result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); } else { - result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); } } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. 
@@ -78,7 +83,7 @@ namespace storm { } else { // FIXME: pick the non-unique reward model here STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); @@ -233,8 +238,8 @@ namespace storm { uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - initialOverApproxMap, - initialUnderApproxMap, underApproxModelSize); + {}, + {}, underApproxModelSize); if (res == nullptr) { statistics.refinementSteps = 0; return nullptr; @@ -335,14 +340,14 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, + std::vector const& lowerPomdpValueBounds, + std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, - underApproximationMap, maxUaModelSize); + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + auto result = computeFirstRefinementStep(targetObservations, min, 
observationResolutionVector, computeRewards, lowerPomdpValueBounds, + upperPomdpValueBounds, maxUaModelSize); if (result == nullptr) { return nullptr; } @@ -353,8 +358,6 @@ namespace storm { } } - - template ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { ValueType result = storm::utility::zero(); @@ -369,155 +372,64 @@ namespace storm { ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, + std::vector const& lowerPomdpValueBounds, + std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize) { - bool boundMapsSet = overApproximationMap && underApproximationMap; - std::map overMap; - std::map underMap; - if (boundMapsSet) { - overMap = overApproximationMap.value(); - underMap = underApproximationMap.value(); - } - + auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); if (computeRewards) { beliefManager->setRewardModel(); // TODO: get actual name } - bsmap_type beliefStateMap; - - std::deque beliefsToBeExpanded; - statistics.overApproximationBuildTime.start(); - // Initial belief always has belief ID 0 - auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief()); - // These are the components to build the MDP from the grid - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 
0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; - } - // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector hintVector(nextMdpStateId, storm::utility::zero()); - if (!computeRewards) { - hintVector[extraTargetState] = storm::utility::one(); - } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - - // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - // for the initial belief, add the triangulated initial states - auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]); - uint64_t initialMdpState = nextMdpStateId; - ++nextMdpStateId; - if (triangulation.size() == 1) { - // The initial belief is on the grid itself - auto initBeliefId = triangulation.gridPoints.front(); - if (boundMapsSet) { - auto const& gridPoint = beliefManager->getBelief(initBeliefId); - weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); - } - beliefsToBeExpanded.push_back(initBeliefId); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? 
storm::utility::one() - : storm::utility::zero()); + storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (computeRewards) { + explorer.startNewExploration(storm::utility::zero()); } else { - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - for (uint64_t i = 0; i < triangulation.size(); ++i) { - beliefsToBeExpanded.push_back(triangulation.gridPoints[i]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); - ++nextMdpStateId; - if (boundMapsSet) { - auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]); - weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); - } - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting - ++mdpMatrixRow; + explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); } // Expand the beliefs to generate the grid on-the-fly if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); - for (auto const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); - } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); + while (explorer.hasUnexploredState()) { + uint64_t currId = explorer.exploreNextState(); - uint64_t currMdpState = beliefStateMap.left.at(currId); uint32_t currObservation = beliefManager->getBeliefObservation(currId); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { - // TODO: with rewards we would have to assign the corresponding reward to this transition - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); - ++mdpMatrixRow; + explorer.setCurrentStateIsTarget(); + explorer.addSelfloopTransition(); } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - uint64_t someState = beliefManager->getBelief(currId).begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - - for (uint64_t 
action = 0; action < numChoices; ++action) { + bool stopExploration = false; + if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - // Check for newly found grid points - foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorGridPoints) { - auto successorId = successor.first; - auto const& successorBelief = beliefManager->getBelief(successorId); - auto successorObservation = beliefManager->getBeliefObservation(successorBelief); - if (!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; - - if (boundMapsSet) { - ValueType upperBound = getWeightedSum(successorBelief, overMap); - ValueType lowerBound = getWeightedSum(successorBelief, underMap); - if (cc.isEqual(upperBound, lowerBound)) { - hintVector.push_back(lowerBound); - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); - } - weightedSumOverMap[successorId] = upperBound; - weightedSumUnderMap[successorId] = lowerBound; - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? 
storm::utility::one() : storm::utility::zero()); - } + bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first)); } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - ++mdpMatrixRow; + if (stopExploration) { + if (computeRewards) { + explorer.addTransitionsToExtraStates(action, truncationProbability); + } else { + explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + explorer.computeRewardAtCurrentState(action, truncationValueBound); + } } } if (storm::utility::resources::isTerminate()) { @@ -525,64 +437,30 @@ namespace storm { break; } } - statistics.overApproximationStates = nextMdpStateId; - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); + statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); return nullptr; } - fullyExpandedStates.resize(nextMdpStateId, false); - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", initialMdpState); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto overApproxMdp = std::make_shared>(std::move(modelComponents)); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefManager->getBelief(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); - } - } - } - overApproxMdp->addRewardModel("default", mdpRewardModel); - } + explorer.finishExploration(); statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); - overApproxMdp->printModelInformationToStream(std::cout); - - auto modelPtr = std::static_pointer_cast>(overApproxMdp); - auto property = createStandardProperty(min, computeRewards); - auto task = createStandardCheckTask(property, std::move(hintVector)); + explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); + explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[initialMdpState]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); + //auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. 
//return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); + //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -592,8 +470,8 @@ namespace storm { underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */ - return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); + return std::make_unique>(RefinementComponents{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {}, + underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); } @@ -930,14 +808,14 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); + // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, 
bool min) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); + // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } @@ -1191,7 +1069,7 @@ namespace storm { auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action)); } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index a97d36cb2..6216de097 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -117,8 +117,7 @@ namespace storm { */ std::shared_ptr> computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); std::shared_ptr> computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, @@ -140,10 +139,8 @@ namespace storm { * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector 
&observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + computeReachabilityOTF(std::set const &targetObservations, bool min, bool computeRewards, + std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); /** * Helper to compute an underapproximation of the reachability property. diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index efc6bee2a..9cb7c039c 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -45,6 +45,8 @@ namespace storm { }; BeliefType const& getBelief(BeliefId const& id) const { + STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); + STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); return beliefs[id]; } @@ -54,6 +56,10 @@ namespace storm { return idIt->second; } + BeliefId noId() const { + return std::numeric_limits::max(); + } + std::string toString(BeliefType const& belief) const { std::stringstream str; str << "{ "; @@ -180,11 +186,22 @@ namespace storm { return true; } + template + ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : getBelief(beliefId)) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + + BeliefId const& getInitialBelief() const { return initialBeliefId; } - ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { + ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { + auto const& belief = getBelief(beliefId); STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no 
reward model was specified."); auto result = storm::utility::zero(); auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); @@ -206,6 +223,11 @@ namespace storm { return getBeliefObservation(getBelief(beliefId)); } + uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { + auto belief = getBelief(beliefId); + return pomdp.getNumberOfChoices(belief.begin()->first); + } + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation From 8b0e582ef4d56a09c53179f97be83743efeb0fa9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:16:21 +0200 Subject: [PATCH 097/155] Use the new BeliefMdpExplorer also for the underapproximation. --- .../ApproximatePOMDPModelchecker.cpp | 168 ++++++------------ .../ApproximatePOMDPModelchecker.h | 2 +- 2 files changed, 55 insertions(+), 115 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 42f1872f7..9c06c4c06 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -456,7 +456,7 @@ namespace storm { STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. 
//return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); @@ -953,91 +953,62 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize) { + bool computeRewards, uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds) { // Build the belief MDP until enough states are explored. //TODO think of other ways to stop exploration besides model size statistics.underApproximationBuildTime.start(); - - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; + storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (computeRewards) { + explorer.startNewExploration(storm::utility::zero()); + } else { + explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - bsmap_type beliefStateMap; - std::deque beliefsToBeExpanded; - - beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId)); - beliefsToBeExpanded.push_back(beliefManager->getInitialBelief()); - ++nextMdpStateId; - - // Expand the believes - storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); - for (auto 
const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); + // Expand the beliefs to generate the grid on-the-fly + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - - uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefManager->getBelief(currId); - uint32_t currObservation = beliefManager->getBeliefObservation(currBelief); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + while (explorer.hasUnexploredState()) { + uint64_t currId = explorer.exploreNextState(); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (currMdpState > maxModelSize) { - if (min) { - // Get an upper bound here - if (computeRewards) { - // TODO: With minimizing rewards we need an upper bound! - // In other cases, this could be helpflull as well. - // For now, add a selfloop to "generate" infinite reward - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one()); - } - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? 
extraTargetState : extraBottomState, storm::utility::one()); - } - ++mdpMatrixRow; + explorer.setCurrentStateIsTarget(); + explorer.addSelfloopTransition(); } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - // Iterate over all actions and add the corresponding transitions - uint64_t someState = currBelief.begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - for (uint64_t action = 0; action < numChoices; ++action) { - auto successorBeliefs = beliefManager->expand(currId, action); - // Check for newly found beliefs - foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); - for (auto const& successor : successorBeliefs) { - auto successorId = successor.first; - if (!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; + bool stopExploration = false; + if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. 
Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first)); } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - ++mdpMatrixRow; + if (stopExploration) { + if (computeRewards) { + explorer.addTransitionsToExtraStates(action, truncationProbability); + } else { + explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + explorer.computeRewardAtCurrentState(action, truncationValueBound); + } } } if (storm::utility::resources::isTerminate()) { @@ -1045,56 +1016,25 @@ namespace storm { break; } } - statistics.underApproximationStates = nextMdpStateId; + statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); return nullptr; } - fullyExpandedStates.resize(nextMdpStateId, false); - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief())); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto model = std::make_shared>(std::move(modelComponents)); - if 
(computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefManager->getBelief(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action)); - } - } - } - model->addRewardModel("default", mdpRewardModel); - model->restrictRewardModels(std::set({"default"})); - } - - model->printModelInformationToStream(std::cout); + explorer.finishExploration(); statistics.underApproximationBuildTime.stop(); + STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl); + explorer.getExploredMdp()->printModelInformationToStream(std::cout); - auto property = createStandardProperty(min, computeRewards); - auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + explorer.computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())]; - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); + STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl); + STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); + + return std::make_unique>(UnderApproxComponents{explorer.getComputedValueAtInitialState(), {}, {}}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 6216de097..925bff5b5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -162,7 +162,7 @@ namespace storm { uint64_t maxModelSize); std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize); + uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds); /** * Constructs the initial belief for the given POMDP From 37fa53c4d827ff78b27cc49b5d712e02361961e0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:39:02 +0200 Subject: [PATCH 098/155] Added a command-line-switch to disable making a pomdp canonic (for prism compatibility) --- .../settings/modules/POMDPSettings.cpp 
| 6 +++++ .../settings/modules/POMDPSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 26 +++++++++---------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 4ba3f8148..57c065f7a 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -13,6 +13,7 @@ namespace storm { namespace modules { const std::string POMDPSettings::moduleName = "pomdp"; + const std::string noCanonicOption = "nocanonic"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; const std::string qualitativeReductionOption = "qualitativereduction"; @@ -31,6 +32,7 @@ namespace storm { const std::string checkFullyObservableOption = "check-fully-observable"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { + this->addOption(storm::settings::OptionBuilder(moduleName, noCanonicOption, false, "If this is set, actions will not be ordered canonically. Could yield incorrect results.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, qualitativeReductionOption, false, "Reduces the model size by performing qualitative analysis (E.g. merge states with prob. 
1.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, analyzeUniqueObservationsOption, false, "Computes the states with a unique observation").build()); @@ -47,6 +49,10 @@ namespace storm { } + bool POMDPSettings::isNoCanonicSet() const { + return this->getOption(noCanonicOption).getHasOptionBeenSet(); + } + bool POMDPSettings::isExportToParametricSet() const { return this->getOption(exportAsParametricModelOption).getHasOptionBeenSet(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 768766536..6754ac55c 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -26,6 +26,7 @@ namespace storm { bool isQualitativeReductionSet() const; + bool isNoCanonicSet() const; bool isGridApproximationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 58637c6e7..9b3026832 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -102,7 +102,7 @@ namespace storm { if (pomdpSettings.isGridApproximationSet()) { STORM_PRINT_AND_LOG("Applying grid approximation... "); auto const& gridSettings = storm::settings::getModule(); - typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker::Options options; + typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; options.initialGridResolution = gridSettings.getGridResolution(); options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); options.doRefinement = gridSettings.isRefineSet(); @@ -117,20 +117,16 @@ namespace storm { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. 
Results might be inexact."); } } - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); - std::unique_ptr> result = checker.check(formula); + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker> checker(*pomdp, options); + auto result = checker.check(formula); checker.printStatisticsToStream(std::cout); - if (result) { - if (storm::utility::resources::isTerminate()) { - STORM_PRINT_AND_LOG("\nResult till abort: ") - } else { - STORM_PRINT_AND_LOG("\nResult: ") - } - printResult(result->underApproxValue, result->overApproxValue); - STORM_PRINT_AND_LOG(std::endl); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") } else { - STORM_PRINT_AND_LOG("\nResult: Not available." << std::endl); + STORM_PRINT_AND_LOG("\nResult: ") } + printResult(result.lowerBound, result.upperBound); + STORM_PRINT_AND_LOG(std::endl); analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { @@ -263,8 +259,10 @@ namespace storm { STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); - pomdp = makeCanonic.transform(); + if (!pomdpSettings.isNoCanonicSet()) { + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + pomdp = makeCanonic.transform(); + } std::shared_ptr formula; if (!symbolicInput.properties.empty()) { From 71e065449890913cfdf56c81400fa9d6387228ee Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:39:56 +0200 Subject: [PATCH 099/155] Changed method signatures to new data structures. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 10 +- .../ApproximatePOMDPModelchecker.cpp | 765 ++++-------------- .../ApproximatePOMDPModelchecker.h | 107 +-- 3 files changed, 200 insertions(+), 682 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 107f699ae..33e1d1f51 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -6,6 +6,7 @@ #include #include +#include "storm-parsers/api/properties.h" #include "storm/api/properties.h" #include "storm/api/verification.h" @@ -13,20 +14,25 @@ #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" #include "storm/utility/SignalHandler.h" +#include "storm/modelchecker/results/CheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" namespace storm { namespace builder { - template + template class BeliefMdpExplorer { public: typedef typename PomdpType::ValueType ValueType; - typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::storage::BeliefManager BeliefManagerType; typedef typename BeliefManagerType::BeliefId BeliefId; typedef uint64_t MdpStateType; BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) { // Intentionally left empty } + BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { // Reset data from potential previous explorations diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 9c06c4c06..436fc3e09 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -15,13 +15,8 @@ #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" -#include "storm/modelchecker/results/CheckResult.h" -#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" -#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" -#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" #include "storm/api/properties.h" #include "storm/api/export.h" -#include "storm-parsers/api/storm-parsers.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" @@ -32,8 +27,8 @@ namespace storm { namespace pomdp { namespace modelchecker { - template - ApproximatePOMDPModelchecker::Options::Options() { + template + ApproximatePOMDPModelchecker::Options::Options() { initialGridResolution = 10; explorationThreshold = storm::utility::zero(); doRefinement = true; @@ -41,61 +36,78 @@ namespace storm { numericPrecision = storm::NumberTraits::IsExact ? 
storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; } - template - ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { + + template + ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { + // Intentionally left empty + } + + template + typename ApproximatePOMDPModelchecker::ValueType + ApproximatePOMDPModelchecker::Result::diff(bool relative) const { + ValueType diff = upperBound - lowerBound; + if (diff < storm::utility::zero()) { + STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); + diff = storm::utility::zero(); + } + if (relative && !storm::utility::isZero(upperBound)) { + diff /= upperBound; + } + return diff; + } + + template + ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } - template - ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); } - template - std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + template + typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { // Reset all collected statistics statistics = Statistics(); - std::unique_ptr> result; // Extract the relevant information from the formula auto formulaInfo = 
storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); - if (formulaInfo.isNonNestedReachabilityProbability()) { - // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. - STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (!formulaInfo.getSinkStates().empty()) { - auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); - reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; - STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); - } - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); - } else { - result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); - } - } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + boost::optional rewardModelName; + if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. 
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } } else { - // FIXME: pick the non-unique reward model here - STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); + // Expected reward formula! 
+ rewardModelName = formulaInfo.getRewardModelName(); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } + + if (options.doRefinement) { + refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } else { + computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } return result; } - template - void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { + template + void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { stream << "##### Grid Approximation Statistics ######" << std::endl; stream << "# Input model: " << std::endl; pomdp.printModelInformationToStream(stream); @@ -143,114 +155,82 @@ namespace storm { stream << "##########################################" << std::endl; } - std::shared_ptr createStandardProperty(bool min, bool computeRewards) { - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - return storm::api::extractFormulasFromProperties(propertyVector).front(); - } - - template - storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property, std::vector&& hintVector) { - //Note: The property should not run out of scope after calling this because the task only stores the property by reference. 
- // Therefore, this method needs the property by reference (and not const reference) - auto task = storm::api::createTask(property, false); - if (!hintVector.empty()) { - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(std::move(hintVector)); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); - } - return task; - } + - template - std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, bool computeRewards) { - std::srand(time(NULL)); - // Compute easy upper and lower bounds - storm::utility::Stopwatch underlyingWatch(true); - // Compute the results on the underlying MDP as a basic overapproximation - storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); - // TODO: Is the following really necessary - underlyingMdpLabeling.addLabel("__goal__"); - std::vector goalStates; - for (auto const &targetObs : targetObservations) { - for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { - underlyingMdpLabeling.addLabelToState("__goal__", goalState); - } - } - storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); - auto underlyingModel = std::static_pointer_cast>( - std::make_shared>(underlyingMdp)); - std::string initPropString = computeRewards ? "R" : "P"; - initPropString += min ? "min" : "max"; - initPropString += "=? 
[F \"__goal__\"]"; - std::vector propVector = storm::api::parseProperties(initPropString); - std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); - STORM_PRINT("Underlying MDP" << std::endl) - if (computeRewards) { - underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel()); + template + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - underlyingMdp.printModelInformationToStream(std::cout); - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underlyingRes, "Result not exist."); - underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); - underlyingWatch.stop(); - - storm::utility::Stopwatch positionalWatch(true); - // we define some positional scheduler for the POMDP as a basic lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); + + uint64_t underApproxSizeThreshold = 0; + { // Overapproximation + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + 
auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.lowerBound : result.upperBound; + resultValue = approx->getComputedValueAtInitialState(); + underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); + } + } + { // Underapproximation (Uses a fresh Belief manager) + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? 
result.upperBound : result.lowerBound; + resultValue = approx->getComputedValueAtInitialState(); } } - auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); - if (computeRewards) { - underApproxModel->restrictRewardModels({"std"}); - } - STORM_PRINT("Random Positional Scheduler" << std::endl) - underApproxModel->printModelInformationToStream(std::cout); - std::unique_ptr underapproxRes( - storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underapproxRes, "Result not exist."); - underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); - auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); - positionalWatch.stop(); - - STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " - << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) - STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) - - // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. - // This can probably be improved (i.e. 
resolutions for single belief states) - STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl) + } + + template + void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - std::set changedObservations; - uint64_t underApproxModelSize = 200; - uint64_t refinementCounter = 1; - STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - {}, - {}, underApproxModelSize); - if (res == nullptr) { - statistics.refinementSteps = 0; - return nullptr; + auto beliefManager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + beliefManager->setRewardModel(rewardModelName); + } + + // OverApproximaion + auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); + if (!overApproximation) { + return; } - ValueType lastMinScore = storm::utility::infinity(); - while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) || - (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) { + ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; + overApproxValue = overApproximation->getComputedValueAtInitialState(); + + // UnderApproximation TODO: use same belief manager?) 
+ uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); + auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); + if (!underApproximation) { + return; + } + ValueType& underApproxValue = min ? result.upperBound : result.lowerBound; + underApproxValue = underApproximation->getComputedValueAtInitialState(); + + // ValueType lastMinScore = storm::utility::infinity(); + // Start refinement + statistics.refinementSteps = 0; + while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } // TODO the actual refinement + /* // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); std::vector beliefCount(pomdp.getNrObservations(), 0); @@ -286,9 +266,9 @@ namespace storm { } - /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - }*/ + //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { + // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); + //} changedObservations.clear(); //TODO think about some other scoring methods @@ -301,21 +281,21 @@ namespace storm { observationResolutionVector[i] = maxRes + 1; changedObservations.insert(i); } - /*} else { - lastMinScore = std::min(maxAvgDifference, lastMinScore); - STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { + //} else { + // lastMinScore = std::min(maxAvgDifference, lastMinScore); + // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) + // STORM_PRINT("Last Min Score: " << lastMinScore 
<< std::endl) + // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) + // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) - if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { + // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { //STORM_PRINT(" *** ") - observationResolutionVector[i] += 1; - changedObservations.insert(i); - } + // observationResolutionVector[i] += 1; + // changedObservations.insert(i); + // } //STORM_PRINT(std::endl) - } - }*/ + // } + //} if (underApproxModelSize < std::numeric_limits::max() - 101) { underApproxModelSize += 100; } @@ -327,60 +307,13 @@ namespace storm { STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); - ++refinementCounter; - } - statistics.refinementSteps = refinementCounter; - if (min) { - return std::make_unique>(POMDPCheckResult{res->underApproxValue, res->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); - } - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, - bool computeRewards, - std::vector const& lowerPomdpValueBounds, - std::vector const& upperPomdpValueBounds, - uint64_t maxUaModelSize) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds, - upperPomdpValueBounds, maxUaModelSize); - if (result == nullptr) { - return 
nullptr; - } - if (min) { - return std::make_unique>(POMDPCheckResult{result->underApproxValue, result->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); + */ + ++statistics.refinementSteps.get(); } } - template - ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { - ValueType result = storm::utility::zero(); - for (auto const& entry : belief) { - result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); - } - return result; - } - - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::vector const& lowerPomdpValueBounds, - std::vector const& upperPomdpValueBounds, - uint64_t maxUaModelSize) { - - auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); - if (computeRewards) { - beliefManager->setRewardModel(); // TODO: get actual name - } - + template + std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager) { statistics.overApproximationBuildTime.start(); storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); if (computeRewards) { @@ -390,9 +323,6 @@ namespace storm { } // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } while (explorer.hasUnexploredState()) { uint64_t currId = explorer.exploreNextState(); @@ -445,39 +375,20 @@ namespace storm { explorer.finishExploration(); 
statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.overApproximationCheckTime.start(); explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - // TODO: return other components needed for refinement. - //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - /* TODO: return other components needed for refinement. 
- return std::make_unique>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, - underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); - */ - return std::make_unique>(RefinementComponents{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {}, - underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); - + return std::make_shared(std::move(explorer)); } - template + template + void ApproximatePOMDPModelchecker::refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + /*TODO: + template std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, std::shared_ptr> refinementComponents, @@ -504,7 +415,7 @@ namespace storm { uint64_t nextBeliefId = refinementComponents->beliefList.size(); uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; + std::set relevantStates; // The MDP states where the observation has changed for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { auto currentBelief = refinementComponents->beliefList[iter.first]; if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { @@ -512,7 +423,7 @@ namespace storm { } } - std::set> statesAndActionsToCheck; + std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed for (uint64_t state 
= 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( @@ -536,6 +447,7 @@ namespace storm { action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + // Expand and triangulate the successor uint32_t observation = iter->first; uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); @@ -803,160 +715,12 @@ namespace storm { refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); + */ } - - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, 
- bool computeRewards, uint64_t maxModelSize) { - std::set visitedBelieves; - std::deque beliefsToBeExpanded; - bsmap_type beliefStateMap; - std::vector>> transitions = {{{{0, storm::utility::one()}}}, - {{{1, storm::utility::one()}}}}; - std::vector targetStates = {1}; - - uint64_t stateId = 2; - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId)); - ++stateId; - uint64_t nextId = beliefList.size(); - uint64_t counter = 0; - statistics.underApproximationBuildTime.start(); - // Expand the believes - visitedBelieves.insert(initialBeliefId); - beliefsToBeExpanded.push_back(initialBeliefId); - while (!beliefsToBeExpanded.empty()) { - //TODO think of other ways to stop exploration besides model size - auto currentBeliefId = beliefsToBeExpanded.front(); - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); - // for targets, we only consider one action with one transition - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states - targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); - transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); - } else if (counter > maxModelSize) { - transitions.push_back({{{0, storm::utility::one()}}}); - } else { - // Iterate over all actions and add the corresponding transitions - std::vector> actionTransitionStorage; - //TODO add a way to extract the actions from the over-approx and use them here? 
- for (uint64_t action = 0; action < numChoices; ++action) { - std::map transitionsInStateWithAction; - std::map observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action); - for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], - action, - observation, nextId); - nextId = beliefList.size(); - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); - ++stateId; - beliefsToBeExpanded.push_back(nextBeliefId); - ++counter; - } - transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; - } - actionTransitionStorage.push_back(transitionsInStateWithAction); - } - transitions.push_back(actionTransitionStorage); - } - beliefsToBeExpanded.pop_front(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildAborted = true; - break; - } - } - statistics.underApproximationStates = transitions.size(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildTime.stop(); - return nullptr; - } + template + std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager) { - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - std::shared_ptr> model; - auto transitionMatrix = buildTransitionMatrix(transitions); - if 
(transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { - transitionMatrix.makeRowGroupingTrivial(); - } - storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); - storm::models::sparse::Mdp underApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); - } - model = std::make_shared>(underApproxMdp); - - model->printModelInformationToStream(std::cout); - statistics.underApproximationBuildTime.stop(); - - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); - statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds) { - // Build the belief MDP until enough states are explored. 
- //TODO think of other ways to stop exploration besides model size - statistics.underApproximationBuildTime.start(); storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); if (computeRewards) { @@ -981,7 +745,7 @@ namespace storm { if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) { + } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { stopExploration = true; explorer.setCurrentStateIsTruncated(); } @@ -1024,249 +788,22 @@ namespace storm { explorer.finishExploration(); statistics.underApproximationBuildTime.stop(); - STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl); - explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.underApproximationCheckTime.start(); explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." 
<< std::endl); - STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); - - return std::make_unique>(UnderApproxComponents{explorer.getComputedValueAtInitialState(), {}, {}}); - } - - - template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix(std::vector>> &transitions) { - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - uint64_t nrColumns = transitions.size(); - uint64_t nrRows = 0; - uint64_t nrEntries = 0; - for (auto const &actionTransitions : transitions) { - for (auto const &map : actionTransitions) { - nrEntries += map.size(); - ++nrRows; - } - } - storm::storage::SparseMatrixBuilder smb(nrRows, nrColumns, nrEntries, true, true); - for (auto const &actionTransitions : transitions) { - smb.newRowGroup(currentRow); - for (auto const &map : actionTransitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - ++currentRowGroup; - } - return smb.build(); - } - - template - uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( - std::vector> const &grid, uint32_t observation, - std::map &probabilities) { - // TODO This one is quite slow - for (auto const &belief : grid) { - if (belief.observation == observation) { - bool same = true; - for (auto const &probEntry : belief.probabilities) { - if (probabilities.find(probEntry.first) == probabilities.end()) { - same = false; - break; - } - if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) { - same = false; - break; - } - } - if (same) { - return belief.id; - } - } - } - return -1; - } - - template - storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief(uint64_t id) { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial 
state"); - std::map distribution; - uint32_t observation = 0; - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - if (pomdp.getInitialStates()[state] == 1) { - distribution[state] = storm::utility::one(); - observation = pomdp.getObservation(state); - break; - } - } - return storm::pomdp::Belief{id, observation, distribution}; - } - - template - std::pair>, std::vector> - ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::map &probabilities, uint64_t resolution, uint64_t nrStates) { - - //TODO this can also be simplified using the sparse vector interpretation - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : probabilities) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] = x[i] - v[i]; - } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } - } - } - } - std::vector> subSimplex(nrStates); - for (size_t j = 0; j < nrStates; ++j) { - for (size_t i = 0; i < nrStates - 1; ++i) { - if (cc.isLess(storm::utility::zero(), qs[j][i] - qs[j][i + 1])) { - subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; - } - } - - if (cc.isLess(storm::utility::zero(), qs[j][nrStates - 1])) { - subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution; - } - } - - std::vector lambdas(nrStates, 
storm::utility::zero()); - auto sum = storm::utility::zero(); - for (size_t i = 1; i < nrStates; ++i) { - lambdas[i] = d[p[i - 1]] - d[p[i]]; - sum += d[p[i - 1]] - d[p[i]]; - } - lambdas[0] = storm::utility::one() - sum; - - return std::make_pair(subSimplex, lambdas); - } - - - template - std::map - ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::pomdp::Belief &belief, - uint64_t actionIndex) { - std::map res; - // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - std::map postProbabilities; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (entry.getValue() > 0) { - postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - } - for (auto const &probEntry : postProbabilities) { - uint32_t observation = pomdp.getObservation(probEntry.first); - if (res.count(observation) == 0) { - res[observation] = probEntry.second; - } else { - res[observation] += probEntry.second; - } - } - - return res; + return std::make_shared(std::move(explorer)); } - template - uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, - uint32_t observation, uint64_t id) { - std::map distributionAfter; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (pomdp.getObservation(entry.getColumn()) == observation) { - distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - 
} - // We have to normalize the distribution - auto sum = storm::utility::zero(); - for (auto const &entry : distributionAfter) { - sum += entry.second; - } - - for (auto const &entry : distributionAfter) { - distributionAfter[entry.first] /= sum; - } - if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { - auto res = getBeliefIdInVector(beliefList, observation, distributionAfter); - return res; - } else { - beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - return id; - } + template + void ApproximatePOMDPModelchecker::refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { + // TODO } - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, std::map const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief.probabilities) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - - template - class ApproximatePOMDPModelchecker; - -#ifdef STORM_HAVE_CARL - - template - class ApproximatePOMDPModelchecker; + template class ApproximatePOMDPModelchecker>; + template class ApproximatePOMDPModelchecker>; -#endif } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 925bff5b5..0d59ac31a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -4,6 +4,7 @@ #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/builder/BeliefMdpExplorer.h" #include #include "storm/storage/jani/Property.h" @@ -17,12 +18,6 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - template - struct POMDPCheckResult { - ValueType overApproxValue; - ValueType underApproxValue; - }; - /** * Struct containing information which is supposed to be persistent over multiple refinement steps * @@ -49,9 +44,13 @@ namespace storm { bsmap_type underApproxBeliefStateMap; }; - template> + template class ApproximatePOMDPModelchecker { public: + typedef typename PomdpModelType::ValueType ValueType; + typedef typename PomdpModelType::RewardModelType RewardModelType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::builder::BeliefMdpExplorer ExplorerType; struct Options { Options(); @@ -63,85 +62,60 @@ namespace storm { bool cacheSubsimplices; /// Enables caching of subsimplices }; - ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); + struct Result { + Result(ValueType lower, ValueType upper); + ValueType lowerBound; + ValueType upperBound; + ValueType diff (bool relative = false) const; + }; + + ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); - std::unique_ptr> check(storm::logic::Formula const& formula); + Result check(storm::logic::Formula const& formula); void printStatisticsToStream(std::ostream& stream) const; private: /** - * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop + * Helper method that handles the computation 
of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * - * @param targetObservations the set of observations to be reached - * @param min true if minimum probability is to be computed - * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param observationResolutionVector vector containing the resolution to be used for each observation + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value + * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value + * @param maxUaModelSize the maximum size of the underapproximation model to be generated + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - std::unique_ptr> - refineReachability(std::set const &targetObservations, bool min, bool computeRewards); - + void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); + + /** - * Compute the reachability probability of given target observations on a POMDP for the given resolution only. 
- * On-the-fly state space generation is used for the overapproximation + * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop * * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - std::unique_ptr> - computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min); + void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); /** - * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. - * On-the-fly state space generation is used for the overapproximation - * - * @param targetObservations the set of observations to be reached - * @param min true if minimum rewards are to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. 
provides an upper bound for maximizing and a lower bound for minimizing properties */ - std::unique_ptr> - computeReachabilityRewardOTF(std::set const &targetObservations, bool min); + std::shared_ptr computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager); + + void refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); - private: /** - * Helper method to compute the inital step of the refinement loop - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value - * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value - * @param maxUaModelSize the maximum size of the underapproximation model to be generated - * @return struct containing components generated during the computation to be used in later refinement iterations + * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties */ - std::shared_ptr> - computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); + std::shared_ptr computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager); - std::shared_ptr> - computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + void refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - /** - * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value - * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value - * @param maxUaModelSize the maximum size of the underapproximation model to be generated - * @return A struct containing the 
overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityOTF(std::set const &targetObservations, bool min, bool computeRewards, - std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); +#ifdef REMOVE_THIS /** * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. @@ -243,7 +217,8 @@ namespace storm { */ ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); ValueType getRewardAfterAction(uint64_t action, std::map const& belief); - +#endif //REMOVE_THIS + struct Statistics { Statistics(); boost::optional refinementSteps; @@ -262,7 +237,7 @@ namespace storm { }; Statistics statistics; - storm::models::sparse::Pomdp const& pomdp; + PomdpModelType const& pomdp; Options options; storm::utility::ConstantsComparator cc; }; From 5388ed98e3d56a379a9350b169db59f1d0509a41 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:51:32 +0200 Subject: [PATCH 100/155] BeliefMdpExplorer: Added a few asserts so that methods can only be called in the corresponding exploration phase --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 38 +++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 33e1d1f51..86f49fe02 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -29,12 +29,20 @@ namespace storm { typedef typename BeliefManagerType::BeliefId BeliefId; typedef uint64_t MdpStateType; - BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), 
pomdpUpperValueBounds(pomdpUpperValueBounds) { + enum class Status { + Uninitialized, + Exploring, + ModelFinished, + ModelChecked + }; + + BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) { // Intentionally left empty } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + status = Status::Exploring; // Reset data from potential previous explorations mdpStateToBeliefIdMap.clear(); beliefIdToMdpStateMap.clear(); @@ -83,10 +91,12 @@ namespace storm { } bool hasUnexploredState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return !beliefIdsToExplore.empty(); } BeliefId exploreNextState() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // Set up the matrix builder finishCurrentRow(); startOfCurrentRowGroup = currentRowCount; @@ -100,6 +110,7 @@ namespace storm { } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // We first insert the entries of the current row in a separate map. 
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) @@ -115,6 +126,7 @@ namespace storm { } void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); uint64_t row = startOfCurrentRowGroup + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -128,6 +140,7 @@ namespace storm { * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. */ bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // We first insert the entries of the current row in a separate map. // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) MdpStateType column; @@ -145,6 +158,7 @@ namespace storm { } void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); if (currentRowCount >= mdpActionRewards.size()) { mdpActionRewards.resize(currentRowCount, storm::utility::zero()); } @@ -153,16 +167,19 @@ namespace storm { } void setCurrentStateIsTarget() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(getCurrentMdpState(), true); } void setCurrentStateIsTruncated() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); truncatedStates.grow(getCurrentNumberOfMdpStates(), false); truncatedStates.set(getCurrentMdpState(), true); } void finishExploration() { + STORM_LOG_ASSERT(status == 
Status::Exploring, "Method call is invalid in current status."); // Create the tranistion matrix finishCurrentRow(); auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); @@ -185,26 +202,32 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); + status = Status::ModelFinished; } std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); return exploredMdp; } MdpStateType getCurrentNumberOfMdpStates() const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); } MdpStateType getCurrentNumberOfMdpChoices() const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); return currentRowCount; } ValueType getLowerValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return lowerValueBounds[getCurrentMdpState()]; } ValueType getUpperValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return upperValueBounds[getCurrentMdpState()]; } @@ -216,7 +239,8 @@ namespace storm { return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); } - std::vector const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); 
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); auto task = createStandardCheckTask(property); @@ -228,12 +252,18 @@ namespace storm { STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); STORM_LOG_ERROR("No result obtained while checking."); } + status = Status::ModelChecked; + } + + std::vector const& getValuesOfExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; } ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); - return values[exploredMdp->getInitialStates().getNextSetIndex(0)]; + return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } private: @@ -355,6 +385,8 @@ namespace storm { std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed + // The current status of this explorer + Status status; }; } } \ No newline at end of file From 79641ef1310d99a187fba3083fba9c3c6dddfb26 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 15:59:31 +0200 Subject: [PATCH 101/155] Started to make the BeliefMdpExplorer more flexible, allowing to restart the exploration --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 224 ++++++++++++++------ src/storm-pomdp/storage/BeliefManager.h | 4 +- 2 files changed, 162 insertions(+), 66 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 86f49fe02..e13e20cf3 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -11,6 +11,7 @@ #include "storm/api/verification.h" #include 
"storm/storage/BitVector.h" +#include "storm/storage/SparseMatrix.h" #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" #include "storm/utility/SignalHandler.h" @@ -19,6 +20,7 @@ #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" + namespace storm { namespace builder { template @@ -46,16 +48,17 @@ namespace storm { // Reset data from potential previous explorations mdpStateToBeliefIdMap.clear(); beliefIdToMdpStateMap.clear(); - beliefIdsWithMdpState.clear(); - beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + mdpStatesToExplore.clear(); lowerValueBounds.clear(); upperValueBounds.clear(); values.clear(); - mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder(0, 0, 0, true, true); - currentRowCount = 0; - startOfCurrentRowGroup = 0; + exploredMdpTransitions.clear(); + exploredChoiceIndices.clear(); mdpActionRewards.clear(); exploredMdp = nullptr; + currentMdpState = noState(); // Add some states with special treatment (if requested) if (extraBottomStateValue) { @@ -63,10 +66,8 @@ namespace storm { mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); - startOfCurrentRowGroup = currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one()); - ++currentRowCount; + internalAddRowGroupIndex(); + internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); } else { extraBottomState = boost::none; } @@ -75,10 +76,8 @@ namespace storm { mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); - startOfCurrentRowGroup = 
currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one()); - ++currentRowCount; + internalAddRowGroupIndex(); + internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(extraTargetState.get(), true); @@ -89,24 +88,62 @@ namespace storm { // Set up the initial state. initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } + + /*! + * Restarts the exploration to allow re-exploring each state. + * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty + * This method inserts the initial state of the MDP in the exploration queue. + * While re-exploring, the reference to the old MDP remains valid. + */ + void restartExploration() { + STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. 
+ exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredMdpTransitions.clear(); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); + exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + mdpActionRewards.clear(); + if (exploredMdp->hasRewardModel()) { + // Can be overwritten during exploration + mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); + } + targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + mdpStatesToExplore.clear(); + + // The extra states are not changed + if (extraBottomState) { + currentMdpState = extraBottomState.get(); + restoreOldBehaviorAtCurrentState(0); + } + if (extraTargetState) { + currentMdpState = extraTargetState.get(); + restoreOldBehaviorAtCurrentState(0); + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } bool hasUnexploredState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - return !beliefIdsToExplore.empty(); + return !mdpStatesToExplore.empty(); } BeliefId exploreNextState() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // Set up the matrix builder - finishCurrentRow(); - startOfCurrentRowGroup = currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - ++currentRowCount; // Pop from the queue. 
- auto result = beliefIdsToExplore.front(); - beliefIdsToExplore.pop_front(); - return result; + currentMdpState = mdpStatesToExplore.front(); + mdpStatesToExplore.pop_front(); + + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + + return mdpStateToBeliefIdMap[currentMdpState]; } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { @@ -114,7 +151,7 @@ namespace storm { // We first insert the entries of the current row in a separate map. // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; if (!storm::utility::isZero(bottomStateValue)) { STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); internalAddTransition(row, extraBottomState.get(), bottomStateValue); @@ -127,7 +164,7 @@ namespace storm { void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -145,24 +182,24 @@ namespace storm { // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) MdpStateType column; if (ignoreNewBeliefs) { - column = getMdpState(transitionTarget); + column = getExploredMdpState(transitionTarget); if (column == noState()) { return false; } } else { column = getOrAddMdpState(transitionTarget); } - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() 
+ localActionIndex; internalAddTransition(row, column, value); return true; } void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - if (currentRowCount >= mdpActionRewards.size()) { - mdpActionRewards.resize(currentRowCount, storm::utility::zero()); + if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); } - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; } @@ -178,11 +215,64 @@ namespace storm { truncatedStates.set(getCurrentMdpState(), true); } + bool currentStateHasOldBehavior() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); + } + + /*! + * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. + * Does NOT set whether the state is truncated and/or target. 
+ * Will add "old" states that have not been considered before into the exploration queue + * @param localActionIndex + */ + void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); + + // Insert the transitions + for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { + internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); + // Check whether exploration is needed + auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()]; + if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state + if (!exploredBeliefIds.get(beliefId)) { + // This belief needs exploration + exploredBeliefIds.set(beliefId, true); + mdpStatesToExplore.push_back(transition.getColumn()); + } + } + } + + // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values + } + void finishExploration() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + // Finish the last row grouping in case the last explored state was new + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + // Create the tranistion matrix - finishCurrentRow(); - auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); + uint64_t entryCount = 0; + for (auto const& row : exploredMdpTransitions) { + entryCount += row.size(); + } + storm::storage::SparseMatrixBuilder 
builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + builder.newRowGroup(rowIndex); + for (; rowIndex < groupEnd; ++rowIndex) { + for (auto const& entry : exploredMdpTransitions[rowIndex]) { + builder.addNextValue(rowIndex, entry.first, entry.second); + } + } + } + auto mdpTransitionMatrix = builder.build(); // Create a standard labeling storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); @@ -212,13 +302,18 @@ namespace storm { } MdpStateType getCurrentNumberOfMdpStates() const { - STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); } MdpStateType getCurrentNumberOfMdpChoices() const { - STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); - return currentRowCount; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdpTransitions.size(); + } + + MdpStateType getStartOfCurrentRowGroup() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredChoiceIndices.back(); } ValueType getLowerValueBoundAtCurrentState() const { @@ -291,7 +386,8 @@ namespace storm { } MdpStateType getCurrentMdpState() const { - return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return currentMdpState; } MdpStateType getCurrentBeliefId() const { @@ -299,27 +395,20 @@ namespace storm { } void internalAddTransition(uint64_t const& row, MdpStateType 
const& column, ValueType const& value) { - // We first insert the entries of the current row in a separate map. - // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row."); - if (row >= currentRowCount) { - // We are going to start a new row, so insert the entries of the old one - finishCurrentRow(); - currentRowCount = row + 1; + STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); + if (row == exploredMdpTransitions.size()) { + exploredMdpTransitions.emplace_back(); } - STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state."); - mdpTransitionsBuilderCurrentRowEntries[column] = value; + STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); + exploredMdpTransitions[row][column] = value; } - void finishCurrentRow() { - for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) { - mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second); - } - mdpTransitionsBuilderCurrentRowEntries.clear(); + void internalAddRowGroupIndex() { + exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); } - MdpStateType getMdpState(BeliefId const& beliefId) const { - if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) { + MdpStateType getExploredMdpState(BeliefId const& beliefId) const { + if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap.at(beliefId); } else { return noState(); @@ -336,20 +425,28 @@ namespace storm { } MdpStateType getOrAddMdpState(BeliefId const& beliefId) { - beliefIdsWithMdpState.grow(beliefId + 1, false); - if (beliefIdsWithMdpState.get(beliefId)) { + exploredBeliefIds.grow(beliefId + 1, false); + if 
(exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap[beliefId]; } else { - // Add a new MDP state - beliefIdsWithMdpState.set(beliefId, true); + // This state needs exploration + exploredBeliefIds.set(beliefId, true); + + // If this is a restart of the exploration, we still might have an MDP state for the belief + if (exploredMdp) { + auto findRes = beliefIdToMdpStateMap.find(beliefId); + if (findRes != beliefIdToMdpStateMap.end()) { + mdpStatesToExplore.push_back(findRes->second); + return findRes->second; + } + } + // At this point we need to add a new MDP state MdpStateType result = getCurrentNumberOfMdpStates(); assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); mdpStateToBeliefIdMap.push_back(beliefId); beliefIdToMdpStateMap[beliefId] = result; - // This new belief needs exploration - beliefIdsToExplore.push_back(beliefId); - insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + mdpStatesToExplore.push_back(result); return result; } } @@ -358,15 +455,14 @@ namespace storm { std::shared_ptr beliefManager; std::vector mdpStateToBeliefIdMap; std::map beliefIdToMdpStateMap; - storm::storage::BitVector beliefIdsWithMdpState; + storm::storage::BitVector exploredBeliefIds; // Exploration information - std::deque beliefIdsToExplore; - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder; - std::map mdpTransitionsBuilderCurrentRowEntries; + std::deque mdpStatesToExplore; + std::vector> exploredMdpTransitions; + std::vector exploredChoiceIndices; std::vector mdpActionRewards; - uint64_t startOfCurrentRowGroup; - uint64_t currentRowCount; + uint64_t currentMdpState; // Special states during exploration boost::optional extraTargetState; diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 9cb7c039c..8f0dcd225 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -324,8 +324,8 @@ namespace 
storm { } std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { - std::map destinations; // The belief ids should be ordered - // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds + std::map destinations; + // TODO: Output as vector? BeliefType belief = getBelief(beliefId); From 3041b881d44eb4e53f8c4a3dda7817456eadef81 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 1 Apr 2020 22:34:47 +0200 Subject: [PATCH 102/155] Beginning of dropUnreachableStates() --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 41 +++++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index e13e20cf3..bb53c61c6 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -287,20 +287,53 @@ namespace storm { std::unordered_map> mdpRewardModels; if (!mdpActionRewards.empty()) { mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); - mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); } - + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; } - + + void dropUnreachableStates() { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); + storm::storage::BitVector reachableStates = 
storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates); + auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); + auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); + // TODO reward model + storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + + std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); + std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); + std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); + std::vector reachableValues(reachableStates.getNumberOfSetBits()); + for (uint64_t state = 0; state < reachableStates.size(); ++state) { + if (reachableStates[state]) { + reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); + reachableLowerValueBounds.push_back(lowerValueBounds[state]); + reachableUpperValueBounds.push_back(upperValueBounds[state]); + reachableValues.push_back(values[state]); + } + //TODO drop BeliefIds from exploredBeliefIDs? 
+ } + std::map reachableBeliefIdToMdpStateMap; + for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { + reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; + } + mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap; + beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap; + } + std::shared_ptr> getExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); return exploredMdp; } - + MdpStateType getCurrentNumberOfMdpStates() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); From 62c905fc583ef215d3dcd885181f0d99f816a5bc Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Thu, 2 Apr 2020 20:05:00 +0200 Subject: [PATCH 103/155] Added basis for rewards in dropUnreachableStates() --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index bb53c61c6..426eff188 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -298,28 +298,42 @@ namespace storm { void dropUnreachableStates() { STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); - storm::storage::BitVector reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates); + auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), + 
storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector{initialMdpState}), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), + getExploredMdp()->getStateLabeling().getStates("target")); auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); - // TODO reward model - storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling)); - exploredMdp = std::make_shared>(std::move(modelComponents)); - std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); std::vector reachableValues(reachableStates.getNumberOfSetBits()); + std::vector reachableMdpActionRewards; for (uint64_t state = 0; state < reachableStates.size(); ++state) { if (reachableStates[state]) { reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); reachableLowerValueBounds.push_back(lowerValueBounds[state]); reachableUpperValueBounds.push_back(upperValueBounds[state]); reachableValues.push_back(values[state]); + if (getExploredMdp()->hasRewardModel()) { + //TODO FIXME is there some mismatch with the indices here? + for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) { + reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[state + i]); + } + } } //TODO drop BeliefIds from exploredBeliefIDs? 
} + std::unordered_map> mdpRewardModels; + if (!reachableMdpActionRewards.empty()) { + //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(reachableMdpActionRewards))); + } + storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling), + std::move(mdpRewardModels)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + std::map reachableBeliefIdToMdpStateMap; for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; From c2ddea14806cfabc81e1b956ced220129e6f463b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 3 Apr 2020 12:41:55 +0200 Subject: [PATCH 104/155] First (re-) implementation of refinement. (probably needs some testing/debugging) --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 60 +- .../ApproximatePOMDPModelchecker.cpp | 712 +++++------------- .../ApproximatePOMDPModelchecker.h | 141 +--- 3 files changed, 267 insertions(+), 646 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 426eff188..2a97c5e05 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -43,6 +43,10 @@ namespace storm { } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; + BeliefManagerType const& getBeliefManager() const { + return *beliefManager; + } + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { status = Status::Exploring; // Reset data from potential previous explorations @@ -101,7 +105,7 @@ namespace storm { exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); 
exploredMdpTransitions.clear(); - exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); mdpActionRewards.clear(); if (exploredMdp->hasRewardModel()) { @@ -235,7 +239,7 @@ namespace storm { for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); // Check whether exploration is needed - auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()]; + auto beliefId = getBeliefId(transition.getColumn()); if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state if (!exploredBeliefIds.get(beliefId)) { // This belief needs exploration @@ -397,6 +401,10 @@ namespace storm { status = Status::ModelChecked; } + bool hasComputedValues() const { + return status == Status::ModelChecked; + } + std::vector const& getValuesOfExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; @@ -408,6 +416,51 @@ namespace storm { return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } + MdpStateType getBeliefId(MdpStateType exploredMdpState) const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap[exploredMdpState]; + } + + struct SuccessorObservationInformation { + SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { + // Intentionally left empty. 
+ } + + void join(SuccessorObservationInformation other) { + observationProbability += other.observationProbability; + maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); + successorWithObsCount += other.successorWithObsCount; + } + + ValueType observationProbability; /// The probability we move to the corresponding observation. + ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. + uint64_t successorWithObsCount; /// The number of successors with this observation + }; + + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map gatheredSuccessorObservations) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } + + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map gatheredSuccessorObservations) { + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (beliefId != beliefManager->noId()) { + auto const& obs = beliefManager->getBeliefObservation(beliefId); + SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); + auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); + if (!obsInsertion.second) { + // There already is an entry for this observation, so join the two informations + obsInsertion.first->second.join(info); + } + } + } + } + + private: MdpStateType noState() const { return std::numeric_limits::max(); @@ 
-438,7 +491,8 @@ namespace storm { } MdpStateType getCurrentBeliefId() const { - return mdpStateToBeliefIdMap[getCurrentMdpState()]; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return getBeliefId(getCurrentMdpState()); } void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 436fc3e09..4526607d4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -171,8 +171,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? 
result.lowerBound : result.upperBound; @@ -185,8 +186,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.upperBound : result.lowerBound; @@ -200,23 +202,27 @@ namespace storm { // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto beliefManager = std::make_shared(pomdp, options.numericPrecision); + auto overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + auto underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { - beliefManager->setRewardModel(rewardModelName); + overApproxBeliefManager->setRewardModel(rewardModelName); + underApproxBeliefManager->setRewardModel(rewardModelName); } // OverApproximaion - auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); - if (!overApproximation) { + auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues()) 
{ return; } ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; overApproxValue = overApproximation->getComputedValueAtInitialState(); - // UnderApproximation TODO: use same belief manager?) - uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); - auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); - if (!underApproximation) { + // UnderApproximation + uint64_t underApproxSizeThreshold = std::max(overApproximation->getExploredMdp()->getNumberOfStates(), 10); + auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues()) { return; } ValueType& underApproxValue = min ? 
result.upperBound : result.lowerBound; @@ -225,379 +231,165 @@ namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; + ValueType refinementAggressiveness = storm::utility::zero(); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } - // TODO the actual refinement - /* - // choose which observation(s) to refine - std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); - std::vector beliefCount(pomdp.getNrObservations(), 0); - bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); - while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { - auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; - beliefCount[currentBelief.observation] += 1; - bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); - if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { - // If there is an over-approximate value for the belief, use it - auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; - obsAccumulator[currentBelief.observation] += diff; - } else { - //otherwise, we approximate a value TODO this is critical, we have to think about it - auto overApproxValue = storm::utility::zero(); - auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); - auto subSimplex = temp.first; - auto lambdas = temp.second; - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); - bsmap_type::left_const_iterator approxIter = 
res->overApproxBeliefStateMap.left.find(approxId); - if (approxIter != res->overApproxBeliefStateMap.left.end()) { - overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; - } else { - overApproxValue += lambdas[j]; - } - } - } - obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; - } - ++underApproxStateBeliefIter; - } - - - //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - //} - changedObservations.clear(); - - //TODO think about some other scoring methods - auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); - //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { - lastMinScore = maxAvgDifference; - auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_PRINT("Set all to " << maxRes + 1 << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - observationResolutionVector[i] = maxRes + 1; - changedObservations.insert(i); + + // Refine over-approximation + refinementAggressiveness *= storm::utility::convertNumber(1.1);; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues()) { + overApproxValue = overApproximation->getComputedValueAtInitialState(); + } else { + break; } - //} else { - // lastMinScore = std::min(maxAvgDifference, lastMinScore); - // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - // STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - //STORM_PRINT(i << 
"(" << beliefCount[i] << "): " << obsAccumulator[i]) - // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { - //STORM_PRINT(" *** ") - // observationResolutionVector[i] += 1; - // changedObservations.insert(i); - // } - //STORM_PRINT(std::endl) - // } - //} - if (underApproxModelSize < std::numeric_limits::max() - 101) { - underApproxModelSize += 100; + + // Refine under-approximation + underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; } - STORM_PRINT( - "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); - //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || - cc.isEqual(res->underApproxValue, res->overApproxValue), - "The value for the under-approximation is larger than the value for the over-approximation."); - */ ++statistics.refinementSteps.get(); } } + /*! + * Heuristically rates the quality of the approximation described by the given successor observation info. 
+ * Here, 0 means a bad approximation and 1 means a good approximation. + */ + template + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) { + auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto one = storm::utility::one(); + + // Create the actual rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + return obsChoiceRating; + } + + template + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation) { + uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); + + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) { + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + ValueType obsChoiceRating = rateObservation(obsInfo.second); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + return resultingRatings; + } + template - std::shared_ptr::ExplorerType> 
ApproximatePOMDPModelchecker::computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); + statistics.overApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + storm::storage::BitVector refinedObservations; + if (!refine) { + // If we build the model from scratch, we first have to setup the explorer for the overApproximation. + if (computeRewards) { + overApproximation->startNewExploration(storm::utility::zero()); + } else { + overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. 
+ auto obsRatings = getObservationRatings(overApproximation); + ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); + // Potentially increase the aggressiveness so that at least one observation actually gets refinement. + *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); + refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); + STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + for (auto const& obs : refinedObservations) { + // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) + ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); + observationResolutionVector[obs] += storm::utility::convertNumber(storm::utility::ceil(incrementValue)); + } + overApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Start exploration + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + while (overApproximation->hasUnexploredState()) { + uint64_t currId = overApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - explorer.addSelfloopTransition(); + overApproximation->setCurrentStateIsTarget(); + overApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + if 
(storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; - explorer.setCurrentStateIsTruncated(); + overApproximation->setCurrentStateIsTruncated(); } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - for (auto const& successor : successorGridPoints) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first)); - } - } - if (stopExploration) { - if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); - } - } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- explorer.computeRewardAtCurrentState(action, truncationValueBound); - } - } - } - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildAborted = true; - break; - } - } - statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates(); - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildTime.stop(); - return nullptr; - } - - explorer.finishExploration(); - statistics.overApproximationBuildTime.stop(); - - statistics.overApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); - statistics.overApproximationCheckTime.stop(); - - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { - /*TODO: - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t maxUaModelSize) { - bool initialBoundMapsSet = overApproximationMap && underApproximationMap; - std::map initialOverMap; - std::map initialUnderMap; - if (initialBoundMapsSet) { - initialOverMap = overApproximationMap.value(); - initialUnderMap = underApproximationMap.value(); - } - // Note that a persistent cache is not support by the current data structure. 
The resolution for the given belief also has to be stored somewhere to cache effectively - std::map>> subSimplexCache; - std::map> lambdaCache; - - // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - statistics.overApproximationBuildTime.start(); - - uint64_t nextBeliefId = refinementComponents->beliefList.size(); - uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; // The MDP states where the observation has changed - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { - relevantStates.insert(iter.second); - } - } - - std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed - for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { - for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { - for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( - state, row).begin(); - itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { - if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { - statesAndActionsToCheck.insert(std::make_pair(state, row)); - break; - } - } - } - } - - std::deque beliefsToBeExpanded; - - std::map, std::map> transitionsStateActionPair; - for (auto const &stateActionPair : statesAndActionsToCheck) { - auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); - auto action = stateActionPair.second; - std::map 
actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], - action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - // Expand and triangulate the successor - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - //TODO add caching - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - } - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - 
refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + // Check whether we expand this state/action pair + // We always expand if we are not doing refinement or if the state was not available in the "old" MDP. + // Otherwise, a heuristic decides. + bool expandStateAction = true; + if (refine && overApproximation->currentStateHasOldBehavior()) { + // Compute a rating of the current state/action pair + ValueType stateActionRating = storm::utility::one(); + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + if (refinedObservations.get(obsInfo.first)) { + ValueType obsRating = rateObservation(obsInfo.second); + stateActionRating = std::min(stateActionRating, obsRating); } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } + // Only refine if this rating is below the doubled refinementAggressiveness + expandStateAction = stateActionRating <
storm::utility::convertNumber(2.0) * (*refinementAggressiveness); } - } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[stateActionPair] = transitionInActionBelief; - } - } - - std::set stoppedExplorationStateSet; - - // Expand newly added beliefs - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = refinementComponents->beliefIsTarget[currId]; - - if (initialBoundMapsSet && - cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { - STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] - << std::endl) - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, - {0, storm::utility::one() - - weightedSumOverMap[currId]}}; - stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = - {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; - } else { - uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = 
actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, - nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if (expandStateAction) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? 
overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); } } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; - } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; - } + if (stopExploration) { + if (computeRewards) { + overApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + overApproximation->addTransitionsToExtraStates(action, truncationValueBound, 
truncationProbability - truncationValueBound); } } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } + } else { + // Do not refine here + overApproximation->restoreOldBehaviorAtCurrentState(action); } } } @@ -606,173 +398,85 @@ namespace storm { break; } } - - statistics.overApproximationStates = nextStateId; + // TODO: Drop unreachable states (sometimes?) + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); - // Return the result from the old refinement step - return refinementComponents; - } - storm::models::sparse::StateLabeling mdpLabeling(nextStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); - mdpLabeling.addLabelToState("target", 1); - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); - auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 0, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 1, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - for (uint64_t state = 2; state < nextStateId; ++state) { - smb.newRowGroup(currentRow); - //STORM_PRINT("Loop State: " << state << std::endl) - uint64_t numChoices = pomdp.getNumberOfChoices( - 
pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); - bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; - for (uint64_t action = 0; action < numChoices; ++action) { - if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { - for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { - smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); - } - } else { - for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { - smb.addNextValue(currentRow, iter.first, iter.second); - } - } - ++currentRow; - if (isTarget) { - // If the state is a target, we only have one action, thus we add the target label and stop the iteration - mdpLabeling.addLabelToState("target", state); - break; - } - if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { - break; - } - } - ++currentRowGroup; - } - storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } 
- overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); + return; } - overApproxMdp.printModelInformationToStream(std::cout); - statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); + overApproximation->finishExploration(); + statistics.overApproximationBuildTime.stop(); + statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return refinementComponents; // Return the result from the previous iteration - } - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, - refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - - return std::make_shared>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, - refinementComponents->beliefIsTarget, 
refinementComponents->overApproxBeliefStateMap, - underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); - } - */ } template - std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + if (!underApproximation->hasComputedValues()) { + // Build a new under approximation + if (computeRewards) { + underApproximation->startNewExploration(storm::utility::zero()); + } else { + underApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // Restart the building process + underApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Expand the beliefs + while (underApproximation->hasUnexploredState()) { + uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - 
explorer.addSelfloopTransition(); + underApproximation->setCurrentStateIsTarget(); + underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); + if (!underApproximation->currentStateHasOldBehavior()) { + if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successors = beliefManager->expand(currId, action); - for (auto const& successor : successors) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? 
explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first)); + // Always restore old behavior if available + if (underApproximation->currentStateHasOldBehavior()) { + underApproximation->restoreOldBehaviorAtCurrentState(action); + } else { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (stopExploration) { + if (computeRewards) { + underApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } } - } - if (stopExploration) { if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + underApproximation->computeRewardAtCurrentState(action, truncationValueBound); } } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- explorer.computeRewardAtCurrentState(action, truncationValueBound); - } } } if (storm::utility::resources::isTerminate()) { @@ -780,25 +484,19 @@ namespace storm { break; } } - statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates(); + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); - return nullptr; + return; } - explorer.finishExploration(); + underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); statistics.underApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { - // TODO } template class ApproximatePOMDPModelchecker>; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 0d59ac31a..7fbd2ab5e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -18,32 +18,6 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - /** - * Struct containing information which is supposed to be persistent over multiple refinement steps - * - */ - template> - struct RefinementComponents { - std::shared_ptr> overApproxModelPtr; - ValueType overApproxValue; - ValueType underApproxValue; - std::map 
overApproxMap; - std::map underApproxMap; - std::vector> beliefList; - std::vector> beliefGrid; - std::vector beliefIsTarget; - bsmap_type overApproxBeliefStateMap; - bsmap_type underApproxBeliefStateMap; - uint64_t initialBeliefId; - }; - - template> - struct UnderApproxComponents { - ValueType underApproxValue; - std::map underApproxMap; - bsmap_type underApproxBeliefStateMap; - }; - template class ApproximatePOMDPModelchecker { public: @@ -103,121 +77,16 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties */ - std::shared_ptr computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager); - - void refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties */ - std::shared_ptr computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager); - - void refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - -#ifdef REMOVE_THIS - /** - * Helper to compute an underapproximation of the reachability property. - * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. - * - * @param beliefList vector containing already generated beliefs - * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target - * @param targetObservations set of target observations - * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state - * @param min true if minimum value is to be computed - * @param computeReward true if rewards are to be computed - * @param maxModelSize number of states up until which the belief support should be unrolled - * @return struct containing the components generated during the under approximation - */ - std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, - uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds); - - /** - * Constructs the initial belief for the given POMDP - * - * @param pomdp the POMDP - * @param id the id the initial belief is given - * 
@return a belief representing the initial belief - */ - storm::pomdp::Belief - getInitialBelief(uint64_t id); - - - /** - * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation - * - * @param probabilities the probability distribution of the belief - * @param gridResolution the resolution used for the belief - * @param nrStates number of states in the POMDP - * @return a pair containing: 1) the subsimplices 2) the lambda values - */ - std::pair>, std::vector> - computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); + void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - /** - * Helper method to get the probabilities to be in a state with each observation after performing an action - * - * @param belief the belief in which the action is performed - * @param actionIndex the index of the action to be performed - * @return mapping from each observation to the probability to be in a state with that observation after performing the action - */ - std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, - uint64_t actionIndex); - - /** - * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. 
- * If the belief does not exist yet, it is created and added to the list of all beliefs - * - * @param beliefList data structure to store all generated beliefs - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param targetObservations set of target observations - * @param belief the starting belief - * @param actionIndex the index of the action to be performed - * @param observation the observation after the action was performed - * @return the resulting belief (observation and distribution) - */ - uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - storm::pomdp::Belief &belief, - uint64_t actionIndex, uint32_t observation, uint64_t id); - - /** - * Helper to get the id of a Belief stored in a given vector structure - * - * @param grid the vector on which the lookup is performed - * @param observation the observation of the belief - * @param probabilities the probability distribution over the POMDP states of the Belief - * @return if the belief was found in the vector, the belief's ID, otherwise -1 - */ - uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::map &probabilities); - - /** - * Helper method to build the transition matrix from a data structure containing transations - * - * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) - * @return sparseMatrix representing the transitions - */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); - - /** - * Get the reward for performing an action in a given belief - * - * @param action the index of the action to be performed - * @param belief the belief in which the action is performed - * @return the reward earned by performing the action in the belief - */ - ValueType getRewardAfterAction(uint64_t 
action, storm::pomdp::Belief const& belief); - ValueType getRewardAfterAction(uint64_t action, std::map const& belief); -#endif //REMOVE_THIS + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info); + + std::vector getObservationRatings(std::shared_ptr const& overApproximation); struct Statistics { Statistics(); From c3847d05afd0bce9aae4d80047a1a1543846f263 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 06:37:01 +0200 Subject: [PATCH 105/155] Scaling the rating of an observation with the current resolution. --- .../ApproximatePOMDPModelchecker.cpp | 18 +++++++++++------- .../ApproximatePOMDPModelchecker.h | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4526607d4..2936d9b40 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -264,21 +264,23 @@ namespace storm { * Here, 0 means a bad approximation and 1 means a good approximation. 
*/ template - typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) { + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { auto n = storm::utility::convertNumber(info.successorWithObsCount); auto one = storm::utility::one(); - // Create the actual rating for this observation at this choice from the given info + // Create the rating for this observation at this choice from the given info ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // Normalize the rating so that it ranges from 0 to 1, where // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. 
obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); return obsChoiceRating; } template - std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation) { + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); @@ -289,7 +291,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { auto const& obs = obsInfo.first; - ValueType obsChoiceRating = rateObservation(obsInfo.second); + ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); // The rating of the observation will be the minimum over all choice-based observation ratings resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); @@ -303,7 +305,9 @@ namespace storm { STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); - + uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + STORM_LOG_INFO("Refining with maximal 
resolution " << maxResolution << "."); + statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; if (!refine) { @@ -315,7 +319,7 @@ namespace storm { } } else { // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. - auto obsRatings = getObservationRatings(overApproximation); + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); // Potentially increase the aggressiveness so that at least one observation actually gets refinement. *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); @@ -356,7 +360,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { if (refinedObservations.get(obsInfo.first)) { - ValueType obsRating = rateObservation(obsInfo.second); + ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution); stateActionRating = std::min(stateActionRating, obsRating); } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 7fbd2ab5e..f895a3138 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -84,9 +84,9 @@ namespace storm { */ void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info); + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, 
uint64_t const& maxResolution); - std::vector getObservationRatings(std::shared_ptr const& overApproximation); + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution); struct Statistics { Statistics(); From c2837bb749668df1cbadf995062d84520495c9e3 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:41:36 +0200 Subject: [PATCH 106/155] ApproximatePOMDPModelchecker: Improved output a bit. --- .../ApproximatePOMDPModelchecker.cpp | 44 ++++++++++++------- .../ApproximatePOMDPModelchecker.h | 2 + 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 2936d9b40..fb95b0838 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -133,6 +133,7 @@ namespace storm { stream << ">="; } stream << statistics.overApproximationStates.get() << std::endl; + stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl; stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; } @@ -148,6 +149,7 @@ namespace storm { stream << ">="; } stream << statistics.underApproximationStates.get() << std::endl; + stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } @@ -231,14 +233,16 @@ 
namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; - ValueType refinementAggressiveness = storm::utility::zero(); + ValueType refinementAggressiveness = storm::utility::convertNumber(0.0); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } + ++statistics.refinementSteps.get(); + STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "."); // Refine over-approximation - refinementAggressiveness *= storm::utility::convertNumber(1.1);; + STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << "."); buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); if (overApproximation->hasComputedValues()) { overApproxValue = overApproximation->getComputedValueAtInitialState(); @@ -246,16 +250,18 @@ namespace storm { break; } - // Refine under-approximation - underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); - underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); - if (underApproximation->hasComputedValues()) { - underApproxValue = underApproximation->getComputedValueAtInitialState(); - } else { - break; + if (result.diff() > options.refinementPrecision) { + // Refine under-approximation + underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + 
refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; + } } - ++statistics.refinementSteps.get(); } } @@ -305,8 +311,9 @@ namespace storm { STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); - uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_LOG_INFO("Refining with maximal resolution " << maxResolution << "."); + + // current maximal resolution (needed for refinement heuristic) + uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; @@ -319,12 +326,12 @@ namespace storm { } } else { // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. 
- auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution); + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); // Potentially increase the aggressiveness so that at least one observation actually gets refinement. *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); - STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); @@ -332,6 +339,7 @@ namespace storm { } overApproximation->restartExploration(); } + statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); // Start exploration std::map gatheredSuccessorObservations; // Declare here to avoid reallocations @@ -360,7 +368,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { if (refinedObservations.get(obsInfo.first)) { - ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution); + ValueType obsRating = rateObservation(obsInfo.second, 
observationResolutionVector[obsInfo.first], oldMaxResolution); stateActionRating = std::min(stateActionRating, obsRating); } } @@ -411,7 +419,8 @@ namespace storm { overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); - + STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states."); + statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); @@ -421,6 +430,7 @@ namespace storm { void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); + statistics.underApproximationStateLimit = maxStateCount; if (!underApproximation->hasComputedValues()) { // Build a new under approximation if (computeRewards) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f895a3138..1d5521b6a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -96,11 +96,13 @@ namespace storm { bool overApproximationBuildAborted; storm::utility::Stopwatch overApproximationBuildTime; storm::utility::Stopwatch overApproximationCheckTime; + boost::optional overApproximationMaxResolution; boost::optional underApproximationStates; bool underApproximationBuildAborted; storm::utility::Stopwatch underApproximationBuildTime; storm::utility::Stopwatch underApproximationCheckTime; + boost::optional underApproximationStateLimit; bool aborted; }; From 961baa43868315b5c8eaef8c21eeffd962748ffb Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:43:33 +0200 Subject: [PATCH 107/155] 
BeliefMdpExplorer: Various bugfixes for exploration restarts. Unexplored (= unreachable) states are now dropped before building the MDP since we do not get a valid MDP otherwise. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 186 ++++++++++++++------ 1 file changed, 131 insertions(+), 55 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 2a97c5e05..d59b770f0 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -62,33 +62,36 @@ namespace storm { exploredChoiceIndices.clear(); mdpActionRewards.clear(); exploredMdp = nullptr; - currentMdpState = noState(); - + internalAddRowGroupIndex(); // Mark the start of the first row group + // Add some states with special treatment (if requested) if (extraBottomStateValue) { - extraBottomState = getCurrentNumberOfMdpStates(); + currentMdpState = getCurrentNumberOfMdpStates(); + extraBottomState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); - internalAddRowGroupIndex(); internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); + internalAddRowGroupIndex(); } else { extraBottomState = boost::none; } if (extraTargetStateValue) { - extraTargetState = getCurrentNumberOfMdpStates(); + currentMdpState = getCurrentNumberOfMdpStates(); + extraTargetState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); - internalAddRowGroupIndex(); internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); + internalAddRowGroupIndex(); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(extraTargetState.get(), true); } else { extraTargetState = boost::none; } - + currentMdpState = noState(); + // Set up the initial state. 
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } @@ -101,6 +104,7 @@ namespace storm { */ void restartExploration() { STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + status = Status::Exploring; // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); @@ -124,6 +128,7 @@ namespace storm { if (extraTargetState) { currentMdpState = extraTargetState.get(); restoreOldBehaviorAtCurrentState(0); + targetStates.set(extraTargetState.get(), true); } currentMdpState = noState(); @@ -138,23 +143,22 @@ namespace storm { BeliefId exploreNextState() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + // Mark the end of the previously explored row group. + if (currentMdpState != noState() && !currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } // Pop from the queue. currentMdpState = mdpStatesToExplore.front(); mdpStatesToExplore.pop_front(); - if (!currentStateHasOldBehavior()) { - internalAddRowGroupIndex(); - } return mdpStateToBeliefIdMap[currentMdpState]; } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // We first insert the entries of the current row in a separate map. 
- // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; if (!storm::utility::isZero(bottomStateValue)) { STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); @@ -168,6 +172,7 @@ namespace storm { void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -182,8 +187,8 @@ namespace storm { */ bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // We first insert the entries of the current row in a separate map. 
- // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + MdpStateType column; if (ignoreNewBeliefs) { column = getExploredMdpState(transitionTarget); @@ -221,6 +226,7 @@ namespace storm { bool currentStateHasOldBehavior() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); } @@ -232,6 +238,8 @@ namespace storm { */ void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); @@ -255,10 +263,27 @@ namespace storm { void finishExploration() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + // Complete the exploration // Finish the last row grouping in case the last explored state was new if (!currentStateHasOldBehavior()) { internalAddRowGroupIndex(); } + 
// Resize state- and choice based vectors to the correct size + targetStates.resize(getCurrentNumberOfMdpStates(), false); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + + // We are not exploring anymore + currentMdpState = noState(); + + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). + // We get rid of these before rebuilding the model + if (exploredMdp) { + dropUnexploredStates(); + } // Create the tranistion matrix uint64_t entryCount = 0; @@ -300,50 +325,101 @@ namespace storm { status = Status::ModelFinished; } - void dropUnreachableStates() { - STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); - auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector{initialMdpState}), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), - getExploredMdp()->getStateLabeling().getStates("target")); - auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); - auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); - std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); - std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); - std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); - std::vector reachableValues(reachableStates.getNumberOfSetBits()); - std::vector reachableMdpActionRewards; - for (uint64_t state = 0; state < reachableStates.size(); ++state) { - if (reachableStates[state]) { - 
reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); - reachableLowerValueBounds.push_back(lowerValueBounds[state]); - reachableUpperValueBounds.push_back(upperValueBounds[state]); - reachableValues.push_back(values[state]); - if (getExploredMdp()->hasRewardModel()) { - //TODO FIXME is there some mismatch with the indices here? - for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) { - reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[state + i]); - } + void dropUnexploredStates() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); + // Find the states (and corresponding choices) that were not explored. + // These correspond to "empty" MDP transitions + storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); + std::vector toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); + MdpStateType nextRelevantIndex = 0; + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + // Check first row in group + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + relevantMdpStates.set(groupIndex, false); + } else { + toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; + ++nextRelevantIndex; + } + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + // process remaining rows in group + for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { + // Assert that all actions at the current state were consistently explored or unexplored. 
+ STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); } } - //TODO drop BeliefIds from exploredBeliefIDs? } - std::unordered_map> mdpRewardModels; - if (!reachableMdpActionRewards.empty()) { - //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); - mdpRewardModels.emplace("default", - storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(reachableMdpActionRewards))); + + if (relevantMdpStates.full()) { + // All states are relevant so nothing to do + return; } - storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling), - std::move(mdpRewardModels)); - exploredMdp = std::make_shared>(std::move(modelComponents)); - - std::map reachableBeliefIdToMdpStateMap; - for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { - reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; + + // Translate various components to the "new" MDP state set + storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); + { // beliefIdToMdpStateMap + for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { + if (relevantMdpStates.get(belIdToMdpStateIt->second)) { + // Keep current entry and move on to the next one. + ++belIdToMdpStateIt; + } else { + STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); + // Delete current entry and move on to the next one. + // This works because std::map::erase does not invalidate other iterators within the map! 
+ beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); + } + } + } + { // exploredMdpTransitions + storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); + // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map + // It might payoff to do this when building the matrix. + for (auto& transitions : exploredMdpTransitions) { + std::map newTransitions; + for (auto const& entry : transitions) { + STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); + newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); + } + transitions = std::move(newTransitions); + } } - mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap; - beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap; + { // exploredChoiceIndices + MdpStateType newState = 0; + assert(exploredChoiceIndices[0] == 0u); + // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched. 
+ for (auto const& oldState : relevantMdpStates) { + if (oldState != newState) { + assert(oldState > newState); + uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; + exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; + } + ++newState; + } + exploredChoiceIndices.resize(newState + 1); + } + if (!mdpActionRewards.empty()) { + storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); + } + if (extraBottomState) { + extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; + } + if (extraTargetState) { + extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; + } + targetStates = targetStates % relevantMdpStates; + truncatedStates = truncatedStates % relevantMdpStates; + initialMdpState = toRelevantStateIndexMap[initialMdpState]; + + storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); + } std::shared_ptr> getExploredMdp() const { @@ -364,7 +440,7 @@ namespace storm { MdpStateType getStartOfCurrentRowGroup() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - return exploredChoiceIndices.back(); + return exploredChoiceIndices[getCurrentMdpState()]; } ValueType getLowerValueBoundAtCurrentState() const { From 34d6ac9fe1f0e06ab209a51d91ba1adae0aaa3b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:44:14 +0200 Subject: [PATCH 108/155] Fixed computing a state limit for the under-approximation. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fb95b0838..bea230599 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -252,7 +252,7 @@ namespace storm { if (result.diff() > options.refinementPrecision) { // Refine under-approximation - underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); From 5cd4281133b986360c7f5583516220c2ab117c3a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:50:12 +0200 Subject: [PATCH 109/155] Further output improvements. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 4 +++- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index d59b770f0..c8f1775ed 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -278,7 +278,7 @@ namespace storm { // We are not exploring anymore currentMdpState = noState(); - + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). // We get rid of these before rebuilding the model if (exploredMdp) { @@ -323,6 +323,8 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated)."); + } void dropUnexploredStates() { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bea230599..eb116fc7b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -419,7 +419,6 @@ namespace storm { overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); - STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states."); statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); From 26864067cf7930078f54eed972096f7db5bd20b8 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 13:04:38 +0200 Subject: [PATCH 110/155] BeliefManager: Made several methods private to hide the actual BeliefType. --- src/storm-pomdp/storage/BeliefManager.h | 164 +++++++++++++----------- 1 file changed, 86 insertions(+), 78 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 8f0dcd225..0731aef35 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -44,6 +44,92 @@ namespace storm { } }; + BeliefId noId() const { + return std::numeric_limits::max(); + } + + bool isEqual(BeliefId const& first, BeliefId const& second) const { + return isEqual(getBelief(first), getBelief(second)); + } + + std::string toString(BeliefId const& beliefId) const { + return toString(getBelief(beliefId)); + } + + + std::string toString(Triangulation const& t) const { + std::stringstream str; + str << "(\n"; + for (uint64_t i = 0; i < t.size(); ++i) { + str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; + } + str <<")\n"; + return str.str(); + } + + template + ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : getBelief(beliefId)) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + + + BeliefId const& getInitialBelief() const { + return initialBeliefId; + } + + ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { + auto const& belief = getBelief(beliefId); + STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); + auto result = 
storm::utility::zero(); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (auto const &entry : belief) { + uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); + STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); + result += entry.second * pomdpActionRewardVector[choiceIndex]; + } + return result; + } + + uint32_t getBeliefObservation(BeliefId beliefId) { + return getBeliefObservation(getBelief(beliefId)); + } + + uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { + auto belief = getBelief(beliefId); + return pomdp.getNumberOfChoices(belief.begin()->first); + } + + Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + return triangulateBelief(getBelief(beliefId), resolution); + } + + template + void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { + auto insertionRes = distr.emplace(state, value); + if (!insertionRes.second) { + insertionRes.first->second += value; + } + } + + BeliefId getNumberOfBeliefIds() const { + return beliefs.size(); + } + + std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(beliefId, actionIndex, observationResolutions); + } + + std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + return expandInternal(beliefId, actionIndex); + } + + private: + BeliefType const& getBelief(BeliefId const& id) const { STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); @@ -56,10 +142,6 @@ namespace storm { return idIt->second; } - BeliefId noId() const { - return std::numeric_limits::max(); - } - std::string toString(BeliefType const& belief) const { std::stringstream str; str << 
"{ "; @@ -76,16 +158,6 @@ namespace storm { return str.str(); } - std::string toString(Triangulation const& t) const { - std::stringstream str; - str << "(\n"; - for (uint64_t i = 0; i < t.size(); ++i) { - str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; - } - str <<")\n"; - return str.str(); - } - bool isEqual(BeliefType const& first, BeliefType const& second) const { if (first.size() != second.size()) { return false; @@ -186,49 +258,11 @@ namespace storm { return true; } - template - ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { - ValueType result = storm::utility::zero(); - for (auto const& entry : getBelief(beliefId)) { - result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); - } - return result; - } - - - BeliefId const& getInitialBelief() const { - return initialBeliefId; - } - - ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { - auto const& belief = getBelief(beliefId); - STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); - auto result = storm::utility::zero(); - auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); - for (auto const &entry : belief) { - uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; - STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); - STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); - result += entry.second * pomdpActionRewardVector[choiceIndex]; - } - return result; - } - uint32_t getBeliefObservation(BeliefType belief) { STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); return pomdp.getObservation(belief.begin()->first); } - uint32_t getBeliefObservation(BeliefId beliefId) { - return getBeliefObservation(getBelief(beliefId)); - } - - uint64_t 
getBeliefNumberOfChoices(BeliefId beliefId) { - auto belief = getBelief(beliefId); - return pomdp.getNumberOfChoices(belief.begin()->first); - } - - Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? @@ -307,22 +341,6 @@ namespace storm { return result; } - Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { - return triangulateBelief(getBelief(beliefId), resolution); - } - - template - void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { - auto insertionRes = distr.emplace(state, value); - if (!insertionRes.second) { - insertionRes.first->second += value; - } - } - - BeliefId getNumberOfBeliefIds() const { - return beliefs.size(); - } - std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::map destinations; // TODO: Output as vector? 
@@ -369,16 +387,6 @@ namespace storm { } - std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { - return expandInternal(beliefId, actionIndex, observationResolutions); - } - - std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { - return expandInternal(beliefId, actionIndex); - } - - private: - BeliefId computeInitialBelief() { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); From eca4dab6c069642731639aea3a0b1dcb0cb8736f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 13:43:49 +0200 Subject: [PATCH 111/155] Beliefmanager: expanding a belief now returns a vector instead of a map --- src/storm-pomdp/storage/BeliefManager.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 0731aef35..b74390a79 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -120,11 +120,11 @@ namespace storm { return beliefs.size(); } - std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { return expandInternal(beliefId, actionIndex, observationResolutions); } - std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + std::vector> expand(BeliefId const& beliefId, uint64_t actionIndex) { return expandInternal(beliefId, actionIndex); } @@ -341,8 +341,8 @@ namespace storm { return result; } - std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { - std::map destinations; + std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& 
observationTriangulationResolutions = boost::none) { + std::vector> destinations; // TODO: Output as vector? BeliefType belief = getBelief(beliefId); @@ -373,13 +373,15 @@ namespace storm { } STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); + // Insert the destination. We know that destinations have to be disjoined since they have different observations if (observationTriangulationResolutions) { Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]); for (size_t j = 0; j < triangulation.size(); ++j) { - addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times + destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second); } } else { - addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second); + destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second); } } From 937659f3565f51c9b0850a2d3b7452fb86cbc23e Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 15:32:08 +0200 Subject: [PATCH 112/155] First improvement step for Freudenthal triangulation --- src/storm-pomdp/storage/BeliefManager.h | 130 +++++++++++++----------- 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index b74390a79..7e37b9c16 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -263,77 +263,93 @@ namespace storm { return pomdp.getObservation(belief.begin()->first); } + struct FreudenthalData { + FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { }; + StateType 
pomdpState; + StateType dimension; // i + BeliefValueType value; // v[i] in the Lovejoy paper + BeliefValueType diff; // d[i] in the Lovejoy paper + }; + struct FreudenthalDataComparator { + bool operator()(FreudenthalData const& first, FreudenthalData const& second) const { + if (first.diff != second.diff) { + return first.diff > second.diff; + } else { + return first.dimension < second.dimension; + } + } + }; + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); - auto nrStates = pomdp.getNumberOfStates(); + auto convResolution = storm::utility::convertNumber(resolution); // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper - // TODO avoid reallocations for these vectors - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : belief) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] = x[i] - v[i]; + // However, we speed this up a little by exploiting that belief states usually have sparse support. + // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. 
The other Freudenthaldata could then also be stored in vectors, which would be a bit more like the original algorithm + + // Initialize some data + std::vector::iterator> dataIterators; + dataIterators.reserve(belief.size()); + // Initialize first row of 'qs' matrix + std::vector qsRow; + qsRow.reserve(dataIterators.size()); + std::set freudenthalData; + BeliefValueType x = convResolution; + for (auto const& entry : belief) { + auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first; + dataIterators.push_back(insertionIt); + qsRow.push_back(dataIterators.back()->value); + x -= entry.second * convResolution; } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } + qsRow.push_back(storm::utility::zero()); + assert(!freudenthalData.empty()); + + Triangulation result; + result.weights.reserve(freudenthalData.size()); + result.gridPoints.reserve(freudenthalData.size()); + + // Insert first grid point + // TODO: this special treatment is actually not necessary. + BeliefValueType firstWeight = storm::utility::one() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff; + if (!cc.isZero(firstWeight)) { + result.weights.push_back(firstWeight); + BeliefType gridPoint; + for (StateType j = 0; j < dataIterators.size(); ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; } } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - Triangulation result; - // The first weight is 1-sum(other weights). 
We therefore process the js in reverse order - BeliefValueType firstWeight = storm::utility::one(); - for (size_t j = nrStates; j > 0;) { - --j; - // First create the weights. The weights vector will be reversed at the end. - ValueType weight; - if (j == 0) { - weight = firstWeight; - } else { - weight = d[p[j - 1]] - d[p[j]]; - firstWeight -= weight; - } - if (!cc.isZero(weight)) { - result.weights.push_back(weight); - BeliefType gridPoint; - auto const& qsj = qs[j]; - for (size_t i = 0; i < nrStates - 1; ++i) { - BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[i] = gridPointEntry / convResolution; + if (freudenthalData.size() > 1) { + // Insert remaining grid points + auto currentSortedEntry = freudenthalData.begin(); + auto previousSortedEntry = currentSortedEntry++; + for (StateType i = 1; i < dataIterators.size(); ++i) { + // 'compute' the next row of the qs matrix + qsRow[previousSortedEntry->dimension] += storm::utility::one(); + + BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff; + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + + BeliefType gridPoint; + for (StateType j = 0; j < dataIterators.size(); ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; + } } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - if (!cc.isZero(qsj[nrStates - 1])) { - gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; - } - result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); + ++previousSortedEntry; + ++currentSortedEntry; } } From 2f020ce6860af6cb45a460aca32e1d52f910df74 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 08:12:32 +0200 Subject: [PATCH 113/155] BeliefManager: Making Freudenthal happy (and fast) --- src/storm-pomdp/storage/BeliefManager.h | 122 ++++++++++-------------- 1 file changed, 53 
insertions(+), 69 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 7e37b9c16..95c596005 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -263,96 +263,80 @@ namespace storm { return pomdp.getObservation(belief.begin()->first); } - struct FreudenthalData { - FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { }; - StateType pomdpState; + struct FreudenthalDiff { + FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; StateType dimension; // i - BeliefValueType value; // v[i] in the Lovejoy paper - BeliefValueType diff; // d[i] in the Lovejoy paper - }; - struct FreudenthalDataComparator { - bool operator()(FreudenthalData const& first, FreudenthalData const& second) const { - if (first.diff != second.diff) { - return first.diff > second.diff; + BeliefValueType diff; // d[i] + bool operator>(FreudenthalDiff const& other) const { + if (diff != other.diff) { + return diff > other.diff; } else { - return first.dimension < second.dimension; + return dimension < other.dimension; } } }; Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - //TODO Enable chaching for this method? STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); - - auto convResolution = storm::utility::convertNumber(resolution); - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - // However, we speed this up a little by exploiting that belief states usually have sparse support. - // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. 
The other Freudenthaldata could then also be stored in vectors, which would be a bit more like the original algorithm - - // Initialize some data - std::vector::iterator> dataIterators; - dataIterators.reserve(belief.size()); - // Initialize first row of 'qs' matrix - std::vector qsRow; - qsRow.reserve(dataIterators.size()); - std::set freudenthalData; - BeliefValueType x = convResolution; - for (auto const& entry : belief) { - auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first; - dataIterators.push_back(insertionIt); - qsRow.push_back(dataIterators.back()->value); - x -= entry.second * convResolution; - } - qsRow.push_back(storm::utility::zero()); - assert(!freudenthalData.empty()); - + StateType numEntries = belief.size(); Triangulation result; - result.weights.reserve(freudenthalData.size()); - result.gridPoints.reserve(freudenthalData.size()); - - // Insert first grid point - // TODO: this special treatment is actually not necessary. - BeliefValueType firstWeight = storm::utility::one() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff; - if (!cc.isZero(firstWeight)) { - result.weights.push_back(firstWeight); - BeliefType gridPoint; - for (StateType j = 0; j < dataIterators.size(); ++j) { - BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; - } + + // Quickly triangulate Dirac beliefs + if (numEntries == 1u) { + result.weights.push_back(storm::utility::one()); + result.gridPoints.push_back(getOrAddBeliefId(belief)); + } else { + + auto convResolution = storm::utility::convertNumber(resolution); + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are mostly based on the paper + // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). 
+ // Initialize diffs and the first row of the 'qs' matrix (aka v) + std::set> sorted_diffs; // d (and p?) in the paper + std::vector qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v + qsRow.reserve(numEntries); + std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices + toOriginalIndicesMap.reserve(numEntries); + BeliefValueType x = convResolution; + for (auto const& entry : belief) { + qsRow.push_back(storm::utility::floor(x)); // v + sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v + toOriginalIndicesMap.push_back(entry.first); + x -= entry.second * convResolution; } - result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); - } - - if (freudenthalData.size() > 1) { - // Insert remaining grid points - auto currentSortedEntry = freudenthalData.begin(); - auto previousSortedEntry = currentSortedEntry++; - for (StateType i = 1; i < dataIterators.size(); ++i) { - // 'compute' the next row of the qs matrix - qsRow[previousSortedEntry->dimension] += storm::utility::one(); - - BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff; + // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler + qsRow.push_back(storm::utility::zero()); + + result.weights.reserve(numEntries); + result.gridPoints.reserve(numEntries); + auto currentSortedDiff = sorted_diffs.begin(); + auto previousSortedDiff = sorted_diffs.end(); + --previousSortedDiff; + for (StateType i = 0; i < numEntries; ++i) { + // Compute the weight for the grid points + BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; + if (i == 0) { + // The first weight is a bit different + weight += storm::utility::one(); + } else { + // 'compute' the next row of the qs matrix + qsRow[previousSortedDiff->dimension] += storm::utility::one(); + } if (!cc.isZero(weight)) { result.weights.push_back(weight); - + // Compute the grid point BeliefType gridPoint; - for 
(StateType j = 0; j < dataIterators.size(); ++j) { + for (StateType j = 0; j < numEntries; ++j) { BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; if (!cc.isZero(gridPointEntry)) { - gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; } } result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - ++previousSortedEntry; - ++currentSortedEntry; + previousSortedDiff = currentSortedDiff++; } } - STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); return result; } From fcee1d05fabe15de82f6213a3bd91acf25492ea1 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 10:09:45 +0200 Subject: [PATCH 114/155] Fixed an issue with dropping unexplored states. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index c8f1775ed..fd2b54862 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -323,7 +323,7 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; - STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated)."); + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); } @@ -368,7 +368,8 @@ namespace storm { { // beliefIdToMdpStateMap for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { if 
(relevantMdpStates.get(belIdToMdpStateIt->second)) { - // Keep current entry and move on to the next one. + // Translate current entry and move on to the next one. + belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; ++belIdToMdpStateIt; } else { STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); @@ -619,7 +620,7 @@ namespace storm { return findRes->second; } } - // At this poind we need to add a new MDP state + // At this point we need to add a new MDP state MdpStateType result = getCurrentNumberOfMdpStates(); assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); mdpStateToBeliefIdMap.push_back(beliefId); From 26a0544e4ba075bfbc773f75ee5b1489f41ada1f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 10:10:34 +0200 Subject: [PATCH 115/155] BeiliefManager: Use flat_maps for beliefs and hash_maps for belief storage. --- src/storm-pomdp/storage/BeliefManager.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 95c596005..25ec3a3d2 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -1,9 +1,9 @@ #pragma once -#include +#include #include -//#include - +#include +#include "storm/adapters/RationalNumberAdapter.h" #include "storm/utility/macros.h" #include "storm/exceptions/UnexpectedException.h" @@ -15,8 +15,7 @@ namespace storm { public: typedef typename PomdpType::ValueType ValueType; - //typedef boost::container::flat_map BeliefType - typedef std::map BeliefType; + typedef boost::container::flat_map BeliefType; // iterating over this shall be ordered (for correct hash computation) typedef uint64_t BeliefId; BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { @@ 
-343,7 +342,6 @@ namespace storm { std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::vector> destinations; - // TODO: Output as vector? BeliefType belief = getBelief(beliefId); @@ -411,11 +409,23 @@ namespace storm { return insertioRes.first->second; } + struct BeliefHash { + std::size_t operator()(const BeliefType& belief) const { + std::size_t seed = 0; + // Assumes that beliefs are ordered + for (auto const& entry : belief) { + boost::hash_combine(seed, entry.first); + boost::hash_combine(seed, entry.second); + } + return seed; + } + }; + PomdpType const& pomdp; std::vector pomdpActionRewardVector; std::vector beliefs; - std::map beliefToIdMap; + std::unordered_map beliefToIdMap; BeliefId initialBeliefId; storm::utility::ConstantsComparator cc; From 03889958dab727f7a50f0f2f703a6a0c4fb055ec Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 11:29:37 +0200 Subject: [PATCH 116/155] Added a switch to control the size of the under-approximation via command line. 
--- .../modules/GridApproximationSettings.cpp | 10 ++++++++++ .../settings/modules/GridApproximationSettings.h | 3 ++- src/storm-pomdp-cli/storm-pomdp.cpp | 1 + .../ApproximatePOMDPModelchecker.cpp | 16 ++++++++++++++-- .../modelchecker/ApproximatePOMDPModelchecker.h | 1 + 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp index 8006b3851..6b5b17677 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp @@ -19,6 +19,7 @@ namespace storm { const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string numericPrecisionOption = "numeric-precision"; const std::string cacheSimplicesOption = "cache-simplices"; + const std::string unfoldBeliefMdpOption = "unfold-belief-mdp"; GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { @@ -35,6 +36,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, unfoldBeliefMdpOption, false,"Sets the (initial-) size threshold of the unfolded belief MDP (higher means more precise results, 0 means automatic choice)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the maximal number of states").setDefaultValueUnsignedInteger(0).build()).build()); } bool GridApproximationSettings::isRefineSet() const { @@ -65,6 +67,14 @@ namespace storm { return this->getOption(cacheSimplicesOption).getHasOptionBeenSet(); } + bool GridApproximationSettings::isUnfoldBeliefMdpSizeThresholdSet() const { + return this->getOption(unfoldBeliefMdpOption).getHasOptionBeenSet(); + } + + uint64_t 
GridApproximationSettings::getUnfoldBeliefMdpSizeThreshold() const { + return this->getOption(unfoldBeliefMdpOption).getArgumentByName("value").getValueAsUnsignedInteger(); + } + } // namespace modules } // namespace settings } // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h index a01fdbd77..88e484128 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h @@ -27,7 +27,8 @@ namespace storm { bool isNumericPrecisionSetFromDefault() const; double getNumericPrecision() const; bool isCacheSimplicesSet() const; - + bool isUnfoldBeliefMdpSizeThresholdSet() const; + uint64_t getUnfoldBeliefMdpSizeThreshold() const; // The name of the module. static const std::string moduleName; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 9b3026832..181c5c31f 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -109,6 +109,7 @@ namespace storm { options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); + options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index eb116fc7b..070279ac4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -35,6 +35,7 @@ namespace storm { refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; + beliefMdpSizeThreshold = 0ull; } template @@ -180,7 +181,7 @@ namespace storm { approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.lowerBound : result.upperBound; resultValue = approx->getComputedValueAtInitialState(); - underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); + underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold); } } { // Underapproximation (Uses a fresh Belief manager) @@ -189,6 +190,12 @@ namespace storm { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (options.beliefMdpSizeThreshold) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } + if (underApproxSizeThreshold == 0) { + underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build) + } buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); @@ -221,7 +228,12 @@ namespace storm { overApproxValue = overApproximation->getComputedValueAtInitialState(); // UnderApproximation - uint64_t underApproxSizeThreshold = std::max(overApproximation->getExploredMdp()->getNumberOfStates(), 10); + uint64_t underApproxSizeThreshold; + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } 
else { + underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); + } auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); if (!underApproximation->hasComputedValues()) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 1d5521b6a..823eebf60 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -34,6 +34,7 @@ namespace storm { ValueType refinementPrecision; /// Used to decide when the refinement should terminate ValueType numericPrecision; /// Used to decide whether two values are equal bool cacheSubsimplices; /// Enables caching of subsimplices + boost::optional beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection. }; struct Result { From f4f9376c966489ce0ba288235c54360883590138 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 11:30:19 +0200 Subject: [PATCH 117/155] Vector: Added a method for element-wise comparison of two vectors. --- src/storm/utility/vector.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h index dd562a8e1..af6098038 100644 --- a/src/storm/utility/vector.h +++ b/src/storm/utility/vector.h @@ -142,6 +142,12 @@ namespace storm { return true; } + template + bool compareElementWise(std::vector const& left, std::vector const& right, Comparator comp = std::less()) { + STORM_LOG_ASSERT(left.size() == right.size(), "Expected that vectors for comparison have equal size"); + return std::equal(left.begin(), left.end(), right.begin(), comp); + } + /*! 
* Selects the elements from a vector at the specified positions and writes them consecutively into another vector. * @param vector The vector into which the selected elements are to be written. From 3c5df045c1138457b5a98f49ec59a245cb0f7994 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 12:18:32 +0200 Subject: [PATCH 118/155] Added a few assertions --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 2 ++ .../modelchecker/TrivialPomdpValueBoundsModelChecker.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index fd2b54862..e5d233aec 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -473,6 +473,8 @@ namespace storm { std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); if (res) { values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal()), "Computed values are smaller than the lower bound."); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal()), "Computed values are larger than the upper bound."); } else { STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); STORM_LOG_ERROR("No result obtained while checking."); diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h index 862a82a05..ca4c2192f 100644 --- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -88,6 +88,7 @@ namespace storm { pomdpScheduler.setChoice(choiceDistribution, state); } } + STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); auto scheduledModel = 
underlyingMdp->applyScheduler(pomdpScheduler, false); auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); @@ -104,6 +105,7 @@ namespace storm { result.lower = std::move(pomdpSchedulerResult); result.upper = std::move(fullyObservableResult); } + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal()), "Lower bound is larger than upper bound"); return result; } From 26764137f5cbc3a22181ab4e243d265f1943d03d Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 12:30:39 +0200 Subject: [PATCH 119/155] Fix for --unfold-belief-mdp setting --- src/storm-pomdp-cli/storm-pomdp.cpp | 4 +++- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 181c5c31f..19e139d1d 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -109,7 +109,9 @@ namespace storm { options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); - options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) { + options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + } if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 070279ac4..622e63512 100644 --- 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -35,7 +35,7 @@ namespace storm { refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; - beliefMdpSizeThreshold = 0ull; + beliefMdpSizeThreshold = boost::none; } template @@ -190,7 +190,7 @@ namespace storm { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (options.beliefMdpSizeThreshold) { + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) { underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); } if (underApproxSizeThreshold == 0) { From 5f2a598f48e26bc7c757eaeafd2266abb0cf51f0 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Tue, 14 Apr 2020 14:29:30 -0700 Subject: [PATCH 120/155] remove unsound 1-state computation --- .../analysis/QualitativeAnalysis.cpp | 76 +------------------ 1 file changed, 2 insertions(+), 74 deletions(-) diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp index fc27620af..e288ed4ef 100644 --- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp +++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp @@ -69,81 +69,9 @@ namespace storm { storm::storage::BitVector QualitativeAnalysis::analyseProb1Max(storm::logic::UntilFormula const& formula) const { // We consider the states that satisfy the formula with prob.1 under arbitrary schedulers as goal states. 
storm::storage::BitVector goalStates = storm::utility::graph::performProb1A(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), checkPropositionalFormula(formula.getLeftSubformula()), checkPropositionalFormula(formula.getRightSubformula())); - + STORM_LOG_TRACE("Prob1A states according to MDP: " << goalStates); // Now find a set of observations such that there is a memoryless scheduler inducing prob. 1 for each state whose observation is in the set. - storm::storage::BitVector candidateStates = goalStates | checkPropositionalFormula(formula.getLeftSubformula()); - storm::storage::BitVector candidateActions = pomdp.getTransitionMatrix().getRowFilter(candidateStates); - storm::storage::BitVector candidateObservations(pomdp.getNrObservations(), true); - - - bool converged = false; - while (!converged) { - converged = true; - - // Get the candidate states that can reach the goal with prob1 via candidate actions - storm::storage::BitVector newCandidates; - if (candidateActions.full()) { - newCandidates = storm::utility::graph::performProb1E(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), candidateStates, goalStates); - } else { - storm::storage::SparseMatrix filteredTransitions(pomdp.getTransitionMatrix().filterEntries(candidateActions)); - newCandidates = storm::utility::graph::performProb1E(filteredTransitions, filteredTransitions.getRowGroupIndices(), filteredTransitions.transpose(true), candidateStates, goalStates); - } - if (candidateStates != newCandidates) { - converged = false; - candidateStates = std::move(newCandidates); - } - - // Unselect all observations that have a non-candidate state - for (uint64_t state = candidateStates.getNextUnsetIndex(0); state < candidateStates.size(); state = candidateStates.getNextUnsetIndex(state + 1)) { - candidateObservations.set(pomdp.getObservation(state), false); - } - - // update the candidate actions 
to the set of actions that stay inside the candidate state set - std::vector candidateActionsPerObservation(pomdp.getNrObservations()); - for (auto const& state : candidateStates) { - auto& candidateActionsAtState = candidateActionsPerObservation[pomdp.getObservation(state)]; - if (candidateActionsAtState.size() == 0) { - candidateActionsAtState.resize(pomdp.getNumberOfChoices(state), true); - } - STORM_LOG_ASSERT(candidateActionsAtState.size() == pomdp.getNumberOfChoices(state), "State " + std::to_string(state) + " has " + std::to_string(pomdp.getNumberOfChoices(state)) + " actions, different from other with same observation (" + std::to_string(candidateActionsAtState.size()) + ")." ); - for (auto const& action : candidateActionsAtState) { - for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) { - if (!candidateStates.get(entry.getColumn())) { - candidateActionsAtState.set(action, false); - break; - } - } - } - } - - // Unselect all observations without such an action - for (auto const& o : candidateObservations) { - if (candidateActionsPerObservation[o].empty()) { - candidateObservations.set(o, false); - } - } - - // only keep the candidate states with a candidateObservation - for (auto const& state : candidateStates) { - if (!candidateObservations.get(pomdp.getObservation(state)) && !goalStates.get(state)) { - candidateStates.set(state, false); - converged = false; - } - } - - // Only keep the candidate actions originating from a candidateState. 
Also transform the representation of candidate actions - candidateActions.clear(); - for (auto const& state : candidateStates) { - uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state]; - for (auto const& action : candidateActionsPerObservation[pomdp.getObservation(state)]) { - candidateActions.set(offset + action); - } - } - } - - assert(goalStates.isSubsetOf(candidateStates)); - - return candidateStates; + return goalStates; } From 4ea452854fbce16a2316d82c0b310bedd7b339fb Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 08:58:19 +0200 Subject: [PATCH 121/155] Fixes for scoring observations --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 17 +++++++---- .../ApproximatePOMDPModelchecker.cpp | 30 ++++++++++++------- src/storm-pomdp/storage/BeliefManager.h | 11 ++++++- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index e5d233aec..5fb5bfe26 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -456,10 +456,14 @@ namespace storm { return upperValueBounds[getCurrentMdpState()]; } + /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties) + /// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); } - + + /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties) + /// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. 
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); } @@ -507,7 +511,7 @@ namespace storm { // Intentionally left empty. } - void join(SuccessorObservationInformation other) { + void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons) observationProbability += other.observationProbability; maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); successorWithObsCount += other.successorWithObsCount; @@ -515,17 +519,19 @@ namespace storm { ValueType observationProbability; /// The probability we move to the corresponding observation. ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. - uint64_t successorWithObsCount; /// The number of successors with this observation + uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation + typename BeliefManagerType::BeliefSupportType support; }; - void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map gatheredSuccessorObservations) { + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map& gatheredSuccessorObservations) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } - void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map gatheredSuccessorObservations) { + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map& gatheredSuccessorObservations) { 
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { auto const& beliefId = getBeliefId(entry.getColumn()); @@ -537,6 +543,7 @@ namespace storm { // There already is an entry for this observation, so join the two informations obsInsertion.first->second.join(info); } + beliefManager->joinSupport(beliefId, obsInsertion.first->second.support); } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 622e63512..b35ded61c 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -283,18 +283,23 @@ namespace storm { */ template typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { - auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto n = storm::utility::convertNumber(info.support.size()); auto one = storm::utility::one(); - - // Create the rating for this observation at this choice from the given info - ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; - // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 - // Normalize the rating so that it ranges from 0 to 1, where - // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. 
- obsChoiceRating = (obsChoiceRating * n - one) / (n - one); - // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) - obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); - return obsChoiceRating; + if (storm::utility::isOne(n)) { + // If the belief is Dirac, it has to be approximated precisely. + // In this case, we return the best possible rating + return one; + } else { + // Create the rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); + return obsChoiceRating; + } } template @@ -487,6 +492,9 @@ namespace storm { STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); // We did not explore this successor state. Get a bound on the "missing" value truncationProbability += successor.second; + // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other + // under-approximation values. In general, this does not yield a sound underapproximation anymore. + // However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler. 
truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); } } diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 25ec3a3d2..b01ad5358 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "storm/adapters/RationalNumberAdapter.h" #include "storm/utility/macros.h" #include "storm/exceptions/UnexpectedException.h" @@ -16,6 +17,7 @@ namespace storm { typedef typename PomdpType::ValueType ValueType; typedef boost::container::flat_map BeliefType; // iterating over this shall be ordered (for correct hash computation) + typedef boost::container::flat_set BeliefSupportType; typedef uint64_t BeliefId; BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { @@ -99,7 +101,7 @@ namespace storm { } uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { - auto belief = getBelief(beliefId); + auto const& belief = getBelief(beliefId); return pomdp.getNumberOfChoices(belief.begin()->first); } @@ -115,6 +117,13 @@ namespace storm { } } + void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) { + auto const& belief = getBelief(beliefId); + for (auto const& entry : belief) { + support.insert(entry.first); + } + } + BeliefId getNumberOfBeliefIds() const { return beliefs.size(); } From 61215e4b24fb89e90e2e44665226c50a55794863 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 09:06:28 +0200 Subject: [PATCH 122/155] Over-Approximation: Taking current values as new lower/upper bounds for next refinement step. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 9 +++++++++ .../modelchecker/ApproximatePOMDPModelchecker.cpp | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 5fb5bfe26..36f525663 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -548,6 +548,15 @@ namespace storm { } } + void takeCurrentValuesAsUpperBounds() { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + upperValueBounds = values; + } + + void takeCurrentValuesAsLowerBounds() { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + lowerValueBounds = values; + } private: MdpStateType noState() const { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index b35ded61c..8aa83452a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -255,6 +255,11 @@ namespace storm { // Refine over-approximation STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << "."); + if (min) { + overApproximation->takeCurrentValuesAsLowerBounds(); + } else { + overApproximation->takeCurrentValuesAsUpperBounds(); + } buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); if (overApproximation->hasComputedValues()) { overApproxValue = overApproximation->getComputedValueAtInitialState(); From 45832d3de37e5283003a3bfd5114b59c60697140 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 12:12:05 +0200 Subject: [PATCH 123/155] BeliefMdpExplorer: Implemented extraction of optimal scheduler choices and reachable states under 
these choices --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 77 ++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 36f525663..2e1b0f4cc 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -61,6 +61,8 @@ namespace storm { exploredMdpTransitions.clear(); exploredChoiceIndices.clear(); mdpActionRewards.clear(); + optimalMdpChoices = boost::none; + optimalChoicesReachableMdpStates = boost::none; exploredMdp = nullptr; internalAddRowGroupIndex(); // Mark the start of the first row group @@ -230,6 +232,31 @@ namespace storm { return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); } + /*! + * Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check. + * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. + */ + bool currentStateIsOptimalSchedulerReachable() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); + STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoicesReachableMdpStates->get(getCurrentMdpState()); + } + + /*! + * Retrieves whether the given action at the current state was optimal in the most recent check. 
+ * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. + */ + bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + return optimalChoices->get(row); + } + /*! * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. * Does NOT set whether the state is truncated and/or target. @@ -285,6 +312,10 @@ namespace storm { dropUnexploredStates(); } + // The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore. + optimalChoices = boost::none; + optimalChoicesReachableMdpStates = boost::none; + // Create the tranistion matrix uint64_t entryCount = 0; for (auto const& row : exploredMdpTransitions) { @@ -558,6 +589,48 @@ namespace storm { lowerValueBounds = values; } + /*! + * + * Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler. + * States that are only reachable via target states will not be in this set. + * @param ancillaryChoicesEpsilon if the difference of a 1-step value of a choice is only epsilon away from the optimal value, the choice will be included. 
+ * @param relative if set, we consider the relative difference to detect ancillaryChoices + */ + void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid in if no MDP is available."); + STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before."); + STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute states that are reachable under an optimal scheduler but this has already been done before."); + + // First find the choices that are optimal + optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false); + auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + auto const& transitions = exploredMdp->getTransitionMatrix(); + auto const& targetStates = exploredMdp->getStates("target"); + for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) { + if (targetStates.get(mdpState)) { + // Target states can be skipped. 
+ continue; + } else { + auto const& stateValue = values[mdpState]; + for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) { + ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values); + if (exploredMdp->hasRewardModel()) { + choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice); + } + ValueType absDiff = storm::utility::abs((choiceValue - stateValue)); + if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) { + optimalChoices->set(globalChoice, true); + } + } + STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice."); + } + } + + // Then, find the states that are reachable via these choices + optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get()); + } + private: MdpStateType noState() const { return std::numeric_limits::max(); @@ -672,12 +745,14 @@ namespace storm { // Final Mdp std::shared_ptr> exploredMdp; - // Value related information + // Value and scheduler related information std::vector const& pomdpLowerValueBounds; std::vector const& pomdpUpperValueBounds; std::vector lowerValueBounds; std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed + boost::optional optimalMdpChoices; + boost::optional optimalChoicesReachableMdpStates; // The current status of this explorer Status status; From 75d792e9876913d1b000aa9fd43647897baaf97b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 13:51:18 +0200 Subject: [PATCH 124/155] Implemented refinement heuristic. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 87 ++++++- .../ApproximatePOMDPModelchecker.cpp | 212 ++++++++++++------ .../ApproximatePOMDPModelchecker.h | 11 +- 3 files changed, 234 insertions(+), 76 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 2e1b0f4cc..0fc6c9ba4 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -61,7 +61,10 @@ namespace storm { exploredMdpTransitions.clear(); exploredChoiceIndices.clear(); mdpActionRewards.clear(); - optimalMdpChoices = boost::none; + targetStates.clear(); + truncatedStates.clear(); + delayedExplorationChoices.clear(); + optimalChoices = boost::none; optimalChoicesReachableMdpStates = boost::none; exploredMdp = nullptr; internalAddRowGroupIndex(); // Mark the start of the first row group @@ -120,6 +123,7 @@ namespace storm { } targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + delayedExplorationChoices.clear(); mdpStatesToExplore.clear(); // The extra states are not changed @@ -226,17 +230,51 @@ namespace storm { truncatedStates.set(getCurrentMdpState(), true); } - bool currentStateHasOldBehavior() { + void setCurrentChoiceIsDelayed(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false); + delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true); + } + + bool currentStateHasOldBehavior() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); } + bool 
getCurrentStateWasTruncated() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); + return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState()); + } + + /*! + * Retrieves whether the current state can be reached under an optimal scheduler + * This requires a previous call of computeOptimalChoicesAndReachableMdpStates. + */ + bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoicesReachableMdpStates->get(mdpState); + } + + /*! + * Retrieves whether the given action at the current state was optimal in the most recent check. + * This requires a previous call of computeOptimalChoicesAndReachableMdpStates. + */ + bool actionIsOptimal(uint64_t const& globalActionIndex) const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoices->get(globalActionIndex); + } + /*! * Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check. * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. 
*/ - bool currentStateIsOptimalSchedulerReachable() { + bool currentStateIsOptimalSchedulerReachable() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); @@ -248,13 +286,22 @@ namespace storm { * Retrieves whether the given action at the current state was optimal in the most recent check. * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. */ - bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) { + bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); - uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; - return optimalChoices->get(row); + uint64_t choice = getStartOfCurrentRowGroup() + localActionIndex; + return optimalChoices->get(choice); + } + + bool getCurrentStateActionExplorationWasDelayed(uint64_t const& localActionIndex) const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + 
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); + uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex; + return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice); } /*! @@ -351,7 +398,17 @@ namespace storm { storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); } + // Create model components storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); + + // Potentially create a choice labeling + if (!delayedExplorationChoices.empty()) { + modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices()); + delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false); + modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices)); + } + + // Create the final model. 
exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); @@ -579,6 +636,19 @@ namespace storm { } } + bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const& observationSet) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (observationSet.get(beliefManager->getBeliefObservation(beliefId))) { + return true; + } + } + return false; + } + void takeCurrentValuesAsUpperBounds() { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); upperValueBounds = values; @@ -735,12 +805,13 @@ namespace storm { std::vector mdpActionRewards; uint64_t currentMdpState; - // Special states during exploration + // Special states and choices during exploration boost::optional extraTargetState; boost::optional extraBottomState; storm::storage::BitVector targetStates; storm::storage::BitVector truncatedStates; MdpStateType initialMdpState; + storm::storage::BitVector delayedExplorationChoices; // Final Mdp std::shared_ptr> exploredMdp; @@ -751,7 +822,7 @@ namespace storm { std::vector lowerValueBounds; std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed - boost::optional optimalMdpChoices; + boost::optional optimalChoices; boost::optional optimalChoicesReachableMdpStates; // The current status of this explorer diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8aa83452a..09e0f8ce8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -175,7 +175,13 @@ namespace storm { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx); + HeuristicParameters heuristicParameters; + heuristicParameters.gapThreshold = storm::utility::convertNumber(options.explorationThreshold); + heuristicParameters.observationThreshold = storm::utility::zero(); // Not relevant without refinement + heuristicParameters.sizeThreshold = std::numeric_limits::max(); + heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber(1e-4); + + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx); if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); @@ -220,7 +226,12 @@ namespace storm { // OverApproximaion auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation); + HeuristicParameters heuristicParameters; + heuristicParameters.gapThreshold = storm::utility::convertNumber(options.explorationThreshold); + heuristicParameters.observationThreshold = storm::utility::zero(); // Will be set to lowest observation score automatically + heuristicParameters.sizeThreshold = 
std::numeric_limits::max(); + heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber(1e-4); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation); if (!overApproximation->hasComputedValues()) { return; } @@ -245,7 +256,6 @@ namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; - ValueType refinementAggressiveness = storm::utility::convertNumber(0.0); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; @@ -254,13 +264,15 @@ namespace storm { STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "."); // Refine over-approximation - STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << "."); if (min) { overApproximation->takeCurrentValuesAsLowerBounds(); } else { overApproximation->takeCurrentValuesAsUpperBounds(); } - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); + heuristicParameters.gapThreshold /= storm::utility::convertNumber(4); + heuristicParameters.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * 4; + heuristicParameters.observationThreshold += storm::utility::convertNumber(0.1) * (storm::utility::one() - heuristicParameters.observationThreshold); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation); if (overApproximation->hasComputedValues()) { overApproxValue = overApproximation->getComputedValueAtInitialState(); } else { @@ -269,7 +281,7 
@@ namespace storm { if (result.diff() > options.refinementPrecision) { // Refine under-approximation - underApproxSizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold *= 4; underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); @@ -309,30 +321,38 @@ namespace storm { template std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { - uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); + uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); + auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); std::map gatheredSuccessorObservations; // Declare here to avoid reallocations - for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) { - gatheredSuccessorObservations.clear(); - overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); - for (auto const& obsInfo : gatheredSuccessorObservations) { - auto const& obs = obsInfo.first; - ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); - - // The rating of the observation will be the minimum over all choice-based observation ratings - resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + for (uint64_t 
mdpState = 0; mdpState < numMdpStates; ++mdpState) { + // Check whether this state is reached under an optimal scheduler. + // The heuristic assumes that the remaining states are not relevant for the observation score. + if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) { + for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) { + // Similarly, only optimal actions are relevant + if (overApproximation->actionIsOptimal(mdpChoice)) { + // score the observations for this choice + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + } } } return resultingRatings; } template - void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { - STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); - STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); - STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, 
std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { // current maximal resolution (needed for refinement heuristic) uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); @@ -347,17 +367,18 @@ namespace storm { overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); } } else { - // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. + // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy + overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); + // We also need to find out which observation resolutions needs refinement. auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); - // Potentially increase the aggressiveness so that at least one observation actually gets refinement. - *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); - refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); + // Potentially increase the observationThreshold so that at least one observation actually gets refinement. 
+ heuristicParameters.observationThreshold = std::max(minRating, heuristicParameters.observationThreshold); + refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { - // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) - ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); - observationResolutionVector[obs] += storm::utility::convertNumber(storm::utility::ceil(incrementValue)); + // Increment the resolution at the refined observations + observationResolutionVector[obs] *= 2; } overApproximation->restartExploration(); } @@ -365,6 +386,7 @@ namespace storm { // Start exploration std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + uint64_t numRewiredOrExploredStates = 0; while (overApproximation->hasUnexploredState()) { uint64_t currId = overApproximation->exploreNextState(); @@ -373,66 +395,124 @@ namespace storm { overApproximation->setCurrentStateIsTarget(); overApproximation->addSelfloopTransition(); } else { - bool stopExploration = false; - if (storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { - stopExploration = true; - overApproximation->setCurrentStateIsTruncated(); + // We need to decide how to treat this state (and each individual enabled action). 
There are the following cases: + // 1 The state has no old behavior and + // 1.1 we explore all actions or + // 1.2 we truncate all actions + // 2 The state has old behavior and was truncated in the last iteration and + // 2.1 we explore all actions or + // 2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds) + // 3 The state has old behavior and was not truncated in the last iteration and the current action + // 3.1 should be rewired or + // 3.2 should get the old behavior but either + // 3.2.1 none of the successor observation has been refined since the last rewiring or exploration of this action + // 3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step + + // Find out in which case we are + bool exploreAllActions = false; + bool truncateAllActions = false; + bool restoreAllActions = false; + bool checkRewireForAllActions = false; + ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()); + if (!refine || !overApproximation->currentStateHasOldBehavior()) { + // Case 1 + // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state + if (gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + exploreAllActions = true; // Case 1.1 + } else { + truncateAllActions = true; // Case 1.2 + overApproximation->setCurrentStateIsTruncated(); + } + } else { + if (overApproximation->getCurrentStateWasTruncated()) { + // Case 2 + if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + exploreAllActions = true; // Case 2.1 + } else { + truncateAllActions = true; // Case 2.2 + 
overApproximation->setCurrentStateIsTruncated(); + } + } else { + // Case 3 + // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action) + if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + checkRewireForAllActions = true; // Case 3.1 or Case 3.2 + } else { + restoreAllActions = true; // Definitely Case 3.2 + // We still need to check for each action whether rewiring makes sense later + checkRewireForAllActions = true; + } + } } + bool expandedAtLeastOneAction = false; for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - // Check whether we expand this state/action pair - // We always expand if we are not doing refinement of if the state was not available in the "old" MDP. - // Otherwise, a heuristic decides. 
- bool expandStateAction = true; - if (refine && overApproximation->currentStateHasOldBehavior()) { - // Compute a rating of the current state/action pair - ValueType stateActionRating = storm::utility::one(); - gatheredSuccessorObservations.clear(); - overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); - for (auto const& obsInfo : gatheredSuccessorObservations) { - if (refinedObservations.get(obsInfo.first)) { - ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], oldMaxResolution); - stateActionRating = std::min(stateActionRating, obsRating); + bool expandCurrentAction = exploreAllActions || truncateAllActions; + if (checkRewireForAllActions) { + assert(refine); + // In this case, we still need to check whether this action needs to be expanded + assert(!expandCurrentAction); + // Check the action dependent conditions for rewiring + // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states) + assert(overApproximation->currentStateHasOldBehavior()); + if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { + // Then, check whether the other criteria for rewiring are satisfied + if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { + // Do the rewiring now! 
(Case 3.1) + expandCurrentAction = true; + } else { + // Delay the rewiring (Case 3.2.2) + overApproximation->setCurrentChoiceIsDelayed(action); } - } - // Only refine if this rating is below the doubled refinementAggressiveness - expandStateAction = stateActionRating < storm::utility::convertNumber(2.0) * (*refinementAggressiveness); + } // else { Case 3.2.1 } } - if (expandStateAction) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - for (auto const& successor : successorGridPoints) { - bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? 
overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); + + if (expandCurrentAction) { + expandedAtLeastOneAction = true; + if (!truncateAllActions) { + // Cases 1.1, 2.1, or 3.1 + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + overApproximation->addTransitionToBelief(action, successor.first, successor.second, false); } - } - if (stopExploration) { if (computeRewards) { + overApproximation->computeRewardAtCurrentState(action); + } + } else { + // Cases 1.2 or 2.2 + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true); + if (!added) { + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. overApproximation->addTransitionsToExtraStates(action, truncationProbability); + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); } else { overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); } } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- overApproximation->computeRewardAtCurrentState(action, truncationValueBound); - } } else { - // Do not refine here + // Case 3.2 overApproximation->restoreOldBehaviorAtCurrentState(action); } } + if (expandedAtLeastOneAction) { + ++numRewiredOrExploredStates; + } } + if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildAborted = true; break; } } - // TODO: Drop unreachable states (sometimes?) statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 823eebf60..8b892e1f1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -74,11 +74,18 @@ namespace storm { * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); - + + struct HeuristicParameters { + ValueType gapThreshold; + ValueType observationThreshold; + uint64_t sizeThreshold; + ValueType optimalChoiceValueEpsilon; + }; + /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. 
provides an upper bound for maximizing and a lower bound for minimizing properties */ - void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties From 37490a8eca263dd1330bf8881f70bdcaf3beb0e9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 16:16:26 +0200 Subject: [PATCH 125/155] Started to integrate new refinement options. --- src/storm-pomdp-cli/storm-pomdp.cpp | 17 +- .../ApproximatePOMDPModelchecker.cpp | 214 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 35 ++- 3 files changed, 155 insertions(+), 111 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 19e139d1d..a6da25b16 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -103,14 +103,17 @@ namespace storm { STORM_PRINT_AND_LOG("Applying grid approximation... 
"); auto const& gridSettings = storm::settings::getModule(); typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; - options.initialGridResolution = gridSettings.getGridResolution(); - options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); - options.doRefinement = gridSettings.isRefineSet(); - options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); - options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); - options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); + std::cout << "TODO: create and read from new settings!" << std::endl; + // options.initialGridResolution = gridSettings.getGridResolution(); + // options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); + options.refine = gridSettings.isRefineSet(); + options.unfold = true; + options.discretize = true; + // options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); + // options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); + // options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) { - options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + //options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); } if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 09e0f8ce8..4f95793eb 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -29,13 +29,22 @@ namespace storm { namespace modelchecker { template 
ApproximatePOMDPModelchecker::Options::Options() { - initialGridResolution = 10; - explorationThreshold = storm::utility::zero(); - doRefinement = true; - refinementPrecision = storm::utility::convertNumber(1e-4); + discretize = false; + unfold = false; + refine = false; + + resolutionInit = 2; + resolutionFactor = storm::utility::convertNumber(2); + sizeThresholdInit = 0; // automatic + sizeThresholdFactor = 4; + gapThresholdInit = storm::utility::convertNumber(0.1); + gapThresholdFactor = storm::utility::convertNumber(0.25); + optimalChoiceValueThresholdInit = storm::utility::convertNumber(1e-3); + optimalChoiceValueThresholdFactor = storm::utility::one(); + obsThresholdInit = storm::utility::convertNumber(0.1); + obsThresholdIncrementFactor = storm::utility::convertNumber(0.1); + numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); - cacheSubsimplices = false; - beliefMdpSizeThreshold = boost::none; } template @@ -69,6 +78,7 @@ namespace storm { template typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given."); // Reset all collected statistics statistics = Statistics(); // Extract the relevant information from the formula @@ -96,7 +106,7 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } - if (options.doRefinement) { + if (options.refine) { refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); } else { computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); @@ -126,7 +136,7 @@ namespace storm 
{ // The overapproximation MDP: if (statistics.overApproximationStates) { stream << "# Number of states in the "; - if (options.doRefinement) { + if (options.refine) { stream << "final "; } stream << "grid MDP for the over-approximation: "; @@ -142,7 +152,7 @@ namespace storm { // The underapproximation MDP: if (statistics.underApproximationStates) { stream << "# Number of states in the "; - if (options.doRefinement) { + if (options.refine) { stream << "final "; } stream << "grid MDP for the under-approximation: "; @@ -158,28 +168,21 @@ namespace storm { stream << "##########################################" << std::endl; } - - template void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } - - uint64_t underApproxSizeThreshold = 0; - { // Overapproximation - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + if (options.discretize) { + std::vector observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit); auto manager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); HeuristicParameters heuristicParameters; - heuristicParameters.gapThreshold = storm::utility::convertNumber(options.explorationThreshold); - heuristicParameters.observationThreshold = storm::utility::zero(); // Not relevant without refinement - heuristicParameters.sizeThreshold = std::numeric_limits::max(); - heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber(1e-4); + heuristicParameters.gapThreshold = 
options.gapThresholdInit; + heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement + heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits::max() : options.sizeThresholdInit; + heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx); if (approx->hasComputedValues()) { @@ -187,22 +190,23 @@ namespace storm { approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.lowerBound : result.upperBound; resultValue = approx->getComputedValueAtInitialState(); - underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold); } } - { // Underapproximation (Uses a fresh Belief manager) + if (options.unfold) { // Underapproximation (uses a fresh Belief manager) auto manager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) { - underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); - } - if (underApproxSizeThreshold == 0) { - underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build) + HeuristicParameters heuristicParameters; + heuristicParameters.gapThreshold = options.gapThresholdInit; + heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + heuristicParameters.sizeThreshold = options.sizeThresholdInit; + if (heuristicParameters.sizeThreshold == 0) { + // Select a decent value automatically + 
heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); } - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx); if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); @@ -215,76 +219,91 @@ namespace storm { template void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; + ValueType& underApproxValue = min ? result.upperBound : result.lowerBound; + // Set up exploration data - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); - auto underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); - if (rewardModelName) { - overApproxBeliefManager->setRewardModel(rewardModelName); - underApproxBeliefManager->setRewardModel(rewardModelName); - } - - // OverApproximaion - auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - HeuristicParameters heuristicParameters; - heuristicParameters.gapThreshold = storm::utility::convertNumber(options.explorationThreshold); - heuristicParameters.observationThreshold = storm::utility::zero(); // Will be set to lowest observation score automatically - heuristicParameters.sizeThreshold = std::numeric_limits::max(); - heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber(1e-4); - 
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation); - if (!overApproximation->hasComputedValues()) { - return; + std::vector observationResolutionVector; + std::shared_ptr overApproxBeliefManager; + std::shared_ptr overApproximation; + HeuristicParameters overApproxHeuristicPar; + if (options.discretize) { // Setup and build first OverApproximation + observationResolutionVector = std::vector(pomdp.getNrObservations(), options.resolutionInit); + overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + overApproxBeliefManager->setRewardModel(rewardModelName); + } + overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + overApproxHeuristicPar.gapThreshold = options.gapThresholdInit; + overApproxHeuristicPar.observationThreshold = options.obsThresholdInit; + overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; + overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues()) { + return; + } + overApproxValue = overApproximation->getComputedValueAtInitialState(); } - ValueType& overApproxValue = min ? 
result.lowerBound : result.upperBound; - overApproxValue = overApproximation->getComputedValueAtInitialState(); - // UnderApproximation - uint64_t underApproxSizeThreshold; - if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) { - underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); - } else { - underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); - } - auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); - if (!underApproximation->hasComputedValues()) { - return; + std::shared_ptr underApproxBeliefManager; + std::shared_ptr underApproximation; + HeuristicParameters underApproxHeuristicPar; + if (options.unfold) { // Setup and build first OverApproximation + underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + underApproxBeliefManager->setRewardModel(rewardModelName); + } + underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + underApproxHeuristicPar.gapThreshold = options.gapThresholdInit; + underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; + if (underApproxHeuristicPar.sizeThreshold == 0) { + // Select a decent value automatically + underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + } + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues()) { + return; + } + underApproxValue = underApproximation->getComputedValueAtInitialState(); } - 
ValueType& underApproxValue = min ? result.upperBound : result.lowerBound; - underApproxValue = underApproximation->getComputedValueAtInitialState(); - // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; - while (result.diff() > options.refinementPrecision) { + STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || options.refinePrecision.is_initialized(), "No termination criterion for refinement given. Consider to specify a steplimit, precisionlimit or timeout"); + STORM_LOG_WARN_COND(!options.refinePrecision.is_initialized() || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); + while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) { if (storm::utility::resources::isTerminate()) { break; } ++statistics.refinementSteps.get(); - STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "."); + STORM_PRINT_AND_LOG("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "." 
<< std::endl); - // Refine over-approximation - if (min) { - overApproximation->takeCurrentValuesAsLowerBounds(); - } else { - overApproximation->takeCurrentValuesAsUpperBounds(); - } - heuristicParameters.gapThreshold /= storm::utility::convertNumber(4); - heuristicParameters.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * 4; - heuristicParameters.observationThreshold += storm::utility::convertNumber(0.1) * (storm::utility::one() - heuristicParameters.observationThreshold); - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation); - if (overApproximation->hasComputedValues()) { - overApproxValue = overApproximation->getComputedValueAtInitialState(); - } else { - break; + if (options.discretize) { + // Refine over-approximation + if (min) { + overApproximation->takeCurrentValuesAsLowerBounds(); + } else { + overApproximation->takeCurrentValuesAsUpperBounds(); + } + overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; + overApproxHeuristicPar.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor; + overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one() - overApproxHeuristicPar.observationThreshold); + overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues()) { + overApproxValue = overApproximation->getComputedValueAtInitialState(); + } else { + break; + } } - if (result.diff() > options.refinementPrecision) { + if (options.unfold && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) { // 
Refine under-approximation - underApproxSizeThreshold *= 4; - underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); - STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; + underApproxHeuristicPar.sizeThreshold = underApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor; + overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); if (underApproximation->hasComputedValues()) { underApproxValue = underApproximation->getComputedValueAtInitialState(); } else { @@ -352,7 +371,7 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { // current maximal resolution (needed for refinement heuristic) uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); @@ -371,9 +390,7 @@ namespace storm { 
overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); // We also need to find out which observation resolutions needs refinement. auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); - ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); // Potentially increase the observationThreshold so that at least one observation actually gets refinement. - heuristicParameters.observationThreshold = std::max(minRating, heuristicParameters.observationThreshold); refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { @@ -528,11 +545,11 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { + void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); - statistics.underApproximationStateLimit = maxStateCount; - if (!underApproximation->hasComputedValues()) { + statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; + if (!refine) { // Build a new under approximation if (computeRewards) { underApproximation->startNewExploration(storm::utility::zero()); @@ -545,6 +562,7 @@ namespace storm { } // Expand the beliefs + uint64_t newlyExploredStates = 0; while 
(underApproximation->hasUnexploredState()) { uint64_t currId = underApproximation->exploreNextState(); @@ -554,18 +572,24 @@ namespace storm { underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (!underApproximation->currentStateHasOldBehavior()) { - if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); + if (!stateAlreadyExplored) { + // Check whether we want to explore the state now! + if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); - } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) { + } else if (newlyExploredStates >= heuristicParameters.sizeThreshold) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); } } + if (!stopExploration) { + // We are going to explore one more state + ++newlyExploredStates; + } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { // Always restore old behavior if available - if (underApproximation->currentStateHasOldBehavior()) { + if (stateAlreadyExplored) { underApproximation->restoreOldBehaviorAtCurrentState(action); } else { ValueType truncationProbability = storm::utility::zero(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 8b892e1f1..a6eeb19bd 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -28,13 +28,30 @@ namespace storm { struct 
Options { Options(); - uint64_t initialGridResolution; /// Decides how precise the bounds are - ValueType explorationThreshold; /// the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state - bool doRefinement; /// Sets whether the bounds should be refined automatically until the refinement precision is reached - ValueType refinementPrecision; /// Used to decide when the refinement should terminate - ValueType numericPrecision; /// Used to decide whether two values are equal - bool cacheSubsimplices; /// Enables caching of subsimplices - boost::optional beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection. + bool discretize; + bool unfold; + bool refine; + boost::optional refineStepLimit; + boost::optional refinePrecision; + + // Controlparameters for the refinement heuristic + // Discretization Resolution + uint64_t resolutionInit; + ValueType resolutionFactor; + // The maximal number of newly expanded MDP states in a refinement step + uint64_t sizeThresholdInit; + uint64_t sizeThresholdFactor; + // Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore + ValueType gapThresholdInit; + ValueType gapThresholdFactor; + // Controls whether "almost optimal" choices will be considered optimal + ValueType optimalChoiceValueThresholdInit; + ValueType optimalChoiceValueThresholdFactor; + // Controls which observations are refined. + ValueType obsThresholdInit; + ValueType obsThresholdIncrementFactor; + + ValueType numericPrecision; /// Used to decide whether two beliefs are equal }; struct Result { @@ -85,12 +102,12 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. 
provides an upper bound for maximizing and a lower bound for minimizing properties */ - void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties */ - void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); + void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution); From 6dd50575f9ccdc74a89887692e017b23c4461ab6 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 06:08:37 +0200 Subject: [PATCH 126/155] New 'belief-exploration' setting (replaces gridapproximation setting) --- .../settings/modules/POMDPSettings.cpp | 19 +++++++++++++++---- .../settings/modules/POMDPSettings.h | 4 +++- src/storm-pomdp-cli/storm-pomdp.cpp | 8 ++++---- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 57c065f7a..09a5342b3 100644 --- 
a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -15,7 +15,8 @@ namespace storm { const std::string POMDPSettings::moduleName = "pomdp"; const std::string noCanonicOption = "nocanonic"; const std::string exportAsParametricModelOption = "parametric-drn"; - const std::string gridApproximationOption = "gridapproximation"; + const std::string beliefExplorationOption = "belief-exploration"; + std::vector beliefExplorationModes = {"both", "discretize", "unfold"}; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string mecReductionOption = "mecreduction"; @@ -43,7 +44,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); - this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, beliefExplorationOption, false,"Analyze the POMDP by exploring the belief state-space.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("mode", "Sets whether lower, upper, or interval result bounds are 
computed.").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(beliefExplorationModes)).setDefaultValueString("both").makeOptional().build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, checkFullyObservableOption, false, "Performs standard model checking on the underlying MDP").build()); @@ -77,8 +78,18 @@ namespace storm { return this->getOption(selfloopReductionOption).getHasOptionBeenSet(); } - bool POMDPSettings::isGridApproximationSet() const { - return this->getOption(gridApproximationOption).getHasOptionBeenSet(); + bool POMDPSettings::isBeliefExplorationSet() const { + return this->getOption(beliefExplorationOption).getHasOptionBeenSet(); + } + + bool POMDPSettings::isBeliefExplorationDiscretizeSet() const { + std::string arg = this->getOption(beliefExplorationOption).getArgumentByName("mode").getValueAsString(); + return isBeliefExplorationSet() && (arg == "discretize" || arg == "both"); + } + + bool POMDPSettings::isBeliefExplorationUnfoldSet() const { + std::string arg = this->getOption(beliefExplorationOption).getArgumentByName("mode").getValueAsString(); + return isBeliefExplorationSet() && (arg == "unfold" || arg == "both"); } bool POMDPSettings::isMemlessSearchSet() const { diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 6754ac55c..d1a9e6b82 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -27,7 +27,9 @@ namespace storm { bool isQualitativeReductionSet() const; bool 
isNoCanonicSet() const; - bool isGridApproximationSet() const; + bool isBeliefExplorationSet() const; + bool isBeliefExplorationDiscretizeSet() const; + bool isBeliefExplorationUnfoldSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index a6da25b16..ebfa22e7c 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -99,16 +99,16 @@ namespace storm { bool performAnalysis(std::shared_ptr> const& pomdp, storm::pomdp::analysis::FormulaInformation const& formulaInfo, storm::logic::Formula const& formula) { auto const& pomdpSettings = storm::settings::getModule(); bool analysisPerformed = false; - if (pomdpSettings.isGridApproximationSet()) { - STORM_PRINT_AND_LOG("Applying grid approximation... "); + if (pomdpSettings.isBeliefExplorationSet()) { + STORM_PRINT_AND_LOG("Exploring the belief MDP... "); auto const& gridSettings = storm::settings::getModule(); typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; std::cout << "TODO: create and read from new settings!" 
<< std::endl; // options.initialGridResolution = gridSettings.getGridResolution(); // options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); options.refine = gridSettings.isRefineSet(); - options.unfold = true; - options.discretize = true; + options.unfold = pomdpSettings.isBeliefExplorationUnfoldSet(); + options.discretize = pomdpSettings.isBeliefExplorationDiscretizeSet(); // options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); // options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); // options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); From 2d94e77f2a9914f7df3924009b40cbedbdb66aad Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 06:30:53 +0200 Subject: [PATCH 127/155] Only display the bound that was requested. --- src/storm-pomdp-cli/storm-pomdp.cpp | 15 ++++++++++++++- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 8 ++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ebfa22e7c..ceddcc4b7 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -84,7 +84,20 @@ namespace storm { template void printResult(ValueType const& lowerBound, ValueType const& upperBound) { if (lowerBound == upperBound) { - STORM_PRINT_AND_LOG(lowerBound); + if (storm::utility::isInfinity(lowerBound)) { + STORM_PRINT_AND_LOG("inf"); + } else { + STORM_PRINT_AND_LOG(lowerBound); + } + } else if (storm::utility::isInfinity(-lowerBound)) { + if (storm::utility::isInfinity(upperBound)) { + STORM_PRINT_AND_LOG("[-inf, inf] (width=inf)"); + } else { + // Only upper bound is known + STORM_PRINT_AND_LOG("≤ " << upperBound); + } + } else if (storm::utility::isInfinity(upperBound)) { + STORM_PRINT_AND_LOG("≥ " << lowerBound); } else { STORM_PRINT_AND_LOG("[" << lowerBound << ", " << upperBound << "] 
(width=" << ValueType(upperBound - lowerBound) << ")"); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4f95793eb..f44eb3381 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -111,6 +111,14 @@ namespace storm { } else { computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); } + // "clear" results in case they were actually not requested (this will make the output a bit more clear) + if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) { + result.lowerBound = -storm::utility::infinity(); + } + if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) { + result.upperBound = storm::utility::infinity(); + } + if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } From fa624d2a20ed1b9637d4b43b2a5787ec337af19e Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 08:27:21 +0200 Subject: [PATCH 128/155] Introduced new settings for controlling the refinement strategy and whether to produce only upper and/or lower bounds --- .../settings/PomdpSettings.cpp | 4 +- .../modules/BeliefExplorationSettings.cpp | 150 ++++++++++++++++++ .../modules/BeliefExplorationSettings.h | 71 +++++++++ .../modules/GridApproximationSettings.cpp | 80 ---------- .../modules/GridApproximationSettings.h | 42 ----- src/storm-pomdp-cli/storm-pomdp.cpp | 27 +--- .../ApproximatePOMDPModelCheckerOptions.h | 43 +++++ .../ApproximatePOMDPModelchecker.cpp | 33 +--- .../ApproximatePOMDPModelchecker.h | 35 +--- 9 files changed, 280 insertions(+), 205 deletions(-) create mode 100644 src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp create mode 100644 
src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h delete mode 100644 src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp delete mode 100644 src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h create mode 100644 src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.cpp b/src/storm-pomdp-cli/settings/PomdpSettings.cpp index 1181bb2ff..7cd3aff57 100644 --- a/src/storm-pomdp-cli/settings/PomdpSettings.cpp +++ b/src/storm-pomdp-cli/settings/PomdpSettings.cpp @@ -31,7 +31,7 @@ #include "storm/settings/modules/HintSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" -#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" namespace storm { namespace settings { @@ -45,7 +45,7 @@ namespace storm { storm::settings::addModule(); storm::settings::addModule(); - storm::settings::addModule(); + storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp new file mode 100644 index 000000000..c1c7e45bc --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -0,0 +1,150 @@ +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" + +#include "storm/settings/SettingsManager.h" +#include "storm/settings/SettingMemento.h" +#include "storm/settings/Option.h" +#include "storm/settings/OptionBuilder.h" +#include "storm/settings/ArgumentBuilder.h" + +#include "storm/utility/NumberTraits.h" +#include "storm/adapters/RationalNumberAdapter.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h" + +#include "storm/exceptions/InvalidArgumentException.h" + + +namespace storm { + namespace settings { + namespace modules { + + const 
std::string BeliefExplorationSettings::moduleName = "belexpl"; + + const std::string refineOption = "refine"; + const std::string resolutionOption = "resolution"; + const std::string sizeThresholdOption = "size-threshold"; + const std::string gapThresholdOption = "gap-threshold"; + const std::string schedulerThresholdOption = "scheduler-threshold"; + const std::string observationThresholdOption = "obs-threshold"; + const std::string numericPrecisionOption = "numeric-precision"; + + BeliefExplorationSettings::BeliefExplorationSettings() : ModuleSettings(moduleName) { + + this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Refines the result bounds until reaching either the goal precision or the refinement step limit").addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("prec","The goal precision.").setDefaultValueDouble(1e-4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0.0)).build()).addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("steps","The number of allowed refinement steps (0 means no limit).").setDefaultValueUnsignedInteger(0).makeOptional().build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(12).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more 
precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, sizeThresholdOption, false,"Sets how many new states are explored or rewired in a refinement step and how this value is increased in case of refinement.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","initial limit (higher means more precise, 0 means automatic choice)").setDefaultValueUnsignedInteger(0).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Before each step the new threshold is set to the current state count times this number (higher means more precise).").setDefaultValueDouble(4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, gapThresholdOption, false,"Sets how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to 
explore").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means less precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the gap in each refinement step (higher means less precise).").setDefaultValueDouble(0.25).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, schedulerThresholdOption, false,"Sets how much worse a sub-optimal choice can be in order to be included in the relevant explored fragment").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(1e-3).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the threshold in each refinement step (higher means more precise).").setDefaultValueDouble(1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").setIsAdvanced().addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); + } + + bool BeliefExplorationSettings::isRefineSet() const { + return this->getOption(refineOption).getHasOptionBeenSet(); + } + + double 
BeliefExplorationSettings::getRefinePrecision() const { + return this->getOption(refineOption).getArgumentByName("prec").getValueAsDouble(); + } + + bool BeliefExplorationSettings::isRefineStepLimitSet() const { + return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger() != 0; + } + + uint64_t BeliefExplorationSettings::getRefineStepLimit() const { + assert(isRefineStepLimitSet()); + return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger(); + } + + uint64_t BeliefExplorationSettings::getResolutionInit() const { + return this->getOption(resolutionOption).getArgumentByName("init").getValueAsUnsignedInteger(); + } + + double BeliefExplorationSettings::getResolutionFactor() const { + return this->getOption(resolutionOption).getArgumentByName("factor").getValueAsDouble(); + } + + uint64_t BeliefExplorationSettings::getSizeThresholdInit() const { + return this->getOption(sizeThresholdOption).getArgumentByName("init").getValueAsUnsignedInteger(); + } + + double BeliefExplorationSettings::getSizeThresholdFactor() const { + return this->getOption(sizeThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getGapThresholdInit() const { + return this->getOption(gapThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getGapThresholdFactor() const { + return this->getOption(gapThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getOptimalChoiceValueThresholdInit() const { + return this->getOption(schedulerThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getOptimalChoiceValueThresholdFactor() const { + return this->getOption(schedulerThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getObservationScoreThresholdInit() const { + return 
this->getOption(observationThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getObservationScoreThresholdFactor() const { + return this->getOption(observationThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + bool BeliefExplorationSettings::isNumericPrecisionSetFromDefault() const { + return !this->getOption(numericPrecisionOption).getHasOptionBeenSet() || this->getOption(numericPrecisionOption).getArgumentByName("value").wasSetFromDefaultValue(); + } + + double BeliefExplorationSettings::getNumericPrecision() const { + return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); + } + + template + void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const { + options.refine = isRefineSet(); + options.refinePrecision = getRefinePrecision(); + if (isRefineStepLimitSet()) { + options.refineStepLimit = getRefineStepLimit(); + } + + options.resolutionInit = getResolutionInit(); + options.resolutionFactor = storm::utility::convertNumber(getResolutionFactor()); + options.sizeThresholdInit = getSizeThresholdInit(); + options.sizeThresholdFactor = storm::utility::convertNumber(getSizeThresholdFactor()); + options.gapThresholdInit = storm::utility::convertNumber(getGapThresholdInit()); + options.gapThresholdFactor = storm::utility::convertNumber(getGapThresholdFactor()); + options.optimalChoiceValueThresholdInit = storm::utility::convertNumber(getOptimalChoiceValueThresholdInit()); + options.optimalChoiceValueThresholdFactor = storm::utility::convertNumber(getOptimalChoiceValueThresholdFactor()); + options.obsThresholdInit = storm::utility::convertNumber(getObservationScoreThresholdInit()); + options.obsThresholdIncrementFactor = storm::utility::convertNumber(getObservationScoreThresholdFactor()); + + options.numericPrecision = getNumericPrecision(); + if (storm::NumberTraits::IsExact) { + 
if (isNumericPrecisionSetFromDefault()) { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); + options.numericPrecision = storm::utility::zero(); + } else { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. Results might be inexact."); + } + } + } + + template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + + + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h new file mode 100644 index 000000000..0273a1945 --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h @@ -0,0 +1,71 @@ +#pragma once + +#include "storm-config.h" +#include "storm/settings/modules/ModuleSettings.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + struct ApproximatePOMDPModelCheckerOptions; + } + } + + namespace settings { + namespace modules { + + /*! + * This class represents the settings for POMDP model checking. + */ + class BeliefExplorationSettings : public ModuleSettings { + public: + + /*! + * Creates a new set of POMDP settings. 
+ */ + BeliefExplorationSettings(); + + virtual ~BeliefExplorationSettings() = default; + + bool isRefineSet() const; + double getRefinePrecision() const; + bool isRefineStepLimitSet() const; + uint64_t getRefineStepLimit() const; + + /// Discretization Resolution + uint64_t getResolutionInit() const; + double getResolutionFactor() const; + /// The maximal number of newly expanded MDP states in a refinement step + uint64_t getSizeThresholdInit() const; + double getSizeThresholdFactor() const; + + /// Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore + double getGapThresholdInit() const; + double getGapThresholdFactor() const; + + /// Controls whether "almost optimal" choices will be considered optimal + double getOptimalChoiceValueThresholdInit() const; + double getOptimalChoiceValueThresholdFactor() const; + + /// Controls which observations are refined. + double getObservationScoreThresholdInit() const; + double getObservationScoreThresholdFactor() const; + + /// Used to determine whether two beliefs are equal + bool isNumericPrecisionSetFromDefault() const; + double getNumericPrecision() const; + + template + void setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + + // The name of the module. 
+ static const std::string moduleName; + + private: + + + }; + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp deleted file mode 100644 index 6b5b17677..000000000 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" - -#include "storm/settings/SettingsManager.h" -#include "storm/settings/SettingMemento.h" -#include "storm/settings/Option.h" -#include "storm/settings/OptionBuilder.h" -#include "storm/settings/ArgumentBuilder.h" - -#include "storm/exceptions/InvalidArgumentException.h" - -namespace storm { - namespace settings { - namespace modules { - - const std::string GridApproximationSettings::moduleName = "grid"; - - const std::string refineOption = "refine"; - const std::string resolutionOption = "resolution"; - const std::string limitBeliefExplorationOption = "limit-exploration"; - const std::string numericPrecisionOption = "numeric-precision"; - const std::string cacheSimplicesOption = "cache-simplices"; - const std::string unfoldBeliefMdpOption = "unfold-belief-mdp"; - - GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { - - this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Enables automatic refinement of the grid until the goal precision is reached").addArgument( - storm::settings::ArgumentBuilder::createDoubleArgument("precision","Allowed difference between upper and lower bound of the result.").setDefaultValueDouble(1e-6).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).build()); - - this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the (initial-) resolution of the grid (higher means 
more precise results)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the resolution").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); - - this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether the belief space exploration is stopped if upper and lower bound are close").addArgument( - storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).build()); - - this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").addArgument( - storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); - - this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build()); - - this->addOption(storm::settings::OptionBuilder(moduleName, unfoldBeliefMdpOption, false,"Sets the (initial-) size threshold of the unfolded belief MDP (higher means more precise results, 0 means automatic choice)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the maximal number of states").setDefaultValueUnsignedInteger(0).build()).build()); - } - - bool GridApproximationSettings::isRefineSet() const { - return this->getOption(refineOption).getHasOptionBeenSet(); - } - - double GridApproximationSettings::getRefinementPrecision() const { - return 
this->getOption(refineOption).getArgumentByName("precision").getValueAsDouble(); - } - - uint64_t GridApproximationSettings::getGridResolution() const { - return this->getOption(resolutionOption).getArgumentByName("value").getValueAsUnsignedInteger(); - } - - double GridApproximationSettings::getExplorationThreshold() const { - return this->getOption(limitBeliefExplorationOption).getArgumentByName("threshold").getValueAsDouble(); - } - - bool GridApproximationSettings::isNumericPrecisionSetFromDefault() const { - return !this->getOption(numericPrecisionOption).getHasOptionBeenSet() || this->getOption(numericPrecisionOption).getArgumentByName("value").wasSetFromDefaultValue(); - } - - double GridApproximationSettings::getNumericPrecision() const { - return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); - } - - bool GridApproximationSettings::isCacheSimplicesSet() const { - return this->getOption(cacheSimplicesOption).getHasOptionBeenSet(); - } - - bool GridApproximationSettings::isUnfoldBeliefMdpSizeThresholdSet() const { - return this->getOption(unfoldBeliefMdpOption).getHasOptionBeenSet(); - } - - uint64_t GridApproximationSettings::getUnfoldBeliefMdpSizeThreshold() const { - return this->getOption(unfoldBeliefMdpOption).getArgumentByName("value").getValueAsUnsignedInteger(); - } - - } // namespace modules - } // namespace settings -} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h deleted file mode 100644 index 88e484128..000000000 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "storm-config.h" -#include "storm/settings/modules/ModuleSettings.h" - -namespace storm { - namespace settings { - namespace modules { - - /*! - * This class represents the settings for POMDP model checking. 
- */ - class GridApproximationSettings : public ModuleSettings { - public: - - /*! - * Creates a new set of POMDP settings. - */ - GridApproximationSettings(); - - virtual ~GridApproximationSettings() = default; - - bool isRefineSet() const; - double getRefinementPrecision() const; - uint64_t getGridResolution() const; - double getExplorationThreshold() const; - bool isNumericPrecisionSetFromDefault() const; - double getNumericPrecision() const; - bool isCacheSimplicesSet() const; - bool isUnfoldBeliefMdpSizeThresholdSet() const; - uint64_t getUnfoldBeliefMdpSizeThreshold() const; - // The name of the module. - static const std::string moduleName; - - private: - - - }; - - } // namespace modules - } // namespace settings -} // namespace storm diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ceddcc4b7..b215b730f 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -5,7 +5,7 @@ #include "storm/settings/modules/GeneralSettings.h" #include "storm/settings/modules/DebugSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" -#include "storm-pomdp-cli/settings/modules/GridApproximationSettings.h" +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" #include "storm-pomdp-cli/settings/PomdpSettings.h" #include "storm/analysis/GraphConditions.h" @@ -114,28 +114,9 @@ namespace storm { bool analysisPerformed = false; if (pomdpSettings.isBeliefExplorationSet()) { STORM_PRINT_AND_LOG("Exploring the belief MDP... "); - auto const& gridSettings = storm::settings::getModule(); - typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; - std::cout << "TODO: create and read from new settings!" 
<< std::endl; - // options.initialGridResolution = gridSettings.getGridResolution(); - // options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); - options.refine = gridSettings.isRefineSet(); - options.unfold = pomdpSettings.isBeliefExplorationUnfoldSet(); - options.discretize = pomdpSettings.isBeliefExplorationDiscretizeSet(); - // options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); - // options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); - // options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); - if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) { - //options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); - } - if (storm::NumberTraits::IsExact) { - if (gridSettings.isNumericPrecisionSetFromDefault()) { - STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); - options.numericPrecision = storm::utility::zero(); - } else { - STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. 
Results might be inexact."); - } - } + auto options = storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions(pomdpSettings.isBeliefExplorationDiscretizeSet(), pomdpSettings.isBeliefExplorationUnfoldSet()); + auto const& beliefExplorationSettings = storm::settings::getModule(); + beliefExplorationSettings.setValuesInOptionsStruct(options); storm::pomdp::modelchecker::ApproximatePOMDPModelchecker> checker(*pomdp, options); auto result = checker.check(formula); checker.printStatisticsToStream(std::cout); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h new file mode 100644 index 000000000..91cfc6db0 --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include "storm/utility/constants.h" +#include "storm/utility/NumberTraits.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + struct ApproximatePOMDPModelCheckerOptions { + ApproximatePOMDPModelCheckerOptions(bool discretize, bool unfold) : discretize(discretize), unfold(unfold) { + // Intentionally left empty + } + + bool discretize; + bool unfold; + bool refine = false; + boost::optional refineStepLimit; + ValueType refinePrecision = storm::utility::zero(); + + // Controlparameters for the refinement heuristic + // Discretization Resolution + uint64_t resolutionInit = 2; + ValueType resolutionFactor = storm::utility::convertNumber(2); + // The maximal number of newly expanded MDP states in a refinement step + uint64_t sizeThresholdInit = 0; + ValueType sizeThresholdFactor = storm::utility::convertNumber(4); + // Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore + ValueType gapThresholdInit = storm::utility::convertNumber(0.1); + ValueType gapThresholdFactor = storm::utility::convertNumber(0.25); + // Controls whether "almost optimal" 
choices will be considered optimal + ValueType optimalChoiceValueThresholdInit = storm::utility::convertNumber(1e-3); + ValueType optimalChoiceValueThresholdFactor = storm::utility::one(); + // Controls which observations are refined. + ValueType obsThresholdInit = storm::utility::convertNumber(0.1); + ValueType obsThresholdIncrementFactor = storm::utility::convertNumber(0.1); + + ValueType numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); /// Used to decide whether two beliefs are equal + }; + } + } +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index f44eb3381..1b86744f9 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -27,25 +27,6 @@ namespace storm { namespace pomdp { namespace modelchecker { - template - ApproximatePOMDPModelchecker::Options::Options() { - discretize = false; - unfold = false; - refine = false; - - resolutionInit = 2; - resolutionFactor = storm::utility::convertNumber(2); - sizeThresholdInit = 0; // automatic - sizeThresholdFactor = 4; - gapThresholdInit = storm::utility::convertNumber(0.1); - gapThresholdFactor = storm::utility::convertNumber(0.25); - optimalChoiceValueThresholdInit = storm::utility::convertNumber(1e-3); - optimalChoiceValueThresholdFactor = storm::utility::one(); - obsThresholdInit = storm::utility::convertNumber(0.1); - obsThresholdIncrementFactor = storm::utility::convertNumber(0.1); - - numericPrecision = storm::NumberTraits::IsExact ? 
storm::utility::zero() : storm::utility::convertNumber(1e-9); - } template ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { @@ -278,9 +259,9 @@ namespace storm { // Start refinement statistics.refinementSteps = 0; - STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || options.refinePrecision.is_initialized(), "No termination criterion for refinement given. Consider to specify a steplimit, precisionlimit or timeout"); - STORM_LOG_WARN_COND(!options.refinePrecision.is_initialized() || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); - while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) { + STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. 
Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); + STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); + while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { if (storm::utility::resources::isTerminate()) { break; } @@ -295,7 +276,7 @@ namespace storm { overApproximation->takeCurrentValuesAsUpperBounds(); } overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; - overApproxHeuristicPar.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor; + overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one() - overApproxHeuristicPar.observationThreshold); overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); @@ -306,10 +287,10 @@ namespace storm { } } - if (options.unfold && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) { + if (options.unfold && result.diff() > options.refinePrecision) { // Refine under-approximation overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; - underApproxHeuristicPar.sizeThreshold = underApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor; + overApproxHeuristicPar.sizeThreshold = 
storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); if (underApproximation->hasComputedValues()) { @@ -403,7 +384,7 @@ namespace storm { STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { // Increment the resolution at the refined observations - observationResolutionVector[obs] *= 2; + observationResolutionVector[obs] = storm::utility::convertNumber(storm::utility::convertNumber(observationResolutionVector[obs]) * options.resolutionFactor); } overApproximation->restartExploration(); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index a6eeb19bd..945459c93 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -1,11 +1,10 @@ -#include #include "storm/api/storm.h" #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" -#include #include "storm/storage/jani/Property.h" @@ -16,8 +15,7 @@ namespace storm { namespace pomdp { namespace modelchecker { - typedef boost::bimap bsmap_type; - + template class ApproximatePOMDPModelchecker { public: @@ -25,34 +23,7 @@ namespace storm { typedef typename PomdpModelType::RewardModelType RewardModelType; typedef storm::storage::BeliefManager 
BeliefManagerType; typedef storm::builder::BeliefMdpExplorer ExplorerType; - - struct Options { - Options(); - bool discretize; - bool unfold; - bool refine; - boost::optional refineStepLimit; - boost::optional refinePrecision; - - // Controlparameters for the refinement heuristic - // Discretization Resolution - uint64_t resolutionInit; - ValueType resolutionFactor; - // The maximal number of newly expanded MDP states in a refinement step - uint64_t sizeThresholdInit; - uint64_t sizeThresholdFactor; - // Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore - ValueType gapThresholdInit; - ValueType gapThresholdFactor; - // Controls whether "almost optimal" choices will be considered optimal - ValueType optimalChoiceValueThresholdInit; - ValueType optimalChoiceValueThresholdFactor; - // Controls which observations are refined. - ValueType obsThresholdInit; - ValueType obsThresholdIncrementFactor; - - ValueType numericPrecision; /// Used to decide whether two beliefs are equal - }; + typedef ApproximatePOMDPModelCheckerOptions Options; struct Result { Result(ValueType lower, ValueType upper); From 1763f0c582138785a8eddd0aba754b3becbbaee4 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 09:08:02 +0200 Subject: [PATCH 129/155] Making sure that we only store the best bounds found so far. Also added some output for the resulting values in each iteration. 
--- .../ApproximatePOMDPModelchecker.cpp | 44 +++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 3 ++ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1b86744f9..86d24848d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -47,6 +47,24 @@ namespace storm { return diff; } + template + bool ApproximatePOMDPModelchecker::Result::updateLowerBound(ValueType const& value) { + if (value > lowerBound) { + lowerBound = value; + return true; + } + return false; + } + + template + bool ApproximatePOMDPModelchecker::Result::updateUpperBound(ValueType const& value) { + if (value < upperBound) { + upperBound = value; + return true; + } + return false; + } + template ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; @@ -62,13 +80,15 @@ namespace storm { STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given."); // Reset all collected statistics statistics = Statistics(); + statistics.totalTime.start(); // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); - + STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl); + boost::optional rewardModelName; if 
(formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. @@ -103,6 +123,7 @@ namespace storm { if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } + statistics.totalTime.stop(); return result; } @@ -117,6 +138,8 @@ namespace storm { stream << "# Computation aborted early" << std::endl; } + stream << "# Total check time: " << statistics.totalTime << std::endl; + // Refinement information: if (statistics.refinementSteps) { stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl; @@ -208,9 +231,6 @@ namespace storm { template void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { - ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; - ValueType& underApproxValue = min ? result.upperBound : result.lowerBound; - // Set up exploration data std::vector observationResolutionVector; std::shared_ptr overApproxBeliefManager; @@ -231,7 +251,9 @@ namespace storm { if (!overApproximation->hasComputedValues()) { return; } - overApproxValue = overApproximation->getComputedValueAtInitialState(); + ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); + STORM_PRINT_AND_LOG("Over-approx result for refinement step #0 is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." 
<< std::endl); } std::shared_ptr underApproxBeliefManager; @@ -254,7 +276,9 @@ namespace storm { if (!underApproximation->hasComputedValues()) { return; } - underApproxValue = underApproximation->getComputedValueAtInitialState(); + ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); + STORM_PRINT_AND_LOG("Under-approx result for refinement step #0 is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." << std::endl); } // Start refinement @@ -281,7 +305,9 @@ namespace storm { overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); if (overApproximation->hasComputedValues()) { - overApproxValue = overApproximation->getComputedValueAtInitialState(); + ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); + STORM_PRINT_AND_LOG("Over-approx result for refinement step #" << statistics.refinementSteps.get() << " is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." 
<< std::endl); } else { break; } @@ -294,7 +320,9 @@ namespace storm { overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); if (underApproximation->hasComputedValues()) { - underApproxValue = underApproximation->getComputedValueAtInitialState(); + ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); + STORM_PRINT_AND_LOG("Under-approx result for refinement step #" << statistics.refinementSteps.get() << " is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." << std::endl); } else { break; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 945459c93..5a2cd683f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -30,6 +30,8 @@ namespace storm { ValueType lowerBound; ValueType upperBound; ValueType diff (bool relative = false) const; + bool updateLowerBound(ValueType const& value); + bool updateUpperBound(ValueType const& value); }; ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); @@ -87,6 +89,7 @@ namespace storm { struct Statistics { Statistics(); boost::optional refinementSteps; + storm::utility::Stopwatch totalTime; boost::optional overApproximationStates; bool overApproximationBuildAborted; From c91c98f2de322d9625c13eadbd97699484a31065 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 11:56:55 +0200 Subject: [PATCH 130/155] Pomdp: Fixing result output with exact numbers --- 
src/storm-pomdp-cli/storm-pomdp.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index b215b730f..2aa7b659a 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -103,7 +103,9 @@ namespace storm { } if (storm::NumberTraits::IsExact) { STORM_PRINT_AND_LOG(" (approx. "); - printResult(storm::utility::convertNumber(lowerBound), storm::utility::convertNumber(upperBound)); + double roundedLowerBound = storm::utility::isInfinity(-lowerBound) ? -storm::utility::infinity() : storm::utility::convertNumber(lowerBound); + double roundedUpperBound = storm::utility::isInfinity(upperBound) ? storm::utility::infinity() : storm::utility::convertNumber(upperBound); + printResult(roundedLowerBound, roundedUpperBound); STORM_PRINT_AND_LOG(")"); } } From e81b8f16222be6f6aa0eb2dfed90420dcb08d23a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 11:57:56 +0200 Subject: [PATCH 131/155] BeliefManager: fixed a few assertion conditions --- src/storm-pomdp/storage/BeliefManager.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index b01ad5358..7e2c350b7 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -200,12 +200,13 @@ namespace storm { } else { observation = entryObservation; } - if (cc.isZero(entry.second)) { + // Don't use cc for these checks, because computations with zero are usually fine + if (storm::utility::isZero(entry.second)) { // We assume that beliefs only consider their support. 
STORM_LOG_ERROR("Zero belief probability."); return false; } - if (cc.isLess(entry.second, storm::utility::zero())) { + if (entry.second < storm::utility::zero()) { STORM_LOG_ERROR("Negative belief probability."); return false; } @@ -216,7 +217,7 @@ namespace storm { sum += entry.second; } if (!cc.isOne(sum)) { - STORM_LOG_ERROR("Belief does not sum up to one."); + STORM_LOG_ERROR("Belief does not sum up to one. (" << sum << " instead)."); return false; } return true; From 43220759f409ec5a9cd480b9b72c3df1360c8587 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 11:59:43 +0200 Subject: [PATCH 132/155] Implemented a time limit for exploration. --- .../modules/BeliefExplorationSettings.cpp | 19 +++++++++- .../modules/BeliefExplorationSettings.h | 3 ++ .../ApproximatePOMDPModelCheckerOptions.h | 1 + .../ApproximatePOMDPModelchecker.cpp | 38 ++++++++++++++++--- 4 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp index c1c7e45bc..b05d97d79 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -20,6 +20,7 @@ namespace storm { const std::string BeliefExplorationSettings::moduleName = "belexpl"; const std::string refineOption = "refine"; + const std::string explorationTimeLimitOption = "exploration-time"; const std::string resolutionOption = "resolution"; const std::string sizeThresholdOption = "size-threshold"; const std::string gapThresholdOption = "gap-threshold"; @@ -31,6 +32,8 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Refines the result bounds until reaching either the goal precision or the refinement step limit").addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("prec","The goal 
precision.").setDefaultValueDouble(1e-4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0.0)).build()).addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("steps","The number of allowed refinement steps (0 means no limit).").setDefaultValueUnsignedInteger(0).makeOptional().build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, explorationTimeLimitOption, false, "Sets after which time no further states shall be explored.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("time","In seconds.").build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(12).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the 
threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); @@ -62,6 +65,14 @@ namespace storm { return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger(); } + bool BeliefExplorationSettings::isExplorationTimeLimitSet() const { + return this->getOption(explorationTimeLimitOption).getHasOptionBeenSet(); + } + + uint64_t BeliefExplorationSettings::getExplorationTimeLimit() const { + return this->getOption(explorationTimeLimitOption).getArgumentByName("time").getValueAsUnsignedInteger(); + } + uint64_t BeliefExplorationSettings::getResolutionInit() const { return this->getOption(resolutionOption).getArgumentByName("init").getValueAsUnsignedInteger(); } @@ -116,8 +127,14 @@ namespace storm { options.refinePrecision = getRefinePrecision(); if (isRefineStepLimitSet()) { options.refineStepLimit = getRefineStepLimit(); + } else { + options.refineStepLimit = boost::none; + } + if (isExplorationTimeLimitSet()) { + options.explorationTimeLimit = getExplorationTimeLimit(); + } else { + options.explorationTimeLimit = boost::none; } - options.resolutionInit = getResolutionInit(); options.resolutionFactor = storm::utility::convertNumber(getResolutionFactor()); options.sizeThresholdInit = getSizeThresholdInit(); diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h index 0273a1945..5ae3a1bde 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h @@ -32,6 +32,9 @@ namespace storm { bool isRefineStepLimitSet() const; uint64_t getRefineStepLimit() const; + bool isExplorationTimeLimitSet() const; + uint64_t getExplorationTimeLimit() const; + /// Discretization Resolution uint64_t 
getResolutionInit() const; double getResolutionFactor() const; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h index 91cfc6db0..6d977a902 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h @@ -18,6 +18,7 @@ namespace storm { bool refine = false; boost::optional refineStepLimit; ValueType refinePrecision = storm::utility::zero(); + boost::optional explorationTimeLimit; // Controlparameters for the refinement heuristic // Discretization Resolution diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 86d24848d..31bbea7ce 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -215,8 +215,12 @@ namespace storm { heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; heuristicParameters.sizeThreshold = options.sizeThresholdInit; if (heuristicParameters.sizeThreshold == 0) { - // Select a decent value automatically - heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + if (options.explorationTimeLimit) { + heuristicParameters.sizeThreshold = std::numeric_limits::max(); + } else { + heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + STORM_PRINT_AND_LOG("Heuristically selected an under-approximation mdp size threshold of " << heuristicParameters.sizeThreshold << "." 
<< std::endl); + } } buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx); if (approx->hasComputedValues()) { @@ -419,9 +423,19 @@ namespace storm { statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); // Start exploration + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; std::map gatheredSuccessorObservations; // Declare here to avoid reallocations uint64_t numRewiredOrExploredStates = 0; while (overApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } + uint64_t currId = overApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); @@ -451,7 +465,7 @@ namespace storm { if (!refine || !overApproximation->currentStateHasOldBehavior()) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state - if (gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 1.1 } else { truncateAllActions = true; // Case 1.2 @@ -460,7 +474,7 @@ namespace storm { } else { if (overApproximation->getCurrentStateWasTruncated()) { // Case 2 - if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && 
overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 2.1 } else { truncateAllActions = true; // Case 2.2 @@ -469,7 +483,7 @@ namespace storm { } else { // Case 3 // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action) - if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { checkRewireForAllActions = true; // Case 3.1 or Case 3.2 } else { restoreAllActions = true; // Definitely Case 3.2 @@ -580,7 +594,16 @@ namespace storm { // Expand the beliefs uint64_t newlyExploredStates = 0; + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; while (underApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); @@ -590,7 +613,10 @@ namespace storm { } else { bool stopExploration = false; bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); - if (!stateAlreadyExplored) { + if (timeLimitExceeded) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if 
(!stateAlreadyExplored) { // Check whether we want to explore the state now! if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) { stopExploration = true; From 8b4595042ee78316a641f1a558366d881c754017 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 24 Apr 2020 08:58:50 +0200 Subject: [PATCH 133/155] Only do iteration output if the result bound improved. Handle integer overflows for the observation resolution. --- .../ApproximatePOMDPModelchecker.cpp | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 31bbea7ce..f8a49765c 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -234,7 +234,8 @@ namespace storm { template void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { - + statistics.refinementSteps = 0; + // Set up exploration data std::vector observationResolutionVector; std::shared_ptr overApproxBeliefManager; @@ -257,7 +258,9 @@ namespace storm { } ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); - STORM_PRINT_AND_LOG("Over-approx result for refinement step #0 is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." 
<< std::endl); + if (betterBound) { + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } } std::shared_ptr underApproxBeliefManager; @@ -282,19 +285,19 @@ namespace storm { } ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); - STORM_PRINT_AND_LOG("Under-approx result for refinement step #0 is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." << std::endl); + if (betterBound) { + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } } // Start refinement - statistics.refinementSteps = 0; STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. 
Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); - while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { + while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { if (storm::utility::resources::isTerminate()) { break; } ++statistics.refinementSteps.get(); - STORM_PRINT_AND_LOG("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "." << std::endl); if (options.discretize) { // Refine over-approximation @@ -311,7 +314,9 @@ namespace storm { if (overApproximation->hasComputedValues()) { ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); - STORM_PRINT_AND_LOG("Over-approx result for refinement step #" << statistics.refinementSteps.get() << " is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." << std::endl); + if (betterBound) { + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } } else { break; } @@ -326,7 +331,9 @@ namespace storm { if (underApproximation->hasComputedValues()) { ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); bool betterBound = min ? 
result.updateUpperBound(newValue) : result.updateLowerBound(newValue); - STORM_PRINT_AND_LOG("Under-approx result for refinement step #" << statistics.refinementSteps.get() << " is '" << newValue << "' which " << std::string(betterBound ? "improves" : "does not improve") << " the old value. Current runtime is " << statistics.totalTime << " seconds." << std::endl); + if (betterBound) { + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } } else { break; } @@ -415,8 +422,14 @@ namespace storm { refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { - // Increment the resolution at the refined observations - observationResolutionVector[obs] = storm::utility::convertNumber(storm::utility::convertNumber(observationResolutionVector[obs]) * options.resolutionFactor); + // Increment the resolution at the refined observations. + // Detect overflows properly. 
+ storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber(observationResolutionVector[obs]) * storm::utility::convertNumber(options.resolutionFactor); + if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { + observationResolutionVector[obs] = std::numeric_limits::max(); + } else { + observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); + } } overApproximation->restartExploration(); } From 16ad9d3a833dbd5b8287cae9c405b1f2416a6aa5 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 27 Apr 2020 12:38:24 +0200 Subject: [PATCH 134/155] fixed storm-pomdp output a little. --- src/storm-pomdp-cli/storm-pomdp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 2aa7b659a..d2f139a58 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -287,20 +287,20 @@ namespace storm { // Note that formulaInfo contains state-based information which potentially needs to be updated during preprocessing if (performPreprocessing(pomdp, formulaInfo, *formula)) { sw.stop(); - STORM_PRINT_AND_LOG("Time for graph-based POMDP (pre-)processing: " << sw << "s." << std::endl); + STORM_PRINT_AND_LOG("Time for graph-based POMDP (pre-)processing: " << sw << "." << std::endl); pomdp->printModelInformationToStream(std::cout); } sw.restart(); if (performAnalysis(pomdp, formulaInfo, *formula)) { sw.stop(); - STORM_PRINT_AND_LOG("Time for POMDP analysis: " << sw << "s." << std::endl); + STORM_PRINT_AND_LOG("Time for POMDP analysis: " << sw << "." << std::endl); } sw.restart(); if (performTransformation(pomdp, *formula)) { sw.stop(); - STORM_PRINT_AND_LOG("Time for POMDP transformation(s): " << sw << "s." << std::endl); + STORM_PRINT_AND_LOG("Time for POMDP transformation(s): " << sw << "." << std::endl); } } else { STORM_LOG_WARN("Nothing to be done. 
Did you forget to specify a formula?"); From 5a76f7355d32b20cb880e249175e4c8bac56b104 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 27 Apr 2020 12:38:45 +0200 Subject: [PATCH 135/155] Fixed an issue with refinement of under-approximation --- .../ApproximatePOMDPModelchecker.cpp | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index f8a49765c..03b2c2105 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -324,9 +324,9 @@ namespace storm { if (options.unfold && result.diff() > options.refinePrecision) { // Refine under-approximation - overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; - overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); - overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; + underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; + underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); + underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); if (underApproximation->hasComputedValues()) { ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); @@ -570,15 +570,20 @@ namespace storm { } if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildAborted = true; break; } } - 
statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); + if (storm::utility::resources::isTerminate()) { + // don't overwrite statistics of a previous, successful computation + if (!statistics.overApproximationStates) { + statistics.overApproximationBuildAborted = true; + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); + } statistics.overApproximationBuildTime.stop(); return; } + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); @@ -592,7 +597,9 @@ namespace storm { void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); - statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; + if (heuristicParameters.sizeThreshold != std::numeric_limits::max()) { + statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; + } if (!refine) { // Build a new under approximation if (computeRewards) { @@ -606,7 +613,6 @@ namespace storm { } // Expand the beliefs - uint64_t newlyExploredStates = 0; storm::utility::Stopwatch explorationTime; if (options.explorationTimeLimit) { explorationTime.start(); @@ -634,15 +640,11 @@ namespace storm { if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); - } else if (newlyExploredStates >= heuristicParameters.sizeThreshold) { + } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) { stopExploration = true; 
underApproximation->setCurrentStateIsTruncated(); } } - if (!stopExploration) { - // We are going to explore one more state - ++newlyExploredStates; - } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { // Always restore old behavior if available if (stateAlreadyExplored) { @@ -678,15 +680,20 @@ namespace storm { } } if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildAborted = true; break; } } - statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); + if (storm::utility::resources::isTerminate()) { + // don't overwrite statistics of a previous, successful computation + if (!statistics.underApproximationStates) { + statistics.underApproximationBuildAborted = true; + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); + } statistics.underApproximationBuildTime.stop(); return; } + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); From 7504f6f315a7d60dc2227425560cb26cdefc106d Mon Sep 17 00:00:00 2001 From: TimQu Date: Tue, 28 Apr 2020 08:28:39 +0200 Subject: [PATCH 136/155] Improved statistics output for refinements, added detection of fixpoints --- .../ApproximatePOMDPModelchecker.cpp | 108 ++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 6 +- 2 files changed, 90 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 03b2c2105..5ebd52685 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -294,11 +294,8 @@ namespace storm { STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination 
criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { - if (storm::utility::resources::isTerminate()) { - break; - } - ++statistics.refinementSteps.get(); - + bool overApproxFixPoint = true; + bool underApproxFixPoint = true; if (options.discretize) { // Refine over-approximation if (min) { @@ -310,12 +307,12 @@ namespace storm { overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one() - overApproxHeuristicPar.observationThreshold); overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); - if (overApproximation->hasComputedValues()) { + overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); bool betterBound = min ? 
result.updateLowerBound(newValue) : result.updateUpperBound(newValue); if (betterBound) { - STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); } } else { break; @@ -327,17 +324,50 @@ namespace storm { underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); - if (underApproximation->hasComputedValues()) { + underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); if (betterBound) { - STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." 
<< std::endl); + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); } } else { break; } } + + if (storm::utility::resources::isTerminate()) { + break; + } else { + ++statistics.refinementSteps.get(); + // Don't make too many outputs (to avoid logfile clutter) + if (statistics.refinementSteps.get() <= 1000) { + STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); + bool computingLowerBound = false; + bool computingUpperBound = false; + if (options.discretize) { + STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingLowerBound : computingUpperBound) = true; + } + if (options.unfold) { + STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingUpperBound : computingLowerBound) = true; + } + if (computingLowerBound && computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); + } else if (computingLowerBound) { + STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); + } else if (computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); + } + STORM_PRINT_AND_LOG(std::endl); + STORM_LOG_WARN_COND(statistics.refinementSteps.get() == 1000, "Refinement requires more than 1000 iterations."); + } + } + if (overApproxFixPoint && underApproxFixPoint) { + STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." 
<< std::endl); + break; + } } } @@ -399,7 +429,10 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + + // Detect whether the refinement reached a fixpoint. + bool fixPoint = true; // current maximal resolution (needed for refinement heuristic) uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); @@ -418,7 +451,10 @@ namespace storm { overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); // We also need to find out which observation resolutions needs refinement. auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); - // Potentially increase the observationThreshold so that at least one observation actually gets refinement. 
+ // If there is a score < 1, we have not reached a fixpoint, yet + if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one();})) { + fixPoint = false; + } refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { @@ -447,10 +483,14 @@ namespace storm { if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { STORM_LOG_INFO("Exploration time limit exceeded."); timeLimitExceeded = true; + fixPoint = false; } uint64_t currId = overApproximation->exploreNextState(); - + bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior(); + if (!hasOldBehavior) { + fixPoint = false; // Exploring a new state! 
+ } uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { overApproximation->setCurrentStateIsTarget(); @@ -475,7 +515,7 @@ namespace storm { bool restoreAllActions = false; bool checkRewireForAllActions = false; ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()); - if (!refine || !overApproximation->currentStateHasOldBehavior()) { + if (!hasOldBehavior) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { @@ -489,9 +529,19 @@ namespace storm { // Case 2 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 2.1 + fixPoint = false; } else { truncateAllActions = true; // Case 2.2 overApproximation->setCurrentStateIsTruncated(); + if (fixPoint) { + // Properly check whether this can still be a fixpoint + if (overApproximation->currentStateIsOptimalSchedulerReachable()) { + fixPoint = false; + } + //} else { + // In this case we truncated a state that is not reachable under optimal schedulers. + // If no other state is explored (i.e. fixPoint remaints true), these states should still not be reachable in subsequent iterations + } } } else { // Case 3 @@ -516,6 +566,7 @@ namespace storm { // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. 
whether rewiring would actually change the successor states) assert(overApproximation->currentStateHasOldBehavior()); if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { + fixPoint = false; // Then, check whether the other criteria for rewiring are satisfied if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { // Do the rewiring now! (Case 3.1) @@ -581,9 +632,8 @@ namespace storm { statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); } statistics.overApproximationBuildTime.stop(); - return; + return false; } - statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); @@ -591,12 +641,18 @@ namespace storm { statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) { + statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; } template - void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { - + bool ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); + bool fixPoint = true; if (heuristicParameters.sizeThreshold != std::numeric_limits::max()) { statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; } @@ -626,12 +682,15 @@ namespace storm { uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); + bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); + if (!stateAlreadyExplored || timeLimitExceeded) { + fixPoint = false; + } if (targetObservations.count(currObservation) != 0) { underApproximation->setCurrentStateIsTarget(); underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); if (timeLimitExceeded) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); 
@@ -691,9 +750,8 @@ namespace storm { statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); } statistics.underApproximationBuildTime.stop(); - return; + return false; } - statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); @@ -701,6 +759,12 @@ namespace storm { statistics.underApproximationCheckTime.start(); underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) { + statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5a2cd683f..44127e011 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -74,13 +74,15 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. 
if further refinement steps would not change the mdp) */ - void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) */ - void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); + bool buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution); From 08c60bcb3d4eb3ffbfd320fe84700bf3050a0672 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 27 Apr 2020 14:30:50 +0200 Subject: [PATCH 137/155] Added OVISolverSettings to storm-pomdp --- src/storm-pomdp-cli/settings/PomdpSettings.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.cpp b/src/storm-pomdp-cli/settings/PomdpSettings.cpp index 7cd3aff57..73d725c09 100644 --- a/src/storm-pomdp-cli/settings/PomdpSettings.cpp +++ 
b/src/storm-pomdp-cli/settings/PomdpSettings.cpp @@ -29,6 +29,7 @@ #include "storm/settings/modules/TransformationSettings.h" #include "storm/settings/modules/MultiObjectiveSettings.h" #include "storm/settings/modules/HintSettings.h" +#include "storm/settings/modules/OviSolverSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" #include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" @@ -63,6 +64,7 @@ namespace storm { storm::settings::addModule(); storm::settings::addModule(); storm::settings::addModule(); + storm::settings::addModule(); } } } From ddec9ce740589fdd40adce98f7c704701be72307 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 1 May 2020 16:53:40 +0200 Subject: [PATCH 138/155] ApproximatePomdpModelchecker: Fixed output a little. --- .../ApproximatePOMDPModelchecker.cpp | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 5ebd52685..a116077a1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -266,7 +266,7 @@ namespace storm { std::shared_ptr underApproxBeliefManager; std::shared_ptr underApproximation; HeuristicParameters underApproxHeuristicPar; - if (options.unfold) { // Setup and build first OverApproximation + if (options.unfold) { // Setup and build first UnderApproximation underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { underApproxBeliefManager->setRewardModel(rewardModelName); @@ -290,6 +290,27 @@ namespace storm { } } + // Do some output + STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". 
Current checktime is " << statistics.totalTime << "."); + bool computingLowerBound = false; + bool computingUpperBound = false; + if (options.discretize) { + STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingLowerBound : computingUpperBound) = true; + } + if (options.unfold) { + STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingUpperBound : computingLowerBound) = true; + } + if (computingLowerBound && computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); + } else if (computingLowerBound) { + STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); + } else if (computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); + } + STORM_PRINT_AND_LOG(std::endl); + // Start refinement STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. 
Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); @@ -361,7 +382,7 @@ namespace storm { STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); } STORM_PRINT_AND_LOG(std::endl); - STORM_LOG_WARN_COND(statistics.refinementSteps.get() == 1000, "Refinement requires more than 1000 iterations."); + STORM_LOG_WARN_COND(statistics.refinementSteps.get() < 1000, "Refinement requires more than 1000 iterations."); } } if (overApproxFixPoint && underApproxFixPoint) { From 6f476ef079af841534beaad9029bbe2f27bb73b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 1 May 2020 20:43:46 +0200 Subject: [PATCH 139/155] belief exploration: Improved fixpoint detection for over-approx --- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index a116077a1..be50cbfff 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -556,7 +556,7 @@ namespace storm { overApproximation->setCurrentStateIsTruncated(); if (fixPoint) { // Properly check whether this can still be a fixpoint - if (overApproximation->currentStateIsOptimalSchedulerReachable()) { + if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) { fixPoint = false; } //} else { From ab95e7d08b163d7262ab70dfd194b485e32f8764 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 4 May 2020 14:34:56 +0200 Subject: [PATCH 140/155] BeliefManager: organized stored beliefs in buckets (beliefs with the same observation belong in the same bucket) --- 
src/storm-pomdp/storage/BeliefManager.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 7e2c350b7..bc3f559d1 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -21,6 +21,7 @@ namespace storm { typedef uint64_t BeliefId; BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + beliefToIdMap.resize(pomdp.getNrObservations()); initialBeliefId = computeInitialBelief(); } @@ -145,8 +146,10 @@ namespace storm { } BeliefId getId(BeliefType const& belief) const { - auto idIt = beliefToIdMap.find(belief); - STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief."); + uint32_t obs = getBeliefObservation(belief); + STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); + auto idIt = beliefToIdMap[obs].find(belief); + STORM_LOG_ASSERT(idIt != beliefToIdMap.end(), "Unknown Belief."); return idIt->second; } @@ -410,7 +413,9 @@ namespace storm { } BeliefId getOrAddBeliefId(BeliefType const& belief) { - auto insertioRes = beliefToIdMap.emplace(belief, beliefs.size()); + uint32_t obs = getBeliefObservation(belief); + STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); + auto insertioRes = beliefToIdMap[obs].emplace(belief, beliefs.size()); if (insertioRes.second) { // There actually was an insertion, so add the new belief beliefs.push_back(belief); @@ -435,7 +440,7 @@ namespace storm { std::vector pomdpActionRewardVector; std::vector beliefs; - std::unordered_map beliefToIdMap; + std::vector> beliefToIdMap; BeliefId initialBeliefId; storm::utility::ConstantsComparator cc; From 2ac1c73076b3679a2292db8ed1280764c8618c26 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 4 May 2020 14:35:15 +0200 Subject: [PATCH 141/155] Change default initial resolution to 3 
--- .../settings/modules/BeliefExplorationSettings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp index b05d97d79..5bf1f4582 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -34,7 +34,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, explorationTimeLimitOption, false, "Sets after which time no further states shall be explored.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("time","In seconds.").build()).build()); - this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(12).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more 
precise)").setDefaultValueUnsignedInteger(3).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); From cc5faee9c05be88195a3e8bfd2b50a0de3b28964 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 4 May 2020 14:35:35 +0200 Subject: [PATCH 142/155] Fixed initial size threshold for over-approx. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index be50cbfff..c8bc31f10 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -250,7 +250,7 @@ namespace storm { overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); overApproxHeuristicPar.gapThreshold = options.gapThresholdInit; overApproxHeuristicPar.observationThreshold = options.obsThresholdInit; - overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; + overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits::max() : options.sizeThresholdInit; overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); if (!overApproximation->hasComputedValues()) { From fa10087fba05b53c0a1417340c884fd0e359fffa Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 5 May 2020 13:00:26 +0200 Subject: [PATCH 143/155] Implemented triangulation in a more dynamic way. 
--- .../ApproximatePOMDPModelCheckerOptions.h | 1 + .../ApproximatePOMDPModelchecker.cpp | 8 +- src/storm-pomdp/storage/BeliefManager.h | 161 ++++++++++++------ 3 files changed, 112 insertions(+), 58 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h index 6d977a902..c24248467 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h @@ -38,6 +38,7 @@ namespace storm { ValueType obsThresholdIncrementFactor = storm::utility::convertNumber(0.1); ValueType numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); /// Used to decide whether two beliefs are equal + bool dynamicTriangulation = true; // Sets whether the triangulation is done in a dynamic way (yielding more precise triangulations) }; } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c8bc31f10..d72d56a6d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -185,7 +185,7 @@ namespace storm { if (options.discretize) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit); - auto manager = std::make_shared(pomdp, options.numericPrecision); + auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? 
BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { manager->setRewardModel(rewardModelName); } @@ -205,7 +205,7 @@ namespace storm { } } if (options.unfold) { // Underapproximation (uses a fresh Belief manager) - auto manager = std::make_shared(pomdp, options.numericPrecision); + auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { manager->setRewardModel(rewardModelName); } @@ -243,7 +243,7 @@ namespace storm { HeuristicParameters overApproxHeuristicPar; if (options.discretize) { // Setup and build first OverApproximation observationResolutionVector = std::vector(pomdp.getNrObservations(), options.resolutionInit); - overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { overApproxBeliefManager->setRewardModel(rewardModelName); } @@ -267,7 +267,7 @@ namespace storm { std::shared_ptr underApproximation; HeuristicParameters underApproxHeuristicPar; if (options.unfold) { // Setup and build first UnderApproximation - underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? 
BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { underApproxBeliefManager->setRewardModel(rewardModelName); } diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index bc3f559d1..5a135a151 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -20,7 +20,12 @@ namespace storm { typedef boost::container::flat_set BeliefSupportType; typedef uint64_t BeliefId; - BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + enum class TriangulationMode { + Static, + Dynamic + }; + + BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision, TriangulationMode const& triangulationMode) : pomdp(pomdp), cc(precision, false), triangulationMode(triangulationMode) { beliefToIdMap.resize(pomdp.getNrObservations()); initialBeliefId = computeInitialBelief(); } @@ -288,65 +293,112 @@ namespace storm { } }; - Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); + void triangulateBeliefFreudenthal(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0"); StateType numEntries = belief.size(); + auto convResolution = storm::utility::convertNumber(resolution); + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N + // Variable names are mostly based on the paper + // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). + // Initialize diffs and the first row of the 'qs' matrix (aka v) + std::set> sorted_diffs; // d (and p?) 
in the paper + std::vector qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v + qsRow.reserve(numEntries); + std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices + toOriginalIndicesMap.reserve(numEntries); + BeliefValueType x = convResolution; + for (auto const& entry : belief) { + qsRow.push_back(storm::utility::floor(x)); // v + sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v + toOriginalIndicesMap.push_back(entry.first); + x -= entry.second * convResolution; + } + // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler + qsRow.push_back(storm::utility::zero()); + + result.weights.reserve(numEntries); + result.gridPoints.reserve(numEntries); + auto currentSortedDiff = sorted_diffs.begin(); + auto previousSortedDiff = sorted_diffs.end(); + --previousSortedDiff; + for (StateType i = 0; i < numEntries; ++i) { + // Compute the weight for the grid points + BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; + if (i == 0) { + // The first weight is a bit different + weight += storm::utility::one(); + } else { + // 'compute' the next row of the qs matrix + qsRow[previousSortedDiff->dimension] += storm::utility::one(); + } + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + // Compute the grid point + BeliefType gridPoint; + for (StateType j = 0; j < numEntries; ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; + } + } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); + } + previousSortedDiff = currentSortedDiff++; + } + } + + void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} 
is minimal + uint64_t finalResolution = resolution; + BeliefValueType finalResolutionDist = storm::utility::one(); + // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) + for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) { + BeliefValueType currResDist = storm::utility::zero(); + BeliefValueType currResolutionConverted = storm::utility::convertNumber(currResolution); + bool continueWithNextResolution = false; + for (auto const& belEntry : belief) { + BeliefValueType product = belEntry.second * currResolutionConverted; + BeliefValueType dist = storm::utility::abs(product - storm::utility::round(product)) / currResolutionConverted; + if (dist > currResDist) { + if (dist > finalResolutionDist) { + // This resolution is worse than a previous resolution + continueWithNextResolution = true; + break; + } + currResDist = dist; + } + } + STORM_LOG_ASSERT(continueWithNextResolution || currResDist <= finalResolutionDist, "Distance for this resolution should not be larger than a previously checked one."); + if (!continueWithNextResolution) { + finalResolution = currResolution; + finalResolutionDist = currResDist; + } + } + + STORM_LOG_TRACE("Picking resolution " << finalResolution << " for belief " << toString(belief)); + + // do standard freudenthal with the found resolution + triangulateBeliefFreudenthal(belief, finalResolution, result); + } + + Triangulation triangulateBelief(BeliefType const& belief, uint64_t const& resolution) { + STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); Triangulation result; - // Quickly triangulate Dirac beliefs - if (numEntries == 1u) { + if (belief.size() == 1u) { result.weights.push_back(storm::utility::one()); result.gridPoints.push_back(getOrAddBeliefId(belief)); } else { - - auto convResolution = storm::utility::convertNumber(resolution); - // This is the Freudenthal 
Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are mostly based on the paper - // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). - // Initialize diffs and the first row of the 'qs' matrix (aka v) - std::set> sorted_diffs; // d (and p?) in the paper - std::vector qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v - qsRow.reserve(numEntries); - std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices - toOriginalIndicesMap.reserve(numEntries); - BeliefValueType x = convResolution; - for (auto const& entry : belief) { - qsRow.push_back(storm::utility::floor(x)); // v - sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v - toOriginalIndicesMap.push_back(entry.first); - x -= entry.second * convResolution; - } - // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler - qsRow.push_back(storm::utility::zero()); - - result.weights.reserve(numEntries); - result.gridPoints.reserve(numEntries); - auto currentSortedDiff = sorted_diffs.begin(); - auto previousSortedDiff = sorted_diffs.end(); - --previousSortedDiff; - for (StateType i = 0; i < numEntries; ++i) { - // Compute the weight for the grid points - BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; - if (i == 0) { - // The first weight is a bit different - weight += storm::utility::one(); - } else { - // 'compute' the next row of the qs matrix - qsRow[previousSortedDiff->dimension] += storm::utility::one(); - } - if (!cc.isZero(weight)) { - result.weights.push_back(weight); - // Compute the grid point - BeliefType gridPoint; - for (StateType j = 0; j < numEntries; ++j) { - BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; - } - 
} - result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); - } - previousSortedDiff = currentSortedDiff++; + switch (triangulationMode) { + case TriangulationMode::Static: + triangulateBeliefFreudenthal(belief, resolution, result); + break; + case TriangulationMode::Dynamic: + triangulateBeliefDynamic(belief, resolution, result); + break; + default: + STORM_LOG_ASSERT(false, "Invalid triangulation mode."); } } STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); @@ -445,6 +497,7 @@ namespace storm { storm::utility::ConstantsComparator cc; + TriangulationMode triangulationMode; }; } From 71c410a3befd9c82c641d4ef214984c4895d5c2a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 5 May 2020 13:00:52 +0200 Subject: [PATCH 144/155] Added settings to switch between different triangulation modes. --- .../settings/modules/BeliefExplorationSettings.cpp | 13 +++++++++++++ .../settings/modules/BeliefExplorationSettings.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp index 5bf1f4582..dc42679aa 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -27,6 +27,7 @@ namespace storm { const std::string schedulerThresholdOption = "scheduler-threshold"; const std::string observationThresholdOption = "obs-threshold"; const std::string numericPrecisionOption = "numeric-precision"; + const std::string triangulationModeOption = "triangulationmode"; BeliefExplorationSettings::BeliefExplorationSettings() : ModuleSettings(moduleName) { @@ -46,6 +47,9 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").setIsAdvanced().addArgument( 
storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, triangulationModeOption, false,"Sets how to triangulate beliefs when discretizing.").setIsAdvanced().addArgument( + storm::settings::ArgumentBuilder::createStringArgument("value","the triangulation mode").setDefaultValueString("dynamic").addValidatorString(storm::settings::ArgumentValidatorFactory::createMultipleChoiceValidator({"dynamic", "static"})).build()).build()); } bool BeliefExplorationSettings::isRefineSet() const { @@ -121,6 +125,14 @@ namespace storm { return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); } + bool BeliefExplorationSettings::isDynamicTriangulationModeSet() const { + return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "dynamic"; + + } + bool BeliefExplorationSettings::isStaticTriangulationModeSet() const { + return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "static"; + } + template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const { options.refine = isRefineSet(); @@ -155,6 +167,7 @@ namespace storm { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. 
Results might be inexact."); } } + options.dynamicTriangulation = isDynamicTriangulationModeSet(); } template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h index 5ae3a1bde..82d1d6010 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h @@ -57,6 +57,9 @@ namespace storm { /// Used to determine whether two beliefs are equal bool isNumericPrecisionSetFromDefault() const; double getNumericPrecision() const; + + bool isDynamicTriangulationModeSet() const; + bool isStaticTriangulationModeSet() const; template void setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; From 703bdc4eb903cb41abae33b14ffecf1167393b7d Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 5 May 2020 14:39:46 +0200 Subject: [PATCH 145/155] Changed strategy of the dynamic triangulation approach such that the number of "missed" probabilities is minimized --- src/storm-pomdp/storage/BeliefManager.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 5a135a151..596d67bd5 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -351,28 +351,30 @@ namespace storm { void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal uint64_t finalResolution = resolution; - BeliefValueType finalResolutionDist = storm::utility::one(); + uint64_t 
finalResolutionMisses = belief.size() + 1; // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) { - BeliefValueType currResDist = storm::utility::zero(); + uint64_t currResMisses = 0; BeliefValueType currResolutionConverted = storm::utility::convertNumber(currResolution); bool continueWithNextResolution = false; for (auto const& belEntry : belief) { BeliefValueType product = belEntry.second * currResolutionConverted; - BeliefValueType dist = storm::utility::abs(product - storm::utility::round(product)) / currResolutionConverted; - if (dist > currResDist) { - if (dist > finalResolutionDist) { - // This resolution is worse than a previous resolution + if (!cc.isZero(product - storm::utility::round(product))) { + ++currResMisses; + if (currResMisses >= finalResolutionMisses) { + // This resolution is not better than a previous resolution continueWithNextResolution = true; break; } - currResDist = dist; } } - STORM_LOG_ASSERT(continueWithNextResolution || currResDist <= finalResolutionDist, "Distance for this resolution should not be larger than a previously checked one."); if (!continueWithNextResolution) { + STORM_LOG_ASSERT(currResMisses < finalResolutionMisses, "Distance for this resolution should not be larger than a previously checked one."); finalResolution = currResolution; - finalResolutionDist = currResDist; + finalResolutionMisses = currResMisses; + if (currResMisses == 0) { + break; + } } } From 2ebb5e8383f84d7cb385aa8a0039d32835206c02 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 5 May 2020 14:40:39 +0200 Subject: [PATCH 146/155] Fixed detection of fixpoints. 
--- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index d72d56a6d..788c96225 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -587,14 +587,20 @@ namespace storm { // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states) assert(overApproximation->currentStateHasOldBehavior()); if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { - fixPoint = false; // Then, check whether the other criteria for rewiring are satisfied if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { // Do the rewiring now! (Case 3.1) expandCurrentAction = true; + fixPoint = false; } else { // Delay the rewiring (Case 3.2.2) overApproximation->setCurrentChoiceIsDelayed(action); + if (fixPoint) { + // Check whether this delay means that a fixpoint has not been reached + if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) || (overApproximation->currentStateIsOptimalSchedulerReachable() && overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) { + fixPoint = false; + } + } } } // else { Case 3.2.1 } } From fcbce6052c044b4f57e4ef71e62d939058cbda59 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 6 May 2020 10:09:46 +0200 Subject: [PATCH 147/155] Fixed getting invalid bounds if we abort during the initial approximation step. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 788c96225..77d76f47b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -253,7 +253,7 @@ namespace storm { overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits::max() : options.sizeThresholdInit; overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); - if (!overApproximation->hasComputedValues()) { + if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { return; } ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); @@ -280,7 +280,7 @@ namespace storm { underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); } buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); - if (!underApproximation->hasComputedValues()) { + if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { return; } ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); From 1766bc385e7ef27afef48be4f3958396529f2340 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 6 May 2020 10:10:16 +0200 Subject: [PATCH 148/155] POMDP Approximation: Use relative gap --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 77d76f47b..55b252225 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -536,6 +536,8 @@ namespace storm { bool restoreAllActions = false; bool checkRewireForAllActions = false; ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()); + // Get the relative gap + gap = gap * storm::utility::convertNumber(2) / (storm::utility::abs(overApproximation->getLowerValueBoundAtCurrentState()) + storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState())); if (!hasOldBehavior) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state @@ -723,7 +725,10 @@ namespace storm { underApproximation->setCurrentStateIsTruncated(); } else if (!stateAlreadyExplored) { // Check whether we want to explore the state now! 
- if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) { + ValueType gap = storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()); + // Get the relative gap + gap = gap * storm::utility::convertNumber(2) / (storm::utility::abs(underApproximation->getLowerValueBoundAtCurrentState()) + storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState())); + if (gap < heuristicParameters.gapThreshold) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) { From 896d4096024c8394a157b7fab7cfb8830ea6971a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 6 May 2020 12:44:16 +0200 Subject: [PATCH 149/155] Implemented simple (but incomplete) check to display whether the belief MDP is finite. --- .../analysis/FiniteBeliefMdpDetection.h | 59 +++++++++++++++++++ .../ApproximatePOMDPModelchecker.cpp | 8 ++- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h diff --git a/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h b/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h new file mode 100644 index 000000000..e28b3a339 --- /dev/null +++ b/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h @@ -0,0 +1,59 @@ +#pragma once + +#include + +#include "storm/models/sparse/Pomdp.h" +#include "storm/storage/BitVector.h" +#include "storm/storage/StronglyConnectedComponentDecomposition.h" + +namespace storm { + namespace pomdp { + + /*! + * This method tries to detect that the beliefmdp is finite. + * If this returns true, the beliefmdp is certainly finite. 
+ * However, if this returns false, the beliefmdp might still be finite + * It is assumed that the belief MDP is not further explored when reaching a targetstate + */ + template + bool detectFiniteBeliefMdp(storm::models::sparse::Pomdp const& pomdp, boost::optional const& targetStates) { + // All infinite paths of the POMDP (including the ones with prob. 0 ) either + // - reach a target state after finitely many steps or + // - after finitely many steps enter an SCC and do not leave it + // Hence, any path of the belief MDP will at some point either reach a target state or stay in a set of POMDP SCCs. + // Only in the latter case we can get infinitely many different belief states. + // Below, we check whether all SCCs only consist of Dirac distributions. + // If this is the case, no new belief states will be found at some point. + + // Get the SCC decomposition + storm::storage::StronglyConnectedComponentDecompositionOptions options; + options.dropNaiveSccs(); + storm::storage::BitVector relevantStates; + if (targetStates) { + relevantStates = ~targetStates.get(); + options.subsystem(&relevantStates); + } + storm::storage::StronglyConnectedComponentDecomposition sccs(pomdp.getTransitionMatrix(), options); + + // Check whether all choices that stay within an SCC have Dirac distributions + for (auto const& scc : sccs) { + for (auto const& sccState : scc) { + for (uint64_t rowIndex = pomdp.getNondeterministicChoiceIndices()[sccState]; rowIndex < pomdp.getNondeterministicChoiceIndices()[sccState + 1]; ++rowIndex) { + for (auto const& entry : pomdp.getTransitionMatrix().getRow(rowIndex)) { + if (!storm::utility::isOne(entry.getValue()) && !storm::utility::isZero(entry.getValue())) { + if (scc.containsState(entry.getColumn())) { + // There is a non-dirac choice that stays in the SCC. 
+ // This could still mean that the belief MDP is finite + // e.g., if at some point the branches merge back to the same state + return false; + } + } + } + } + } + } + + return true; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 55b252225..81b2df397 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -5,6 +5,7 @@ #include #include "storm-pomdp/analysis/FormulaInformation.h" +#include "storm-pomdp/analysis/FiniteBeliefMdpDetection.h" #include "storm/utility/ConstantsComparator.h" #include "storm/utility/NumberTraits.h" @@ -106,6 +107,9 @@ namespace storm { } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } + if (storm::pomdp::detectFiniteBeliefMdp(pomdp, formulaInfo.getTargetStates().states)) { + STORM_PRINT_AND_LOG("Detected that the belief MDP is finite." 
<< std::endl); + } if (options.refine) { refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); @@ -172,7 +176,9 @@ namespace storm { stream << ">="; } stream << statistics.underApproximationStates.get() << std::endl; - stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; + if (statistics.underApproximationStateLimit) { + stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; + } stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } From 2500cc0cd212783e276a99883b2da19aa0632faf Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 8 May 2020 07:58:25 +0200 Subject: [PATCH 150/155] Fixed computation of relative gap for special cases (in particular l=u=0) --- .../ApproximatePOMDPModelchecker.cpp | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 81b2df397..fbbe017f8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -455,6 +455,25 @@ namespace storm { return resultingRatings; } + template + ValueType getGap(ValueType const& l, ValueType const& u) { + STORM_LOG_ASSERT(l >= storm::utility::zero() && u >= storm::utility::zero(), "Gap computation currently does not handle negative values."); + if (storm::utility::isInfinity(u)) { + if (storm::utility::isInfinity(l)) { + return storm::utility::zero(); + } else { + return u; + } + } else if 
(storm::utility::isZero(u)) { + STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << "."); + return u; + } else { + STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << "."); + // get the relative gap + return storm::utility::abs(u-l) * storm::utility::convertNumber(2) / (l+u); + } + } + template bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { @@ -541,9 +560,8 @@ namespace storm { bool truncateAllActions = false; bool restoreAllActions = false; bool checkRewireForAllActions = false; - ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()); // Get the relative gap - gap = gap * storm::utility::convertNumber(2) / (storm::utility::abs(overApproximation->getLowerValueBoundAtCurrentState()) + storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState())); + ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState()); if (!hasOldBehavior) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state @@ -731,9 +749,7 @@ namespace storm { underApproximation->setCurrentStateIsTruncated(); } else if (!stateAlreadyExplored) { // Check whether we want to explore the state now! 
- ValueType gap = storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()); - // Get the relative gap - gap = gap * storm::utility::convertNumber(2) / (storm::utility::abs(underApproximation->getLowerValueBoundAtCurrentState()) + storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState())); + ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState()); if (gap < heuristicParameters.gapThreshold) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); From 2f2a007896198591d86036e253936768f009591f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 11 May 2020 09:37:10 +0200 Subject: [PATCH 151/155] Implemented 'guessing' of initial pomdp schedulers for multiple guesses --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 26 ++- .../ApproximatePOMDPModelchecker.cpp | 19 +- .../ApproximatePOMDPModelchecker.h | 7 +- .../TrivialPomdpValueBoundsModelChecker.h | 202 +++++++++++++----- 4 files changed, 179 insertions(+), 75 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 0fc6c9ba4..32adf985c 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -14,6 +14,7 @@ #include "storm/storage/SparseMatrix.h" #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/SignalHandler.h" #include "storm/modelchecker/results/CheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" @@ -38,7 +39,7 @@ namespace storm { ModelChecked }; - BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), 
pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) { + BeliefMdpExplorer(std::shared_ptr beliefManager, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds) : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) { // Intentionally left empty } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; @@ -544,16 +545,24 @@ namespace storm { return upperValueBounds[getCurrentMdpState()]; } - /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties) - /// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { - return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); + STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available."); + auto it = pomdpValueBounds.lower.begin(); + ValueType result = beliefManager->getWeightedSum(beliefId, *it); + for (++it; it != pomdpValueBounds.lower.end(); ++it) { + result = std::max(result, beliefManager->getWeightedSum(beliefId, *it)); + } + return result; } - /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties) - /// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. 
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { - return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); + STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available."); + auto it = pomdpValueBounds.upper.begin(); + ValueType result = beliefManager->getWeightedSum(beliefId, *it); + for (++it; it != pomdpValueBounds.upper.end(); ++it) { + result = std::min(result, beliefManager->getWeightedSum(beliefId, *it)); + } + return result; } void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { @@ -817,8 +826,7 @@ namespace storm { std::shared_ptr> exploredMdp; // Value and scheduler related information - std::vector const& pomdpLowerValueBounds; - std::vector const& pomdpUpperValueBounds; + storm::pomdp::modelchecker::TrivialPomdpValueBounds pomdpValueBounds; std::vector lowerValueBounds; std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fbbe017f8..b08eb8177 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -87,7 +87,8 @@ namespace storm { // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); - Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); + uint64_t initialPomdpState = pomdp.getInitialStates().getNextSetIndex(0); + Result result(initialPomdpValueBounds.getHighestLowerBound(initialPomdpState), initialPomdpValueBounds.getSmallestUpperBound(initialPomdpState)); 
STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl); boost::optional rewardModelName; @@ -112,9 +113,9 @@ namespace storm { } if (options.refine) { - refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); } else { - computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); } // "clear" results in case they were actually not requested (this will make the output a bit more clear) if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) { @@ -187,7 +188,7 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { if (options.discretize) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit); @@ -195,7 +196,7 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + auto approx = std::make_shared(manager, pomdpValueBounds); HeuristicParameters 
heuristicParameters; heuristicParameters.gapThreshold = options.gapThresholdInit; heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement @@ -215,7 +216,7 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + auto approx = std::make_shared(manager, pomdpValueBounds); HeuristicParameters heuristicParameters; heuristicParameters.gapThreshold = options.gapThresholdInit; heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; @@ -239,7 +240,7 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { statistics.refinementSteps = 0; // Set up exploration data @@ -253,7 +254,7 @@ namespace storm { if (rewardModelName) { overApproxBeliefManager->setRewardModel(rewardModelName); } - overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + overApproximation = std::make_shared(overApproxBeliefManager, pomdpValueBounds); overApproxHeuristicPar.gapThreshold = options.gapThresholdInit; overApproxHeuristicPar.observationThreshold = options.obsThresholdInit; overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? 
std::numeric_limits::max() : options.sizeThresholdInit; @@ -277,7 +278,7 @@ namespace storm { if (rewardModelName) { underApproxBeliefManager->setRewardModel(rewardModelName); } - underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + underApproximation = std::make_shared(underApproxBeliefManager, pomdpValueBounds); underApproxHeuristicPar.gapThreshold = options.gapThresholdInit; underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 44127e011..c9f46aa1f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -16,6 +16,9 @@ namespace storm { namespace pomdp { namespace modelchecker { + template + struct TrivialPomdpValueBounds; + template class ApproximatePOMDPModelchecker { public: @@ -53,7 +56,7 @@ namespace storm { * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); + void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result); /** @@ -63,7 +66,7 @@ namespace storm { * @param min true if minimum probability is to be computed * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - void 
refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); + void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result); struct HeuristicParameters { ValueType gapThreshold; diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h index ca4c2192f..82bfd9e5b 100644 --- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -15,26 +15,112 @@ namespace storm { namespace pomdp { namespace modelchecker { + + template + struct TrivialPomdpValueBounds { + std::vector> lower; + std::vector> upper; + ValueType getHighestLowerBound(uint64_t const& state) { + STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available"); + auto it = lower.begin(); + ValueType result = (*it)[state]; + for (++it; it != lower.end(); ++it) { + result = std::max(result, (*it)[state]); + } + return result; + } + ValueType getSmallestUpperBound(uint64_t const& state) { + STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available"); + auto it = upper.begin(); + ValueType result = (*it)[state]; + for (++it; it != upper.end(); ++it) { + result = std::min(result, (*it)[state]); + } + return result; + } + }; + template class TrivialPomdpValueBoundsModelChecker { public: typedef typename PomdpType::ValueType ValueType; + typedef TrivialPomdpValueBounds ValueBounds; TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { // Intentionally left empty } - struct ValueBounds { - std::vector lower; - std::vector upper; - }; ValueBounds getValueBounds(storm::logic::Formula const& formula) { 
return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); } + std::vector getChoiceValues(std::vector const& stateValues, std::vector* actionBasedRewards) { + std::vector choiceValues((pomdp.getNumberOfChoices())); + pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards); + return choiceValues; + } + + std::vector computeValuesForGuessedScheduler(std::vector const& stateValues, std::vector* actionBasedRewards, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, std::shared_ptr> underlyingMdp, ValueType const& scoreThreshold, bool relativeScore) { + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. + auto choiceValues = getChoiceValues(stateValues, actionBasedRewards); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + std::vector> choiceDistributions(pomdp.getNrObservations()); + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)]; + ValueType const& stateValue = stateValues[state]; + assert(stateValue >= storm::utility::zero()); + std::cout << state << ": " << stateValue << "\t t=" << scoreThreshold << " rel=" << relativeScore << std::endl; + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = choiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + std::cout << "\t" << (choice - choiceIndices[state]) << ": " << choiceValue << std::endl; + // Rate this choice by considering the relative difference between the choice value and the (optimal) state value + // A high score shall mean that the choice is "good" + if (storm::utility::isInfinity(stateValue)) { + // For infinity states, we simply 
distribute uniformly. + // FIXME: This case could be handled a bit more sensible + choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold); + } else { + ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue); + if (relativeScore) { + ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber(2); + if (!storm::utility::isZero(avg)) { + choiceScore /= avg; + } + } + choiceScore = storm::utility::one() - choiceScore; + if (choiceScore >= scoreThreshold) { + choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore); + } + } + + } + STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution."); + } + // Normalize all distributions + for (auto& choiceDistribution : choiceDistributions) { + choiceDistribution.normalize(); + } + // Set the scheduler for all states + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state); + } + STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + return pomdpSchedulerResult; + } + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), 
storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + // Compute the values on the fully observable MDP - // We need an actual MDP here so that the apply scheduler method below will work. + // We need an actual MDP so that we can apply schedulers below. // Also, the api call in the next line will require a copy anyway. auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); @@ -42,70 +128,76 @@ namespace storm { STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); - // Create some positional scheduler for the POMDP - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - // For each state, we heuristically find a good distribution over output actions. 
- std::vector fullyObservableChoiceValues(pomdp.getNumberOfChoices()); + std::vector actionBasedRewards; + std::vector* actionBasedRewardsPtr = nullptr; if (info.isNonNestedExpectedRewardFormula()) { - std::vector actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); - pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); - } else { - pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); + actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + actionBasedRewardsPtr = &actionBasedRewards; } - auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - storm::storage::Distribution choiceDistribution; - for (auto const &state : obsStates) { - ValueType const& stateValue = fullyObservableResult[state]; - assert(stateValue >= storm::utility::zero()); - for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { - ValueType const& choiceValue = fullyObservableChoiceValues[choice]; - assert(choiceValue >= storm::utility::zero()); - // Rate this choice by considering the relative difference between the choice value and the (optimal) state value - ValueType choiceRating; - if (stateValue < choiceValue) { - choiceRating = choiceValue - stateValue; - if (!storm::utility::isZero(choiceValue)) { - choiceRating /= choiceValue; - } + std::vector> guessedSchedulerValues; + + std::vector> guessParameters({{0.875,false},{0.875,true},{0.75,false},{0.75,true}}); + for (auto const& pars : guessParameters) { + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, 
storm::utility::convertNumber(pars.first), pars.second)); + } + + // compute the 'best' guess and do a few iterations on it + uint64_t bestGuess = 0; + ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero()); + for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) { + ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero()); + if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) { + bestGuess = guess; + bestGuessSum = guessSum; + } + } + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + + // Check if one of the guesses is worse than one of the others (and potentially delete it) + // Avoid deleting entries during the loop to ensure that indices remain valid + storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true); + for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) { + if (!keptGuesses.get(i)) { + continue; + } + for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) { + if (!keptGuesses.get(j)) { + continue; + } + if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], 
guessedSchedulerValues[j], std::less_equal())) { + if (info.minimize()) { + // In this case we are guessing upper bounds (and smaller upper bounds are better) + keptGuesses.set(j, false); } else { - choiceRating = stateValue - choiceValue; - if (!storm::utility::isZero(stateValue)) { - choiceRating /= stateValue; - } + // In this case we are guessing lower bounds (and larger lower bounds are better) + keptGuesses.set(i, false); + break; } - assert(choiceRating <= storm::utility::one()); - assert(choiceRating >= storm::utility::zero()); - // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice - if (choiceRating <= storm::utility::convertNumber(0.5)) { - choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one() - choiceRating); + } else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal())) { + if (info.minimize()) { + keptGuesses.set(i, false); + break; + } else { + keptGuesses.set(j, false); } } } - choiceDistribution.normalize(); - for (auto const& state : obsStates) { - pomdpScheduler.setChoice(choiceDistribution, state); - } } - STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); - auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); - - auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); - STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); - STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); - std::vector pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult().getValueVector()); + std::cout << "Keeping scheduler guesses " << keptGuesses << std::endl; + storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses); // 
Finally prepare the result ValueBounds result; if (info.minimize()) { - result.lower = std::move(fullyObservableResult); - result.upper = std::move(pomdpSchedulerResult); + result.lower.push_back(std::move(fullyObservableResult)); + result.upper = std::move(guessedSchedulerValues); } else { - result.lower = std::move(pomdpSchedulerResult); - result.upper = std::move(fullyObservableResult); + result.lower = std::move(guessedSchedulerValues); + result.upper.push_back(std::move(fullyObservableResult)); } - STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal()), "Lower bound is larger than upper bound"); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal()), "Lower bound is larger than upper bound"); return result; } From 55c4408c6a783fac39ec191279ed49997e4ba74b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 11 May 2020 10:01:18 +0200 Subject: [PATCH 152/155] Storing the observation resolutions as a float so that we can increase the resolution more accurately with a non-integer factor --- .../ApproximatePOMDPModelchecker.cpp | 41 +++++++++---------- .../ApproximatePOMDPModelchecker.h | 7 ++-- src/storm-pomdp/storage/BeliefManager.h | 33 +++++++-------- 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index b08eb8177..b43e62711 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -191,7 +191,7 @@ namespace storm { void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { if (options.discretize) { - std::vector 
observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit); + std::vector observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { manager->setRewardModel(rewardModelName); @@ -244,12 +244,12 @@ namespace storm { statistics.refinementSteps = 0; // Set up exploration data - std::vector observationResolutionVector; + std::vector observationResolutionVector; std::shared_ptr overApproxBeliefManager; std::shared_ptr overApproximation; HeuristicParameters overApproxHeuristicPar; if (options.discretize) { // Setup and build first OverApproximation - observationResolutionVector = std::vector(pomdp.getNrObservations(), options.resolutionInit); + observationResolutionVector = std::vector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { overApproxBeliefManager->setRewardModel(rewardModelName); @@ -404,32 +404,33 @@ namespace storm { * Here, 0 means a bad approximation and 1 means a good approximation. 
*/ template - typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { - auto n = storm::utility::convertNumber(info.support.size()); - auto one = storm::utility::one(); + BeliefValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) { + auto n = storm::utility::convertNumber(info.support.size()); + auto one = storm::utility::one(); if (storm::utility::isOne(n)) { // If the belief is Dirac, it has to be approximated precisely. // In this case, we return the best possible rating return one; } else { // Create the rating for this observation at this choice from the given info - ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + BeliefValueType obsChoiceRating = storm::utility::convertNumber(info.maxProbabilityToSuccessorWithObs / info.observationProbability); // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // Normalize the rating so that it ranges from 0 to 1, where // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. 
obsChoiceRating = (obsChoiceRating * n - one) / (n - one); // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) - obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); + obsChoiceRating *= observationResolution / maxResolution; return obsChoiceRating; } } template - std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { + std::vector ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector) { uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); + BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); std::map gatheredSuccessorObservations; // Declare here to avoid reallocations for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) { @@ -444,7 +445,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { auto const& obs = obsInfo.first; - ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); + BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); // The rating of the observation will be the minimum over all choice-based observation ratings resultingRatings[obs] = 
std::min(resultingRatings[obs], obsChoiceRating); @@ -476,14 +477,11 @@ namespace storm { } template - bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { // Detect whether the refinement reached a fixpoint. bool fixPoint = true; - // current maximal resolution (needed for refinement heuristic) - uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; if (!refine) { @@ -497,7 +495,8 @@ namespace storm { // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); // We also need to find out which observation resolutions needs refinement. 
- auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); + // current maximal resolution (needed for refinement heuristic) + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector); // If there is a score < 1, we have not reached a fixpoint, yet if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one();})) { fixPoint = false; @@ -506,17 +505,17 @@ namespace storm { STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { // Increment the resolution at the refined observations. - // Detect overflows properly. + // Use storm's rational number to detect overflows properly. storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber(observationResolutionVector[obs]) * storm::utility::convertNumber(options.resolutionFactor); - if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { - observationResolutionVector[obs] = std::numeric_limits::max(); + if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { + observationResolutionVector[obs] = storm::utility::convertNumber(std::numeric_limits::max()); } else { - observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); + observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); } } overApproximation->restartExploration(); } - statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + statistics.overApproximationMaxResolution = storm::utility::convertNumber(storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()))); // Start exploration 
storm::utility::Stopwatch explorationTime; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index c9f46aa1f..0e1b3a715 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -49,7 +49,6 @@ namespace storm { * * @param targetObservations set of target observations * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value @@ -79,7 +78,7 @@ namespace storm { * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) */ - bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties @@ -87,9 +86,9 @@ namespace storm { */ bool buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution); + BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution); - std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution); + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector); struct Statistics { Statistics(); diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 596d67bd5..4f9ca445c 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -111,7 +111,7 @@ namespace storm { return pomdp.getNumberOfChoices(belief.begin()->first); } - Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) { return triangulateBelief(getBelief(beliefId), resolution); } @@ -134,7 +134,7 @@ namespace storm { return beliefs.size(); } - std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { return expandInternal(beliefId, actionIndex, observationResolutions); } @@ -293,10 +293,10 @@ namespace storm { } }; - 
void triangulateBeliefFreudenthal(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0"); + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); StateType numEntries = belief.size(); - auto convResolution = storm::utility::convertNumber(resolution); // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N // Variable names are mostly based on the paper @@ -307,12 +307,12 @@ namespace storm { qsRow.reserve(numEntries); std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices toOriginalIndicesMap.reserve(numEntries); - BeliefValueType x = convResolution; + BeliefValueType x = resolution; for (auto const& entry : belief) { qsRow.push_back(storm::utility::floor(x)); // v sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v toOriginalIndicesMap.push_back(entry.first); - x -= entry.second * convResolution; + x -= entry.second * resolution; } // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler qsRow.push_back(storm::utility::zero()); @@ -339,7 +339,7 @@ namespace storm { for (StateType j = 0; j < numEntries; ++j) { BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; if (!cc.isZero(gridPointEntry)) { - gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution; } } result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); @@ -348,17 +348,17 @@ namespace storm { } } - void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& 
resolution, Triangulation& result) { // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal - uint64_t finalResolution = resolution; + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); + BeliefValueType finalResolution = resolution; uint64_t finalResolutionMisses = belief.size() + 1; // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) - for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) { + for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) { uint64_t currResMisses = 0; - BeliefValueType currResolutionConverted = storm::utility::convertNumber(currResolution); bool continueWithNextResolution = false; for (auto const& belEntry : belief) { - BeliefValueType product = belEntry.second * currResolutionConverted; + BeliefValueType product = belEntry.second * currResolution; if (!cc.isZero(product - storm::utility::round(product))) { ++currResMisses; if (currResMisses >= finalResolutionMisses) { @@ -384,7 +384,7 @@ namespace storm { triangulateBeliefFreudenthal(belief, finalResolution, result); } - Triangulation triangulateBelief(BeliefType const& belief, uint64_t const& resolution) { + Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) { STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); Triangulation result; // Quickly triangulate Dirac beliefs @@ -392,12 +392,13 @@ namespace storm { result.weights.push_back(storm::utility::one()); result.gridPoints.push_back(getOrAddBeliefId(belief)); } else { + auto ceiledResolution = storm::utility::ceil(resolution); switch (triangulationMode) { case TriangulationMode::Static: - triangulateBeliefFreudenthal(belief, resolution, result); + 
triangulateBeliefFreudenthal(belief, ceiledResolution, result); break; case TriangulationMode::Dynamic: - triangulateBeliefDynamic(belief, resolution, result); + triangulateBeliefDynamic(belief, ceiledResolution, result); break; default: STORM_LOG_ASSERT(false, "Invalid triangulation mode."); @@ -407,7 +408,7 @@ namespace storm { return result; } - std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { + std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::vector> destinations; BeliefType belief = getBelief(beliefId); From 92aa029bc58a1cc828dc97cdc7b66344f58cbb70 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 11 May 2020 10:23:03 +0200 Subject: [PATCH 153/155] Removed debug output --- .../modelchecker/TrivialPomdpValueBoundsModelChecker.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h index 82bfd9e5b..0120f0817 100644 --- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -70,11 +70,9 @@ namespace storm { auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)]; ValueType const& stateValue = stateValues[state]; assert(stateValue >= storm::utility::zero()); - std::cout << state << ": " << stateValue << "\t t=" << scoreThreshold << " rel=" << relativeScore << std::endl; for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { ValueType const& choiceValue = choiceValues[choice]; assert(choiceValue >= storm::utility::zero()); - std::cout << "\t" << (choice - choiceIndices[state]) << ": " << choiceValue << std::endl; // Rate this choice by considering the relative difference between 
the choice value and the (optimal) state value // A high score shall mean that the choice is "good" if (storm::utility::isInfinity(stateValue)) { From ee350ca3840eb78ba83b1f5623b9ed8673f72472 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 11 May 2020 13:38:24 +0200 Subject: [PATCH 154/155] Use same precision as BeliefValueType when dealing with triangulation resolutions. --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 11 +++++++---- .../modelchecker/ApproximatePOMDPModelchecker.h | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index b43e62711..bee46318d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -498,7 +498,9 @@ namespace storm { // current maximal resolution (needed for refinement heuristic) auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector); // If there is a score < 1, we have not reached a fixpoint, yet - if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one();})) { + BeliefValueType numericPresicion = storm::utility::convertNumber(options.numericPrecision); + if (std::any_of(obsRatings.begin(), obsRatings.end(), [&numericPresicion](ValueType const& value){return value + numericPresicion < storm::utility::one();})) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine."); fixPoint = false; } refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); @@ -507,15 +509,16 @@ namespace storm { // Increment the resolution at the refined observations. // Use storm's rational number to detect overflows properly. 
storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber(observationResolutionVector[obs]) * storm::utility::convertNumber(options.resolutionFactor); - if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { - observationResolutionVector[obs] = storm::utility::convertNumber(std::numeric_limits::max()); + static_assert(storm::NumberTraits::IsExact || std::is_same::value, "Unhandled belief value type"); + if (!storm::NumberTraits::IsExact && newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { + observationResolutionVector[obs] = storm::utility::convertNumber(std::numeric_limits::max()); } else { observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); } } overApproximation->restartExploration(); } - statistics.overApproximationMaxResolution = storm::utility::convertNumber(storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()))); + statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end())); // Start exploration storm::utility::Stopwatch explorationTime; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 0e1b3a715..f5904e0dc 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -99,7 +99,7 @@ namespace storm { bool overApproximationBuildAborted; storm::utility::Stopwatch overApproximationBuildTime; storm::utility::Stopwatch overApproximationCheckTime; - boost::optional overApproximationMaxResolution; + boost::optional overApproximationMaxResolution; boost::optional underApproximationStates; bool underApproximationBuildAborted; From e560c7f57cd64f146ef3a0a3e15f5153d47567c7 Mon Sep 17 00:00:00 2001 From: Tim 
Quatmann Date: Mon, 11 May 2020 13:38:49 +0200 Subject: [PATCH 155/155] Added some INFO output to check why there is no refinement fixpoint. --- .../modelchecker/ApproximatePOMDPModelchecker.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bee46318d..2e8619819 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -532,12 +532,14 @@ namespace storm { if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { STORM_LOG_INFO("Exploration time limit exceeded."); timeLimitExceeded = true; + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded."); fixPoint = false; } uint64_t currId = overApproximation->exploreNextState(); bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior(); if (!hasOldBehavior) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored"); fixPoint = false; // Exploring a new state! 
} uint32_t currObservation = beliefManager->getBeliefObservation(currId); @@ -579,6 +581,7 @@ namespace storm { // Case 2 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 2.1 + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored."); fixPoint = false; } else { truncateAllActions = true; // Case 2.2 @@ -586,6 +589,7 @@ namespace storm { if (fixPoint) { // Properly check whether this can still be a fixpoint if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap " << gap << " that is reachable via an optimal sched."); fixPoint = false; } //} else { @@ -620,6 +624,7 @@ namespace storm { if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { // Do the rewiring now! (Case 3.1) expandCurrentAction = true; + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state."); fixPoint = false; } else { // Delay the rewiring (Case 3.2.2) @@ -627,6 +632,7 @@ namespace storm { if (fixPoint) { // Check whether this delay means that a fixpoint has not been reached if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) || (overApproximation->currentStateIsOptimalSchedulerReachable() && overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap " << gap << " that is reachable via an optimal scheduler."); fixPoint = false; } }