diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
new file mode 100644
index 000000000..107f699ae
--- /dev/null
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -0,0 +1,354 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <deque>
+#include <map>
+#include <boost/optional.hpp>
+
+#include "storm/api/properties.h"
+#include "storm/api/verification.h"
+
+#include "storm/storage/BitVector.h"
+#include "storm/utility/macros.h"
+#include "storm-pomdp/storage/BeliefManager.h"
+#include "storm/utility/SignalHandler.h"
+
+namespace storm {
+    namespace builder {
+        template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
+        class BeliefMdpExplorer {
+        public:
+            typedef typename PomdpType::ValueType ValueType;
+            typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
+            typedef typename BeliefManagerType::BeliefId BeliefId;
+            typedef uint64_t MdpStateType;
+
+            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
+                // Intentionally left empty
+            }
+
+            void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
+                // Reset data from potential previous explorations
+                mdpStateToBeliefIdMap.clear();
+                beliefIdToMdpStateMap.clear();
+                beliefIdsWithMdpState.clear();
+                beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
+                lowerValueBounds.clear();
+                upperValueBounds.clear();
+                values.clear();
+                mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
+                currentRowCount = 0;
+                startOfCurrentRowGroup = 0;
+                mdpActionRewards.clear();
+                exploredMdp = nullptr;
+
+                // Add some states with special treatment (if requested)
+                if (extraBottomStateValue) {
+                    extraBottomState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
+
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+                } else {
+                    extraBottomState = boost::none;
+                }
+                if (extraTargetStateValue) {
+                    extraTargetState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
+
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+
+                    targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                    targetStates.set(extraTargetState.get(), true);
+                } else {
+                    extraTargetState = boost::none;
+                }
+
+                // Set up the initial state.
+                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
+            }
+
+            bool hasUnexploredState() const {
+                return !beliefIdsToExplore.empty();
+            }
+
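+            // Expected usage: call startNewExploration() once, then, while hasUnexploredState()
+            // holds, call exploreNextState() and add the transitions, rewards, and labels for the
+            // state that was popped. finishExploration() then assembles the MDP from this data.
+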
+            BeliefId exploreNextState() {
+                // Set up the matrix builder
+                finishCurrentRow();
+                startOfCurrentRowGroup = currentRowCount;
+                mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                ++currentRowCount;
+
+                // Pop from the queue.
+                auto result = beliefIdsToExplore.front();
+                beliefIdsToExplore.pop_front();
+                return result;
+            }
+
+            void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                if (!storm::utility::isZero(bottomStateValue)) {
+                    STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
+                    internalAddTransition(row, extraBottomState.get(), bottomStateValue);
+                }
+                if (!storm::utility::isZero(targetStateValue)) {
+                    STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
+                    internalAddTransition(row, extraTargetState.get(), targetStateValue);
+                }
+            }
+
+            void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, getCurrentMdpState(), value);
+            }
+
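+            // Design note: internalAddTransition buffers the entries of the current row in a
+            // std::map first, so that they reach the SparseMatrixBuilder sorted by column index,
+            // as the builder requires entries to be added in ascending column order.
+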
+            /*!
+             * Adds the next transition to the given successor belief
+             * @param localActionIndex
+             * @param transitionTarget
+             * @param value
+             * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
+             * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
+             */
+            bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                MdpStateType column;
+                if (ignoreNewBeliefs) {
+                    column = getMdpState(transitionTarget);
+                    if (column == noState()) {
+                        return false;
+                    }
+                } else {
+                    column = getOrAddMdpState(transitionTarget);
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, column, value);
+                return true;
+            }
+
+            void computeRewardAtCurrentState(uint64_t const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
+                if (currentRowCount >= mdpActionRewards.size()) {
+                    mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
+            }
+
+            void setCurrentStateIsTarget() {
+                targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                targetStates.set(getCurrentMdpState(), true);
+            }
+
+            void setCurrentStateIsTruncated() {
+                truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
+                truncatedStates.set(getCurrentMdpState(), true);
+            }
+
+            void finishExploration() {
+                // Create the transition matrix
+                finishCurrentRow();
+                auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
+
+                // Create a standard labeling
+                storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
+                mdpLabeling.addLabel("init");
+                mdpLabeling.addLabelToState("init", initialMdpState);
+                targetStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("target", std::move(targetStates));
+                truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("truncated", std::move(truncatedStates));
+
+                // Create a standard reward model (if rewards are available)
+                std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
+                if (!mdpActionRewards.empty()) {
+                    mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
+                    mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
+                }
+
+                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
+                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+            }
+
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
+                return exploredMdp;
+            }
+
+            MdpStateType getCurrentNumberOfMdpStates() const {
+                return mdpStateToBeliefIdMap.size();
+            }
+
+            MdpStateType getCurrentNumberOfMdpChoices() const {
+                return currentRowCount;
+            }
+
+            ValueType getLowerValueBoundAtCurrentState() const {
+                return lowerValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType getUpperValueBoundAtCurrentState() const {
+                return upperValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
+                return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
+            }
+
+            ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
+                return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
+            }
+
+            std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
+                auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
+                auto task = createStandardCheckTask(property);
+
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
+                if (res) {
+                    values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                } else {
+                    STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
+                    STORM_LOG_ERROR("No result obtained while checking.");
+                }
+                return values;
+            }
+
+            ValueType const& getComputedValueAtInitialState() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
+                return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
+            }
+
+        private:
+            MdpStateType noState() const {
+                return std::numeric_limits<MdpStateType>::max();
+            }
+
+            std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += storm::solver::minimize(dir) ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                return storm::api::extractFormulasFromProperties(propertyVector).front();
+            }
+
+            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
+                //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
+                // Therefore, this method needs the property by reference (and not const reference)
+                auto task = storm::api::createTask<ValueType>(property, false);
+                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                hint.setResultHint(values);
+                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                task.setHint(hintPtr);
+                return task;
+            }
+
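+            // The "current" MDP state is implicit: it is the state whose row group was started
+            // most recently by exploreNextState(), hence the row group count minus one below.
+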
+            MdpStateType getCurrentMdpState() const {
+                return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
+            }
+
+            MdpStateType getCurrentBeliefId() const {
+                return mdpStateToBeliefIdMap[getCurrentMdpState()];
+            }
+
+            void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
+                if (row >= currentRowCount) {
+                    // We are going to start a new row, so insert the entries of the old one
+                    finishCurrentRow();
+                    currentRowCount = row + 1;
+                }
+                STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
+                mdpTransitionsBuilderCurrentRowEntries[column] = value;
+            }
+
+            void finishCurrentRow() {
+                for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
+                    mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
+                }
+                mdpTransitionsBuilderCurrentRowEntries.clear();
+            }
+
+            MdpStateType getMdpState(BeliefId const& beliefId) const {
+                if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap.at(beliefId);
+                } else {
+                    return noState();
+                }
+            }
+
+            void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
+                lowerValueBounds.push_back(lowerBound);
+                upperValueBounds.push_back(upperBound);
+                // Take the middle value as a hint
+                values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType>(2));
+                STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have a different size than the number of available states.");
+                STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
+            }
+
+            MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
+                beliefIdsWithMdpState.grow(beliefId + 1, false);
+                if (beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap[beliefId];
+                } else {
+                    // Add a new MDP state
+                    beliefIdsWithMdpState.set(beliefId, true);
+                    MdpStateType result = getCurrentNumberOfMdpStates();
+                    assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
+                    mdpStateToBeliefIdMap.push_back(beliefId);
+                    beliefIdToMdpStateMap[beliefId] = result;
+                    // This new belief needs exploration
+                    beliefIdsToExplore.push_back(beliefId);
+
+                    insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
+                    return result;
+                }
+            }
+
+            // Belief state related information
+            std::shared_ptr<BeliefManagerType> beliefManager;
+            std::vector<BeliefId> mdpStateToBeliefIdMap;
+            std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
+            storm::storage::BitVector beliefIdsWithMdpState;
+
+            // Exploration information
+            std::deque<BeliefId> beliefIdsToExplore;
+            storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
+            std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
+            std::vector<ValueType> mdpActionRewards;
+            uint64_t startOfCurrentRowGroup;
+            uint64_t currentRowCount;
+
+            // Special states during exploration
+            boost::optional<MdpStateType> extraTargetState;
+            boost::optional<MdpStateType> extraBottomState;
+            storm::storage::BitVector targetStates;
+            storm::storage::BitVector truncatedStates;
+            MdpStateType initialMdpState;
+
+            // Final Mdp
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
+
+            // Value related information
+            std::vector<ValueType> const& pomdpLowerValueBounds;
+            std::vector<ValueType> const& pomdpUpperValueBounds;
+            std::vector<ValueType> lowerValueBounds;
+            std::vector<ValueType> upperValueBounds;
+            std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
+
+        };
+    }
+}
\ No newline at end of file
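Usage sketch for the explorer above (illustrative only; `manager`, `lower`, `upper`, and `resolutions` are hypothetical placeholders for a shared BeliefManager, the POMDP state-wise bound vectors, and the observation resolution vector; target labeling and truncation handling are omitted):

    storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<double>> explorer(manager, lower, upper);
    explorer.startNewExploration(storm::utility::one<double>(), storm::utility::zero<double>()); // extra target/bottom states
    while (explorer.hasUnexploredState()) {
        auto beliefId = explorer.exploreNextState();
        for (uint64_t action = 0; action < manager->getBeliefNumberOfChoices(beliefId); ++action) {
            // expandAndTriangulate yields (successor belief id, probability) pairs
            for (auto const& successor : manager->expandAndTriangulate(beliefId, action, resolutions)) {
                explorer.addTransitionToBelief(action, successor.first, successor.second, false);
            }
        }
    }
    explorer.finishExploration();
    explorer.computeValuesOfExploredMdp(storm::solver::OptimizationDirection::Maximize);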
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 6728535f5..42f1872f7 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -22,7 +22,8 @@
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
-
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
+#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
 
@@ -57,6 +58,10 @@ namespace storm {
             std::unique_ptr<POMDPCheckResult<ValueType>> result;
             // Extract the relevant information from the formula
             auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
+
+            // Compute some initial bounds on the values for each state of the pomdp
+            auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
+
             if (formulaInfo.isNonNestedReachabilityProbability()) {
                 // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                 STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
@@ -68,7 +73,7 @@
                 if (options.doRefinement) {
                     result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
                 } else {
-                    result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                    result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
                 }
             } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
                 // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
@@ -78,7 +83,7 @@
             } else {
                 // FIXME: pick the non-unique reward model here
                 STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-                result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
             }
         } else {
             STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
@@ -233,8 +238,8 @@
             uint64_t refinementCounter = 1;
             STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
             std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                                                              initialOverApproxMap,
-                                                                                              initialUnderApproxMap, underApproxModelSize);
+                                                                                              {},
+                                                                                              {}, underApproxModelSize);
             if (res == nullptr) {
                 statistics.refinementSteps = 0;
                 return nullptr;
             }
@@ -335,14 +340,14 @@
         template<typename ValueType, typename RewardModelType>
         std::unique_ptr<POMDPCheckResult<ValueType>>
-        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                         std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-                                                                                         boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                         boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+                                                                                         std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                         std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                          uint64_t maxUaModelSize) {
             STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-            auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap,
-                                                     underApproximationMap, maxUaModelSize);
+            std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+            auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
+                                                     upperPomdpValueBounds, maxUaModelSize);
             if (result == nullptr) {
                 return nullptr;
             }
@@ -353,8 +358,6 @@
             }
         }
 
-
-
         template<typename ValueType, typename BeliefType, typename SummandsType>
         ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
             ValueType result = storm::utility::zero<ValueType>();
@@ -369,155 +372,64 @@
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                              std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                             std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                             std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                              uint64_t maxUaModelSize) {
-            bool boundMapsSet = overApproximationMap && underApproximationMap;
-            std::map<uint64_t, ValueType> overMap;
-            std::map<uint64_t, ValueType> underMap;
-            if (boundMapsSet) {
-                overMap = overApproximationMap.value();
-                underMap = underApproximationMap.value();
-            }
-
             auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
             if (computeRewards) {
                 beliefManager->setRewardModel(); // TODO: get actual name
             }
-            bsmap_type beliefStateMap;
-
-            std::deque<uint64_t> beliefsToBeExpanded;
-
             statistics.overApproximationBuildTime.start();
-            // Initial belief always has belief ID 0
-            auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
-            // These are the components to build the MDP from the grid
-            // Reserve states 0 and 1 as always sink/goal states
-            storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-            uint64_t extraBottomState = 0;
-            uint64_t extraTargetState = computeRewards ? 0 : 1;
-            uint64_t nextMdpStateId = extraTargetState + 1;
-            uint64_t mdpMatrixRow = 0;
-            for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-                mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-                ++mdpMatrixRow;
-            }
-            // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-            std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
-            if (!computeRewards) {
-                hintVector[extraTargetState] = storm::utility::one<ValueType>();
-            }
-            std::vector<uint64_t> targetStates = {extraTargetState};
-            storm::storage::BitVector fullyExpandedStates;
-
-            // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
-            std::map<uint64_t, ValueType> weightedSumOverMap;
-            std::map<uint64_t, ValueType> weightedSumUnderMap;
-
-            // for the initial belief, add the triangulated initial states
-            auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
-            uint64_t initialMdpState = nextMdpStateId;
-            ++nextMdpStateId;
-            if (triangulation.size() == 1) {
-                // The initial belief is on the grid itself
-                auto initBeliefId = triangulation.gridPoints.front();
-                if (boundMapsSet) {
-                    auto const& gridPoint = beliefManager->getBelief(initBeliefId);
-                    weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
-                    weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
-                }
-                beliefsToBeExpanded.push_back(initBeliefId);
-                beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
-                hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                             : storm::utility::zero<ValueType>());
+            storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+            if (computeRewards) {
+                explorer.startNewExploration(storm::utility::zero<ValueType>());
             } else {
-                // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-                mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                for (uint64_t i = 0; i < triangulation.size(); ++i) {
-                    beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
-                    beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
-                    ++nextMdpStateId;
-                    if (boundMapsSet) {
-                        auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
-                        weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
-                        weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
-                    }
-                    hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                 : storm::utility::zero<ValueType>());
-                }
-                //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-                ++mdpMatrixRow;
+                explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
             }
 
             // Expand the beliefs to generate the grid on-the-fly
             if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                 STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
             }
-            storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-            for (auto const& belId : beliefsToBeExpanded) {
-                foundBeliefs.set(belId, true);
-            }
-            while (!beliefsToBeExpanded.empty()) {
-                uint64_t currId = beliefsToBeExpanded.front();
-                beliefsToBeExpanded.pop_front();
+            while (explorer.hasUnexploredState()) {
+                uint64_t currId = explorer.exploreNextState();
 
-                uint64_t currMdpState = beliefStateMap.left.at(currId);
                 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
-
-                mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-
                 if (targetObservations.count(currObservation) != 0) {
-                    // Make this state absorbing
-                    targetStates.push_back(currMdpState);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                    ++mdpMatrixRow;
-                } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-                    // TODO: with rewards we would have to assign the corresponding reward to this transition
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
-                    ++mdpMatrixRow;
+                    explorer.setCurrentStateIsTarget();
+                    explorer.addSelfloopTransition();
                 } else {
-                    fullyExpandedStates.grow(nextMdpStateId, false);
-                    fullyExpandedStates.set(currMdpState, true);
-                    uint64_t someState = beliefManager->getBelief(currId).begin()->first;
-                    uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-
-                    for (uint64_t action = 0; action < numChoices; ++action) {
+                    bool stopExploration = false;
+                    if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                        stopExploration = true;
+                        explorer.setCurrentStateIsTruncated();
+                    }
+                    for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+                        ValueType truncationProbability = storm::utility::zero<ValueType>();
+                        ValueType truncationValueBound = storm::utility::zero<ValueType>();
                         auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-                        // Check for newly found grid points
-                        foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
                         for (auto const& successor : successorGridPoints) {
-                            auto successorId = successor.first;
-                            auto const& successorBelief = beliefManager->getBelief(successorId);
-                            auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
-                            if (!foundBeliefs.get(successorId)) {
-                                foundBeliefs.set(successorId);
-                                beliefsToBeExpanded.push_back(successorId);
-                                beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-                                ++nextMdpStateId;
-
-                                if (boundMapsSet) {
-                                    ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
-                                    ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
-                                    if (cc.isEqual(upperBound, lowerBound)) {
-                                        hintVector.push_back(lowerBound);
-                                    } else {
-                                        hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                    }
-                                    weightedSumOverMap[successorId] = upperBound;
-                                    weightedSumUnderMap[successorId] = lowerBound;
-                                } else {
-                                    hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                }
+                            bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                            if (!added) {
+                                STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                // We did not explore this successor state. Get a bound on the "missing" value
+                                truncationProbability += successor.second;
+                                truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
                             }
-                            auto successorMdpState = beliefStateMap.left.at(successorId);
-                            // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
-                            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                         }
-                        ++mdpMatrixRow;
+                        if (stopExploration) {
+                            if (computeRewards) {
+                                explorer.addTransitionsToExtraStates(action, truncationProbability);
+                            } else {
+                                explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                            }
+                        }
+                        if (computeRewards) {
+                            // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                            explorer.computeRewardAtCurrentState(action, truncationValueBound);
+                        }
                     }
                 }
                 if (storm::utility::resources::isTerminate()) {
@@ -525,64 +437,30 @@
                     break;
                 }
             }
-            statistics.overApproximationStates = nextMdpStateId;
-            STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
+            statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
             if (storm::utility::resources::isTerminate()) {
                 statistics.overApproximationBuildTime.stop();
                 return nullptr;
             }
-            fullyExpandedStates.resize(nextMdpStateId, false);
-            storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-            mdpLabeling.addLabel("init");
-            mdpLabeling.addLabel("target");
-            mdpLabeling.addLabelToState("init", initialMdpState);
-            for (auto targetState : targetStates) {
-                mdpLabeling.addLabelToState("target", targetState);
-            }
-            storm::storage::sparse::ModelComponents<ValueType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-            auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
-            if (computeRewards) {
-                storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-                for (auto const &iter : beliefStateMap.left) {
-                    if (fullyExpandedStates.get(iter.second)) {
-                        auto const& currentBelief = beliefManager->getBelief(iter.first);
-                        auto representativeState = currentBelief.begin()->first;
-                        for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                            uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                            mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
-                        }
-                    }
-                }
-                overApproxMdp->addRewardModel("default", mdpRewardModel);
-            }
+            explorer.finishExploration();
             statistics.overApproximationBuildTime.stop();
             STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-            overApproxMdp->printModelInformationToStream(std::cout);
-
-            auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(overApproxMdp);
-            auto property = createStandardProperty(min, computeRewards);
-            auto task = createStandardCheckTask(property, std::move(hintVector));
+            explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
             statistics.overApproximationCheckTime.start();
-            std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
+            explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
             statistics.overApproximationCheckTime.stop();
-            if (storm::utility::resources::isTerminate() && !res) {
-                return nullptr;
-            }
-            STORM_LOG_ASSERT(res, "Result does not exist.");
-            res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
-            auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-            auto overApprox = overApproxResultMap[initialMdpState];
             STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
-            STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+            STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
+            //auto underApprox = weightedSumUnderMap[initialBelief.id];
             auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
             if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                 // TODO: return other components needed for refinement.
-                //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
+                //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
             }
             STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
 
@@ -592,8 +470,8 @@
                                underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
                                underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */
-            return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                                                                     underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
+            return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
+                                                                                                     underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
         }
 
@@ -930,14 +808,14 @@
         template<typename ValueType, typename RewardModelType>
         std::unique_ptr<POMDPCheckResult<ValueType>>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
             std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-            return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
+            // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
         }
 
         template<typename ValueType, typename RewardModelType>
         std::unique_ptr<POMDPCheckResult<ValueType>>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
             std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-            return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+            // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
         }
 
@@ -1191,7 +1069,7 @@
                     auto representativeState = currentBelief.begin()->first;
                     for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
                         uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                        mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
+                        mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
                     }
                 }
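A worked example of the truncation bookkeeping in computeFirstRefinementStep above (numbers are illustrative): suppose exploration stops at a belief and one of its actions reaches unexplored successors with total probability truncationProbability = 0.4, whose probability-weighted value bounds sum to truncationValueBound = 0.3. In the probability case, the explorer adds a transition to the extra target state with value 0.3 and to the extra bottom state with 0.4 - 0.3 = 0.1; the remaining 0.6 still goes to already-known grid points, so the row sums to one and the cut-off mass is accounted for soundly. In the reward case, the whole 0.4 goes to the absorbing extra target state and the 0.3 is instead added on top of the action reward via computeRewardAtCurrentState.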
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index a97d36cb2..6216de097 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -117,8 +117,7 @@
          */
         std::shared_ptr<RefinementComponents<ValueType>>
         computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                   bool computeRewards, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                   boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                                   bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
 
         std::shared_ptr<RefinementComponents<ValueType>> computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                std::vector<uint64_t> &observationResolutionVector,
@@ -140,10 +139,8 @@
          * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
          */
         std::unique_ptr<POMDPCheckResult<ValueType>>
-        computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                               std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-                               boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                               boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+        computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+                               std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
 
         /**
          * Helper to compute an underapproximation of the reachability property.
diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index efc6bee2a..9cb7c039c 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -45,6 +45,8 @@
         };
 
         BeliefType const& getBelief(BeliefId const& id) const {
+            STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
+            STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
             return beliefs[id];
         }
 
@@ -54,6 +56,10 @@
             return idIt->second;
         }
 
+        BeliefId noId() const {
+            return std::numeric_limits<BeliefId>::max();
+        }
+
         std::string toString(BeliefType const& belief) const {
             std::stringstream str;
             str << "{ ";
@@ -180,11 +186,22 @@
             return true;
         }
 
+        template<typename SummandsType>
+        ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+            ValueType result = storm::utility::zero<ValueType>();
+            for (auto const& entry : getBelief(beliefId)) {
+                result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+            }
+            return result;
+        }
+
+
         BeliefId const& getInitialBelief() const {
             return initialBeliefId;
         }
 
-        ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+        ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+            auto const& belief = getBelief(beliefId);
             STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
             auto result = storm::utility::zero<ValueType>();
             auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
@@ -206,6 +223,11 @@
             return getBeliefObservation(getBelief(beliefId));
         }
 
+        uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+            auto belief = getBelief(beliefId);
+            return pomdp.getNumberOfChoices(belief.begin()->first);
+        }
+
         Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
             //TODO this can also be simplified using the sparse vector interpretation
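The new BeliefManager::getWeightedSum is what the explorer's computeLowerValueBoundAtBelief / computeUpperValueBoundAtBelief calls reduce to. A small worked example (illustrative numbers): for a belief {s0 -> 0.25, s1 -> 0.75} and a summands vector with summands[s0] = 1.0 and summands[s1] = 0.2, the result is 0.25 * 1.0 + 0.75 * 0.2 = 0.4.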