#pragma once #include #include #include #include #include #include "storm-parsers/api/properties.h" #include "storm/api/properties.h" #include "storm/api/verification.h" #include "storm/storage/BitVector.h" #include "storm/storage/SparseMatrix.h" #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" #include "storm/utility/SignalHandler.h" #include "storm/modelchecker/results/CheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" namespace storm { namespace builder { template class BeliefMdpExplorer { public: typedef typename PomdpType::ValueType ValueType; typedef storm::storage::BeliefManager BeliefManagerType; typedef typename BeliefManagerType::BeliefId BeliefId; typedef uint64_t MdpStateType; enum class Status { Uninitialized, Exploring, ModelFinished, ModelChecked }; BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) { // Intentionally left empty } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; BeliefManagerType const& getBeliefManager() const { return *beliefManager; } void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { status = Status::Exploring; // Reset data from potential previous explorations mdpStateToBeliefIdMap.clear(); beliefIdToMdpStateMap.clear(); exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); mdpStatesToExplore.clear(); lowerValueBounds.clear(); upperValueBounds.clear(); values.clear(); exploredMdpTransitions.clear(); exploredChoiceIndices.clear(); mdpActionRewards.clear(); exploredMdp = nullptr; internalAddRowGroupIndex(); // Mark the start of the first row group // Add some states with special treatment (if requested) if (extraBottomStateValue) { currentMdpState = getCurrentNumberOfMdpStates(); extraBottomState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); internalAddRowGroupIndex(); } else { extraBottomState = boost::none; } if (extraTargetStateValue) { currentMdpState = getCurrentNumberOfMdpStates(); extraTargetState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); internalAddRowGroupIndex(); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(extraTargetState.get(), true); } else { extraTargetState = boost::none; } currentMdpState = noState(); // Set up the initial state. initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } /*! * Restarts the exploration to allow re-exploring each state. * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty * This method inserts the initial state of the MDP in the exploration queue. * While re-exploring, the reference to the old MDP remains valid. */ void restartExploration() { STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); status = Status::Exploring; // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); exploredMdpTransitions.clear(); exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); mdpActionRewards.clear(); if (exploredMdp->hasRewardModel()) { // Can be overwritten during exploration mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); } targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); mdpStatesToExplore.clear(); // The extra states are not changed if (extraBottomState) { currentMdpState = extraBottomState.get(); restoreOldBehaviorAtCurrentState(0); } if (extraTargetState) { currentMdpState = extraTargetState.get(); restoreOldBehaviorAtCurrentState(0); targetStates.set(extraTargetState.get(), true); } currentMdpState = noState(); // Set up the initial state. initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } bool hasUnexploredState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return !mdpStatesToExplore.empty(); } BeliefId exploreNextState() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // Mark the end of the previously explored row group. if (currentMdpState != noState() && !currentStateHasOldBehavior()) { internalAddRowGroupIndex(); } // Pop from the queue. currentMdpState = mdpStatesToExplore.front(); mdpStatesToExplore.pop_front(); return mdpStateToBeliefIdMap[currentMdpState]; } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; if (!storm::utility::isZero(bottomStateValue)) { STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); internalAddTransition(row, extraBottomState.get(), bottomStateValue); } if (!storm::utility::isZero(targetStateValue)) { STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); internalAddTransition(row, extraTargetState.get(), targetStateValue); } } void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } /*! * Adds the next transition to the given successor belief * @param localActionIndex * @param transitionTarget * @param value * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. */ bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); MdpStateType column; if (ignoreNewBeliefs) { column = getExploredMdpState(transitionTarget); if (column == noState()) { return false; } } else { column = getOrAddMdpState(transitionTarget); } uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, column, value); return true; } void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); } uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; } void setCurrentStateIsTarget() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(getCurrentMdpState(), true); } void setCurrentStateIsTruncated() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); truncatedStates.grow(getCurrentNumberOfMdpStates(), false); truncatedStates.set(getCurrentMdpState(), true); } bool currentStateHasOldBehavior() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); } /*! * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. * Does NOT set whether the state is truncated and/or target. * Will add "old" states that have not been considered before into the exploration queue * @param localActionIndex */ void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); // Insert the transitions for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); // Check whether exploration is needed auto beliefId = getBeliefId(transition.getColumn()); if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state if (!exploredBeliefIds.get(beliefId)) { // This belief needs exploration exploredBeliefIds.set(beliefId, true); mdpStatesToExplore.push_back(transition.getColumn()); } } } // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values } void finishExploration() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); // Complete the exploration // Finish the last row grouping in case the last explored state was new if (!currentStateHasOldBehavior()) { internalAddRowGroupIndex(); } // Resize state- and choice based vectors to the correct size targetStates.resize(getCurrentNumberOfMdpStates(), false); truncatedStates.resize(getCurrentNumberOfMdpStates(), false); if (!mdpActionRewards.empty()) { mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); } // We are not exploring anymore currentMdpState = noState(); // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). // We get rid of these before rebuilding the model if (exploredMdp) { dropUnexploredStates(); } // Create the tranistion matrix uint64_t entryCount = 0; for (auto const& row : exploredMdpTransitions) { entryCount += row.size(); } storm::storage::SparseMatrixBuilder builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { uint64_t rowIndex = exploredChoiceIndices[groupIndex]; uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; builder.newRowGroup(rowIndex); for (; rowIndex < groupEnd; ++rowIndex) { for (auto const& entry : exploredMdpTransitions[rowIndex]) { builder.addNextValue(rowIndex, entry.first, entry.second); } } } auto mdpTransitionMatrix = builder.build(); // Create a standard labeling storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); mdpLabeling.addLabel("init"); mdpLabeling.addLabelToState("init", initialMdpState); targetStates.resize(getCurrentNumberOfMdpStates(), false); mdpLabeling.addLabel("target", std::move(targetStates)); truncatedStates.resize(getCurrentNumberOfMdpStates(), false); mdpLabeling.addLabel("truncated", std::move(truncatedStates)); // Create a standard reward model (if rewards are available) std::unordered_map> mdpRewardModels; if (!mdpActionRewards.empty()) { mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); } storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; } void dropUnexploredStates() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); // Find the states (and corresponding choices) that were not explored. // These correspond to "empty" MDP transitions storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); std::vector toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); MdpStateType nextRelevantIndex = 0; for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { uint64_t rowIndex = exploredChoiceIndices[groupIndex]; // Check first row in group if (exploredMdpTransitions[rowIndex].empty()) { relevantMdpChoices.set(rowIndex, false); relevantMdpStates.set(groupIndex, false); } else { toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; ++nextRelevantIndex; } uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; // process remaining rows in group for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { // Assert that all actions at the current state were consistently explored or unexplored. STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); if (exploredMdpTransitions[rowIndex].empty()) { relevantMdpChoices.set(rowIndex, false); } } } if (relevantMdpStates.full()) { // All states are relevant so nothing to do return; } // Translate various components to the "new" MDP state set storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); { // beliefIdToMdpStateMap for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { if (relevantMdpStates.get(belIdToMdpStateIt->second)) { // Keep current entry and move on to the next one. ++belIdToMdpStateIt; } else { STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); // Delete current entry and move on to the next one. // This works because std::map::erase does not invalidate other iterators within the map! beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); } } } { // exploredMdpTransitions storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map // It might payoff to do this when building the matrix. for (auto& transitions : exploredMdpTransitions) { std::map newTransitions; for (auto const& entry : transitions) { STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); } transitions = std::move(newTransitions); } } { // exploredChoiceIndices MdpStateType newState = 0; assert(exploredChoiceIndices[0] == 0u); // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched. for (auto const& oldState : relevantMdpStates) { if (oldState != newState) { assert(oldState > newState); uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; } ++newState; } exploredChoiceIndices.resize(newState + 1); } if (!mdpActionRewards.empty()) { storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); } if (extraBottomState) { extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; } if (extraTargetState) { extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; } targetStates = targetStates % relevantMdpStates; truncatedStates = truncatedStates % relevantMdpStates; initialMdpState = toRelevantStateIndexMap[initialMdpState]; storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); } std::shared_ptr> getExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); return exploredMdp; } MdpStateType getCurrentNumberOfMdpStates() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); } MdpStateType getCurrentNumberOfMdpChoices() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return exploredMdpTransitions.size(); } MdpStateType getStartOfCurrentRowGroup() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return exploredChoiceIndices[getCurrentMdpState()]; } ValueType getLowerValueBoundAtCurrentState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return lowerValueBounds[getCurrentMdpState()]; } ValueType getUpperValueBoundAtCurrentState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return upperValueBounds[getCurrentMdpState()]; } ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); } ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); } void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); auto task = createStandardCheckTask(property); std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); if (res) { values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); } else { STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); STORM_LOG_ERROR("No result obtained while checking."); } status = Status::ModelChecked; } bool hasComputedValues() const { return status == Status::ModelChecked; } std::vector const& getValuesOfExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; } ValueType const& getComputedValueAtInitialState() const { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } MdpStateType getBeliefId(MdpStateType exploredMdpState) const { STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); return mdpStateToBeliefIdMap[exploredMdpState]; } struct SuccessorObservationInformation { SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { // Intentionally left empty. } void join(SuccessorObservationInformation other) { observationProbability += other.observationProbability; maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); successorWithObsCount += other.successorWithObsCount; } ValueType observationProbability; /// The probability we move to the corresponding observation. ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. uint64_t successorWithObsCount; /// The number of successors with this observation }; void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map gatheredSuccessorObservations) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); } void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map gatheredSuccessorObservations) { STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { auto const& beliefId = getBeliefId(entry.getColumn()); if (beliefId != beliefManager->noId()) { auto const& obs = beliefManager->getBeliefObservation(beliefId); SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); if (!obsInsertion.second) { // There already is an entry for this observation, so join the two informations obsInsertion.first->second.join(info); } } } } private: MdpStateType noState() const { return std::numeric_limits::max(); } std::shared_ptr createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { std::string propertyString = computeRewards ? "R" : "P"; propertyString += storm::solver::minimize(dir) ? "min" : "max"; propertyString += "=? [F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); return storm::api::extractFormulasFromProperties(propertyVector).front(); } storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property) { //Note: The property should not run out of scope after calling this because the task only stores the property by reference. // Therefore, this method needs the property by reference (and not const reference) auto task = storm::api::createTask(property, false); auto hint = storm::modelchecker::ExplicitModelCheckerHint(); hint.setResultHint(values); auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); return task; } MdpStateType getCurrentMdpState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return currentMdpState; } MdpStateType getCurrentBeliefId() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return getBeliefId(getCurrentMdpState()); } void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); if (row == exploredMdpTransitions.size()) { exploredMdpTransitions.emplace_back(); } STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); exploredMdpTransitions[row][column] = value; } void internalAddRowGroupIndex() { exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); } MdpStateType getExploredMdpState(BeliefId const& beliefId) const { if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap.at(beliefId); } else { return noState(); } } void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { lowerValueBounds.push_back(lowerBound); upperValueBounds.push_back(upperBound); // Take the middle value as a hint values.push_back((lowerBound + upperBound) / storm::utility::convertNumber(2)); STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); } MdpStateType getOrAddMdpState(BeliefId const& beliefId) { exploredBeliefIds.grow(beliefId + 1, false); if (exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap[beliefId]; } else { // This state needs exploration exploredBeliefIds.set(beliefId, true); // If this is a restart of the exploration, we still might have an MDP state for the belief if (exploredMdp) { auto findRes = beliefIdToMdpStateMap.find(beliefId); if (findRes != beliefIdToMdpStateMap.end()) { mdpStatesToExplore.push_back(findRes->second); return findRes->second; } } // At this poind we need to add a new MDP state MdpStateType result = getCurrentNumberOfMdpStates(); assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); mdpStateToBeliefIdMap.push_back(beliefId); beliefIdToMdpStateMap[beliefId] = result; insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); mdpStatesToExplore.push_back(result); return result; } } // Belief state related information std::shared_ptr beliefManager; std::vector mdpStateToBeliefIdMap; std::map beliefIdToMdpStateMap; storm::storage::BitVector exploredBeliefIds; // Exploration information std::deque mdpStatesToExplore; std::vector> exploredMdpTransitions; std::vector exploredChoiceIndices; std::vector mdpActionRewards; uint64_t currentMdpState; // Special states during exploration boost::optional extraTargetState; boost::optional extraBottomState; storm::storage::BitVector targetStates; storm::storage::BitVector truncatedStates; MdpStateType initialMdpState; // Final Mdp std::shared_ptr> exploredMdp; // Value related information std::vector const& pomdpLowerValueBounds; std::vector const& pomdpUpperValueBounds; std::vector lowerValueBounds; std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed // The current status of this explorer Status status; }; } }