Alexander Bork
5 years ago
committed by
Tim Quatmann
2 changed files with 903 additions and 702 deletions
@ -0,0 +1,793 @@ |
|||
#include "BeliefMdpExplorer.h"
|
|||
|
|||
namespace storm { |
|||
namespace builder { |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, |
|||
storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const &pomdpValueBounds) : beliefManager( |
|||
beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) { |
|||
// Intentionally left empty
|
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefManagerType const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefManager() const { |
|||
return *beliefManager; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void
BeliefMdpExplorer<PomdpType, BeliefValueType>::startNewExploration(boost::optional<ValueType> extraTargetStateValue, boost::optional<ValueType> extraBottomStateValue) {
    // Begin a fresh exploration of the belief MDP, discarding all data from any previous
    // exploration. If extraBottomStateValue / extraTargetStateValue is given, a dedicated
    // absorbing bottom / target state with that value (used as both lower and upper value
    // hint) is created before the initial belief state.
    status = Status::Exploring;
    // Reset data from potential previous explorations
    mdpStateToBeliefIdMap.clear();
    beliefIdToMdpStateMap.clear();
    exploredBeliefIds.clear();
    exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
    mdpStatesToExplore.clear();
    lowerValueBounds.clear();
    upperValueBounds.clear();
    values.clear();
    exploredMdpTransitions.clear();
    exploredChoiceIndices.clear();
    mdpActionRewards.clear();
    targetStates.clear();
    truncatedStates.clear();
    delayedExplorationChoices.clear();
    optimalChoices = boost::none;
    optimalChoicesReachableMdpStates = boost::none;
    exploredMdp = nullptr;
    internalAddRowGroupIndex(); // Mark the start of the first row group

    // Add some states with special treatment (if requested)
    if (extraBottomStateValue) {
        currentMdpState = getCurrentNumberOfMdpStates();
        extraBottomState = currentMdpState;
        // The extra state corresponds to no belief (noId) and gets a single self-loop choice,
        // i.e. it is absorbing.
        mdpStateToBeliefIdMap.push_back(beliefManager->noId());
        insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());

        internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>());
        internalAddRowGroupIndex();
    } else {
        extraBottomState = boost::none;
    }
    if (extraTargetStateValue) {
        currentMdpState = getCurrentNumberOfMdpStates();
        extraTargetState = currentMdpState;
        // Same construction as the bottom state: belief-less and absorbing.
        mdpStateToBeliefIdMap.push_back(beliefManager->noId());
        insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());

        internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>());
        internalAddRowGroupIndex();

        // Additionally flag the extra target state as a target.
        targetStates.grow(getCurrentNumberOfMdpStates(), false);
        targetStates.set(extraTargetState.get(), true);
    } else {
        extraTargetState = boost::none;
    }
    currentMdpState = noState();

    // Set up the initial state.
    initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restartExploration() {
    // Re-start an exploration on top of a previously built (and possibly model checked) MDP.
    // State-based data is kept; per-choice data (transitions, rewards) is re-initialized so
    // that individual states can be re-explored or have their old behavior restored.
    STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status.");
    status = Status::Exploring;
    // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid.
    exploredBeliefIds.clear();
    exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
    exploredMdpTransitions.clear();
    exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
    // Row grouping is taken over from the old MDP.
    exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
    mdpActionRewards.clear();
    if (exploredMdp->hasRewardModel()) {
        // Can be overwritten during exploration
        mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
    }
    targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
    truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
    delayedExplorationChoices.clear();
    mdpStatesToExplore.clear();

    // The extra states are not changed; their (single) absorbing choice is copied over from the old MDP.
    if (extraBottomState) {
        currentMdpState = extraBottomState.get();
        restoreOldBehaviorAtCurrentState(0);
    }
    if (extraTargetState) {
        currentMdpState = extraTargetState.get();
        restoreOldBehaviorAtCurrentState(0);
        targetStates.set(extraTargetState.get(), true);
    }
    currentMdpState = noState();

    // Set up the initial state.
    initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasUnexploredState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return !mdpStatesToExplore.empty(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId BeliefMdpExplorer<PomdpType, BeliefValueType>::exploreNextState() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
// Mark the end of the previously explored row group.
|
|||
if (currentMdpState != noState() && !currentStateHasOldBehavior()) { |
|||
internalAddRowGroupIndex(); |
|||
} |
|||
|
|||
// Pop from the queue.
|
|||
currentMdpState = mdpStatesToExplore.front(); |
|||
mdpStatesToExplore.pop_front(); |
|||
|
|||
|
|||
return mdpStateToBeliefIdMap[currentMdpState]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue, |
|||
ValueType const &bottomStateValue) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
|||
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
if (!storm::utility::isZero(bottomStateValue)) { |
|||
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); |
|||
internalAddTransition(row, extraBottomState.get(), bottomStateValue); |
|||
} |
|||
if (!storm::utility::isZero(targetStateValue)) { |
|||
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); |
|||
internalAddTransition(row, extraTargetState.get(), targetStateValue); |
|||
} |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addSelfloopTransition(uint64_t const &localActionIndex, ValueType const &value) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
|||
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
internalAddTransition(row, getCurrentMdpState(), value); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, |
|||
bool ignoreNewBeliefs) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
|||
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
|
|||
MdpStateType column; |
|||
if (ignoreNewBeliefs) { |
|||
column = getExploredMdpState(transitionTarget); |
|||
if (column == noState()) { |
|||
return false; |
|||
} |
|||
} else { |
|||
column = getOrAddMdpState(transitionTarget); |
|||
} |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
internalAddTransition(row, column, value); |
|||
return true; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeRewardAtCurrentState(uint64 const &localActionIndex, ValueType extraReward) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTarget() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTruncated() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
truncatedStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentChoiceIsDelayed(uint64_t const &localActionIndex) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false); |
|||
delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasOldBehavior() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); |
|||
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateWasTruncated() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
|||
return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState()); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::stateIsOptimalSchedulerReachable(MdpStateType mdpState) const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), |
|||
"Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoicesReachableMdpStates->get(mdpState); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionIsOptimal(uint64_t const &globalActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoices->get(globalActionIndex); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateIsOptimalSchedulerReachable() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), |
|||
"Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoicesReachableMdpStates->get(getCurrentMdpState()); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(optimalChoices.is_initialized(), |
|||
"Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
uint64_t choice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
return optimalChoices->get(choice); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
|||
uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex; |
|||
return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex) {
    // Copy the transitions of the given action at the current state from the previously
    // explored MDP into the current exploration, and queue any successor whose belief has
    // not been explored yet in this round.
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any.");
    STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState],
                     "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");

    uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex;
    STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index.");

    // Insert the transitions
    for (auto const &transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) {
        internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue());
        // Check whether exploration is needed
        auto beliefId = getBeliefId(transition.getColumn());
        if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
            if (!exploredBeliefIds.get(beliefId)) {
                // This belief needs exploration
                exploredBeliefIds.set(beliefId, true);
                mdpStatesToExplore.push_back(transition.getColumn());
            }
        }
    }

    // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::finishExploration() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
|||
|
|||
// Complete the exploration
|
|||
// Finish the last row grouping in case the last explored state was new
|
|||
if (!currentStateHasOldBehavior()) { |
|||
internalAddRowGroupIndex(); |
|||
} |
|||
// Resize state- and choice based vectors to the correct size
|
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
|
|||
// We are not exploring anymore
|
|||
currentMdpState = noState(); |
|||
|
|||
// If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build).
|
|||
// We get rid of these before rebuilding the model
|
|||
if (exploredMdp) { |
|||
dropUnexploredStates(); |
|||
} |
|||
|
|||
// The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore.
|
|||
optimalChoices = boost::none; |
|||
optimalChoicesReachableMdpStates = boost::none; |
|||
|
|||
// Create the tranistion matrix
|
|||
uint64_t entryCount = 0; |
|||
for (auto const &row : exploredMdpTransitions) { |
|||
entryCount += row.size(); |
|||
} |
|||
storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, |
|||
getCurrentNumberOfMdpStates()); |
|||
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
|||
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
|||
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
|||
builder.newRowGroup(rowIndex); |
|||
for (; rowIndex < groupEnd; ++rowIndex) { |
|||
for (auto const &entry : exploredMdpTransitions[rowIndex]) { |
|||
builder.addNextValue(rowIndex, entry.first, entry.second); |
|||
} |
|||
} |
|||
} |
|||
auto mdpTransitionMatrix = builder.build(); |
|||
|
|||
// Create a standard labeling
|
|||
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); |
|||
mdpLabeling.addLabel("init"); |
|||
mdpLabeling.addLabelToState("init", initialMdpState); |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("target", std::move(targetStates)); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("truncated", std::move(truncatedStates)); |
|||
|
|||
// Create a standard reward model (if rewards are available)
|
|||
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels; |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
mdpRewardModels.emplace("default", |
|||
storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards))); |
|||
} |
|||
|
|||
// Create model components
|
|||
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); |
|||
|
|||
// Potentially create a choice labeling
|
|||
if (!delayedExplorationChoices.empty()) { |
|||
modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices()); |
|||
delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false); |
|||
modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices)); |
|||
} |
|||
|
|||
// Create the final model.
|
|||
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents)); |
|||
status = Status::ModelFinished; |
|||
STORM_LOG_DEBUG( |
|||
"Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); |
|||
|
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::dropUnexploredStates() {
    // Remove all states (and their choices) that were NOT explored in the current round from
    // the internal data structures, re-indexing everything to the compacted state set.
    // A state counts as unexplored when its transition rows are empty.
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states.");

    STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available.");
    // Find the states (and corresponding choices) that were not explored.
    // These correspond to "empty" MDP transitions
    storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true);
    // Maps an old state index to its index in the compacted state set (noState() if dropped).
    std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
    MdpStateType nextRelevantIndex = 0;
    for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
        uint64_t rowIndex = exploredChoiceIndices[groupIndex];
        // Check first row in group
        if (exploredMdpTransitions[rowIndex].empty()) {
            relevantMdpChoices.set(rowIndex, false);
            relevantMdpStates.set(groupIndex, false);
        } else {
            toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
            ++nextRelevantIndex;
        }
        uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
        // process remaining rows in group
        for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
            // Assert that all actions at the current state were consistently explored or unexplored.
            STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex),
                             "Actions at 'old' MDP state " << groupIndex << " were only partly explored.");
            if (exploredMdpTransitions[rowIndex].empty()) {
                relevantMdpChoices.set(rowIndex, false);
            }
        }
    }

    if (relevantMdpStates.full()) {
        // All states are relevant so nothing to do
        return;
    }

    // Translate various components to the "new" MDP state set
    storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates);
    { // beliefIdToMdpStateMap
        for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
            if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
                // Translate current entry and move on to the next one.
                belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
                ++belIdToMdpStateIt;
            } else {
                STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first),
                                 "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
                // Delete current entry and move on to the next one.
                // This works because std::map::erase does not invalidate other iterators within the map!
                beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
            }
        }
    }
    { // exploredMdpTransitions
        storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices);
        // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map
        // It might payoff to do this when building the matrix.
        for (auto &transitions : exploredMdpTransitions) {
            std::map<MdpStateType, ValueType> newTransitions;
            for (auto const &entry : transitions) {
                STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state.");
                // emplace_hint(end) keeps insertion O(1) since the old map is traversed in
                // key order and the index translation is monotone.
                newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
            }
            transitions = std::move(newTransitions);
        }
    }
    { // exploredChoiceIndices
        MdpStateType newState = 0;
        assert(exploredChoiceIndices[0] == 0u);
        // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched.
        for (auto const &oldState : relevantMdpStates) {
            if (oldState != newState) {
                assert(oldState > newState);
                uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState];
                exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize;
            }
            ++newState;
        }
        exploredChoiceIndices.resize(newState + 1);
    }
    if (!mdpActionRewards.empty()) {
        storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices);
    }
    // The extra states are never dropped (they always have a self-loop), only re-indexed.
    if (extraBottomState) {
        extraBottomState = toRelevantStateIndexMap[extraBottomState.get()];
    }
    if (extraTargetState) {
        extraTargetState = toRelevantStateIndexMap[extraTargetState.get()];
    }
    // '%' projects a bit vector onto the positions selected by relevantMdpStates.
    targetStates = targetStates % relevantMdpStates;
    truncatedStates = truncatedStates % relevantMdpStates;
    initialMdpState = toRelevantStateIndexMap[initialMdpState];

    storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates);
    storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates);
    storm::utility::vector::filterVectorInPlace(values, relevantMdpStates);
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
std::shared_ptr<storm::models::sparse::Mdp<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> |
|||
BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); |
|||
return exploredMdp; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpStates() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap.size(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpChoices() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredMdpTransitions.size(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getStartOfCurrentRowGroup() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredChoiceIndices[getCurrentMdpState()]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getLowerValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return lowerValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getUpperValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return upperValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType |
|||
BeliefMdpExplorer<PomdpType, BeliefValueType>::computeLowerValueBoundAtBelief(BeliefId const &beliefId) const { |
|||
STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available."); |
|||
auto it = pomdpValueBounds.lower.begin(); |
|||
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
|||
for (++it; it != pomdpValueBounds.lower.end(); ++it) { |
|||
result = std::max(result, beliefManager->getWeightedSum(beliefId, *it)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType |
|||
BeliefMdpExplorer<PomdpType, BeliefValueType>::computeUpperValueBoundAtBelief(BeliefId const &beliefId) const { |
|||
STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available."); |
|||
auto it = pomdpValueBounds.upper.begin(); |
|||
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
|||
for (++it; it != pomdpValueBounds.upper.end(); ++it) { |
|||
result = std::min(result, beliefManager->getWeightedSum(beliefId, *it)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeValuesOfExploredMdp(storm::solver::OptimizationDirection const &dir) {
    // Model check the finished MDP (reachability probability, or reward if the MDP has a
    // reward model) in the given optimization direction and store the per-state result
    // vector in 'values'. Afterwards the explorer is in status ModelChecked.
    STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
    auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
    auto task = createStandardCheckTask(property);

    std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
    if (res) {
        values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
        // Sanity checks (debug builds only): the computed values should lie within the known bounds.
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()),
                                  "Computed values are smaller than the lower bound.");
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()),
                                  "Computed values are larger than the upper bound.");
    } else {
        // An empty result is only expected when the computation was aborted (e.g. timeout/signal).
        STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
        STORM_LOG_ERROR("No result obtained while checking.");
    }
    status = Status::ModelChecked;
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasComputedValues() const { |
|||
return status == Status::ModelChecked; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
std::vector<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getValuesOfExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
return values; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getComputedValueAtInitialState() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); |
|||
return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefId(MdpStateType exploredMdpState) const { |
|||
STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap[exploredMdpState]; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, |
|||
std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
|||
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); |
|||
|
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice,
                                                                                                     std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
    // Collects, per successor observation of the given MDP choice, aggregated information
    // (probabilities, count, belief support) and merges it into `gatheredSuccessorObservations`.
    STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
    for (auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
        auto const &beliefId = getBeliefId(entry.getColumn());
        // Successors without an associated belief (e.g. extra target/bottom states) are skipped.
        if (beliefId != beliefManager->noId()) {
            auto const &obs = beliefManager->getBeliefObservation(beliefId);
            SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1);
            auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
            if (!obsInsertion.second) {
                // There already is an entry for this observation, so join the two pieces of information.
                obsInsertion.first->second.join(info);
            }
            // Extend the stored support by the states of this successor belief (done for new and joined entries alike).
            beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
        }
    }
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, |
|||
storm::storage::BitVector const &observationSet) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
|||
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
for (auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
|||
auto const &beliefId = getBeliefId(entry.getColumn()); |
|||
if (observationSet.get(beliefManager->getBeliefObservation(beliefId))) { |
|||
return true; |
|||
} |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsUpperBounds() { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
upperValueBounds = values; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsLowerBounds() { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
lowerValueBounds = values; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference) { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid in if no MDP is available."); |
|||
STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before."); |
|||
STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), |
|||
"Tried to compute states that are reachable under an optimal scheduler but this has already been done before."); |
|||
|
|||
// First find the choices that are optimal
|
|||
optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false); |
|||
auto const &choiceIndices = exploredMdp->getNondeterministicChoiceIndices(); |
|||
auto const &transitions = exploredMdp->getTransitionMatrix(); |
|||
auto const &targetStates = exploredMdp->getStates("target"); |
|||
for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) { |
|||
if (targetStates.get(mdpState)) { |
|||
// Target states can be skipped.
|
|||
continue; |
|||
} else { |
|||
auto const &stateValue = values[mdpState]; |
|||
for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) { |
|||
ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values); |
|||
if (exploredMdp->hasRewardModel()) { |
|||
choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice); |
|||
} |
|||
ValueType absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue)); |
|||
if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) { |
|||
optimalChoices->set(globalChoice, true); |
|||
} |
|||
} |
|||
STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice."); |
|||
} |
|||
} |
|||
|
|||
// Then, find the states that are reachable via these choices
|
|||
optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, |
|||
optimalChoices.get()); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::noState() const { |
|||
return std::numeric_limits<MdpStateType>::max(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
std::shared_ptr<storm::logic::Formula const> |
|||
BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardProperty(storm::solver::OptimizationDirection const &dir, bool computeRewards) { |
|||
std::string propertyString = computeRewards ? "R" : "P"; |
|||
propertyString += storm::solver::minimize(dir) ? "min" : "max"; |
|||
propertyString += "=? [F \"target\"]"; |
|||
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString); |
|||
return storm::api::extractFormulasFromProperties(propertyVector).front(); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
storm::modelchecker::CheckTask<storm::logic::Formula, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>
BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property) {
    // Creates a check task for the given property and attaches the current `values`
    // vector as a result hint to speed up the subsequent model checking call.
    //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
    // Therefore, this method needs the property by reference (and not const reference)
    auto task = storm::api::createTask<ValueType>(property, false);
    // The hint copies `values`; the shared_ptr keeps it alive for the lifetime of the task.
    auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
    hint.setResultHint(values);
    auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
    task.setHint(hintPtr);
    return task;
}
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentMdpState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return currentMdpState; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentBeliefId() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return getBeliefId(getCurrentMdpState()); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddTransition(uint64_t const &row, MdpStateType const &column, ValueType const &value) { |
|||
STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); |
|||
if (row == exploredMdpTransitions.size()) { |
|||
exploredMdpTransitions.emplace_back(); |
|||
} |
|||
STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); |
|||
exploredMdpTransitions[row][column] = value; |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddRowGroupIndex() { |
|||
exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdpState(BeliefId const &beliefId) const { |
|||
if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { |
|||
return beliefIdToMdpStateMap.at(beliefId); |
|||
} else { |
|||
return noState(); |
|||
} |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType> |
|||
void BeliefMdpExplorer<PomdpType, BeliefValueType>::insertValueHints(ValueType const &lowerBound, ValueType const &upperBound) { |
|||
lowerValueBounds.push_back(lowerBound); |
|||
upperValueBounds.push_back(upperBound); |
|||
// Take the middle value as a hint
|
|||
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2)); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); |
|||
} |
|||
|
|||
template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getOrAddMdpState(BeliefId const &beliefId) {
    // Returns the MDP state for the given belief. If the belief has not been explored in the
    // current exploration round, it is marked for exploration; a new MDP state (together with
    // value hints) is created unless a previous exploration already assigned one.
    exploredBeliefIds.grow(beliefId + 1, false);
    if (exploredBeliefIds.get(beliefId)) {
        return beliefIdToMdpStateMap[beliefId];
    } else {
        // This state needs exploration
        exploredBeliefIds.set(beliefId, true);

        // If this is a restart of the exploration, we still might have an MDP state for the belief
        if (exploredMdp) {
            auto findRes = beliefIdToMdpStateMap.find(beliefId);
            if (findRes != beliefIdToMdpStateMap.end()) {
                // Reuse the existing MDP state but queue it for (re-)exploration.
                mdpStatesToExplore.push_back(findRes->second);
                return findRes->second;
            }
        }
        // At this point we need to add a new MDP state
        MdpStateType result = getCurrentNumberOfMdpStates();
        assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
        mdpStateToBeliefIdMap.push_back(beliefId);
        beliefIdToMdpStateMap[beliefId] = result;
        // Seed the value vectors for the new state before queuing it for exploration.
        insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
        mdpStatesToExplore.push_back(result);
        return result;
    }
}
|||
|
|||
// Explicitly instantiate the explorer for the POMDP value types supported by Storm
// (double and exact rational arithmetic).
template
class BeliefMdpExplorer<storm::models::sparse::Pomdp<double>>;

template
class BeliefMdpExplorer<storm::models::sparse::Pomdp<storm::RationalNumber>>;
|||
} |
|||
} |
|||
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue