Alexander Bork
5 years ago
committed by
Tim Quatmann
2 changed files with 903 additions and 702 deletions
@ -0,0 +1,793 @@ |
|||||
|
#include "BeliefMdpExplorer.h"
|
||||
|
|
||||
|
namespace storm { |
||||
|
namespace builder { |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, |
||||
|
storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const &pomdpValueBounds) : beliefManager( |
||||
|
beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) { |
||||
|
// Intentionally left empty
|
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefManagerType const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefManager() const { |
||||
|
return *beliefManager; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::startNewExploration(boost::optional<ValueType> extraTargetStateValue, boost::optional<ValueType> extraBottomStateValue) { |
||||
|
status = Status::Exploring; |
||||
|
// Reset data from potential previous explorations
|
||||
|
mdpStateToBeliefIdMap.clear(); |
||||
|
beliefIdToMdpStateMap.clear(); |
||||
|
exploredBeliefIds.clear(); |
||||
|
exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); |
||||
|
mdpStatesToExplore.clear(); |
||||
|
lowerValueBounds.clear(); |
||||
|
upperValueBounds.clear(); |
||||
|
values.clear(); |
||||
|
exploredMdpTransitions.clear(); |
||||
|
exploredChoiceIndices.clear(); |
||||
|
mdpActionRewards.clear(); |
||||
|
targetStates.clear(); |
||||
|
truncatedStates.clear(); |
||||
|
delayedExplorationChoices.clear(); |
||||
|
optimalChoices = boost::none; |
||||
|
optimalChoicesReachableMdpStates = boost::none; |
||||
|
exploredMdp = nullptr; |
||||
|
internalAddRowGroupIndex(); // Mark the start of the first row group
|
||||
|
|
||||
|
// Add some states with special treatment (if requested)
|
||||
|
if (extraBottomStateValue) { |
||||
|
currentMdpState = getCurrentNumberOfMdpStates(); |
||||
|
extraBottomState = currentMdpState; |
||||
|
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
||||
|
insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); |
||||
|
|
||||
|
internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>()); |
||||
|
internalAddRowGroupIndex(); |
||||
|
} else { |
||||
|
extraBottomState = boost::none; |
||||
|
} |
||||
|
if (extraTargetStateValue) { |
||||
|
currentMdpState = getCurrentNumberOfMdpStates(); |
||||
|
extraTargetState = currentMdpState; |
||||
|
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
||||
|
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); |
||||
|
|
||||
|
internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>()); |
||||
|
internalAddRowGroupIndex(); |
||||
|
|
||||
|
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
||||
|
targetStates.set(extraTargetState.get(), true); |
||||
|
} else { |
||||
|
extraTargetState = boost::none; |
||||
|
} |
||||
|
currentMdpState = noState(); |
||||
|
|
||||
|
// Set up the initial state.
|
||||
|
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restartExploration() { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); |
||||
|
status = Status::Exploring; |
||||
|
// We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid.
|
||||
|
exploredBeliefIds.clear(); |
||||
|
exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); |
||||
|
exploredMdpTransitions.clear(); |
||||
|
exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); |
||||
|
exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); |
||||
|
mdpActionRewards.clear(); |
||||
|
if (exploredMdp->hasRewardModel()) { |
||||
|
// Can be overwritten during exploration
|
||||
|
mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); |
||||
|
} |
||||
|
targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); |
||||
|
truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); |
||||
|
delayedExplorationChoices.clear(); |
||||
|
mdpStatesToExplore.clear(); |
||||
|
|
||||
|
// The extra states are not changed
|
||||
|
if (extraBottomState) { |
||||
|
currentMdpState = extraBottomState.get(); |
||||
|
restoreOldBehaviorAtCurrentState(0); |
||||
|
} |
||||
|
if (extraTargetState) { |
||||
|
currentMdpState = extraTargetState.get(); |
||||
|
restoreOldBehaviorAtCurrentState(0); |
||||
|
targetStates.set(extraTargetState.get(), true); |
||||
|
} |
||||
|
currentMdpState = noState(); |
||||
|
|
||||
|
// Set up the initial state.
|
||||
|
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasUnexploredState() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return !mdpStatesToExplore.empty(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId BeliefMdpExplorer<PomdpType, BeliefValueType>::exploreNextState() { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
// Mark the end of the previously explored row group.
|
||||
|
if (currentMdpState != noState() && !currentStateHasOldBehavior()) { |
||||
|
internalAddRowGroupIndex(); |
||||
|
} |
||||
|
|
||||
|
// Pop from the queue.
|
||||
|
currentMdpState = mdpStatesToExplore.front(); |
||||
|
mdpStatesToExplore.pop_front(); |
||||
|
|
||||
|
|
||||
|
return mdpStateToBeliefIdMap[currentMdpState]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue, |
||||
|
ValueType const &bottomStateValue) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
||||
|
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
||||
|
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
if (!storm::utility::isZero(bottomStateValue)) { |
||||
|
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); |
||||
|
internalAddTransition(row, extraBottomState.get(), bottomStateValue); |
||||
|
} |
||||
|
if (!storm::utility::isZero(targetStateValue)) { |
||||
|
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); |
||||
|
internalAddTransition(row, extraTargetState.get(), targetStateValue); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addSelfloopTransition(uint64_t const &localActionIndex, ValueType const &value) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
||||
|
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
||||
|
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
internalAddTransition(row, getCurrentMdpState(), value); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, |
||||
|
bool ignoreNewBeliefs) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
||||
|
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
||||
|
|
||||
|
MdpStateType column; |
||||
|
if (ignoreNewBeliefs) { |
||||
|
column = getExploredMdpState(transitionTarget); |
||||
|
if (column == noState()) { |
||||
|
return false; |
||||
|
} |
||||
|
} else { |
||||
|
column = getOrAddMdpState(transitionTarget); |
||||
|
} |
||||
|
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
internalAddTransition(row, column, value); |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeRewardAtCurrentState(uint64 const &localActionIndex, ValueType extraReward) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { |
||||
|
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
||||
|
} |
||||
|
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTarget() { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
||||
|
targetStates.set(getCurrentMdpState(), true); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTruncated() { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
truncatedStates.grow(getCurrentNumberOfMdpStates(), false); |
||||
|
truncatedStates.set(getCurrentMdpState(), true); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentChoiceIsDelayed(uint64_t const &localActionIndex) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false); |
||||
|
delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasOldBehavior() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); |
||||
|
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateWasTruncated() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
||||
|
return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::stateIsOptimalSchedulerReachable(MdpStateType mdpState) const { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), |
||||
|
"Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
||||
|
return optimalChoicesReachableMdpStates->get(mdpState); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionIsOptimal(uint64_t const &globalActionIndex) const { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
||||
|
return optimalChoices->get(globalActionIndex); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateIsOptimalSchedulerReachable() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); |
||||
|
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), |
||||
|
"Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
||||
|
return optimalChoicesReachableMdpStates->get(getCurrentMdpState()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
||||
|
STORM_LOG_ASSERT(optimalChoices.is_initialized(), |
||||
|
"Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
||||
|
uint64_t choice = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
return optimalChoices->get(choice); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
||||
|
uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex; |
||||
|
return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex) { |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); |
||||
|
STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], |
||||
|
"Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
||||
|
|
||||
|
uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; |
||||
|
STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); |
||||
|
|
||||
|
// Insert the transitions
|
||||
|
for (auto const &transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { |
||||
|
internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); |
||||
|
// Check whether exploration is needed
|
||||
|
auto beliefId = getBeliefId(transition.getColumn()); |
||||
|
if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
|
||||
|
if (!exploredBeliefIds.get(beliefId)) { |
||||
|
// This belief needs exploration
|
||||
|
exploredBeliefIds.set(beliefId, true); |
||||
|
mdpStatesToExplore.push_back(transition.getColumn()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values
|
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::finishExploration() { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
||||
|
|
||||
|
// Complete the exploration
|
||||
|
// Finish the last row grouping in case the last explored state was new
|
||||
|
if (!currentStateHasOldBehavior()) { |
||||
|
internalAddRowGroupIndex(); |
||||
|
} |
||||
|
// Resize state- and choice based vectors to the correct size
|
||||
|
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
||||
|
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
||||
|
if (!mdpActionRewards.empty()) { |
||||
|
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
||||
|
} |
||||
|
|
||||
|
// We are not exploring anymore
|
||||
|
currentMdpState = noState(); |
||||
|
|
||||
|
// If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build).
|
||||
|
// We get rid of these before rebuilding the model
|
||||
|
if (exploredMdp) { |
||||
|
dropUnexploredStates(); |
||||
|
} |
||||
|
|
||||
|
// The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore.
|
||||
|
optimalChoices = boost::none; |
||||
|
optimalChoicesReachableMdpStates = boost::none; |
||||
|
|
||||
|
// Create the tranistion matrix
|
||||
|
uint64_t entryCount = 0; |
||||
|
for (auto const &row : exploredMdpTransitions) { |
||||
|
entryCount += row.size(); |
||||
|
} |
||||
|
storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, |
||||
|
getCurrentNumberOfMdpStates()); |
||||
|
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
||||
|
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
||||
|
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
||||
|
builder.newRowGroup(rowIndex); |
||||
|
for (; rowIndex < groupEnd; ++rowIndex) { |
||||
|
for (auto const &entry : exploredMdpTransitions[rowIndex]) { |
||||
|
builder.addNextValue(rowIndex, entry.first, entry.second); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
auto mdpTransitionMatrix = builder.build(); |
||||
|
|
||||
|
// Create a standard labeling
|
||||
|
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); |
||||
|
mdpLabeling.addLabel("init"); |
||||
|
mdpLabeling.addLabelToState("init", initialMdpState); |
||||
|
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
||||
|
mdpLabeling.addLabel("target", std::move(targetStates)); |
||||
|
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
||||
|
mdpLabeling.addLabel("truncated", std::move(truncatedStates)); |
||||
|
|
||||
|
// Create a standard reward model (if rewards are available)
|
||||
|
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels; |
||||
|
if (!mdpActionRewards.empty()) { |
||||
|
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
||||
|
mdpRewardModels.emplace("default", |
||||
|
storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards))); |
||||
|
} |
||||
|
|
||||
|
// Create model components
|
||||
|
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); |
||||
|
|
||||
|
// Potentially create a choice labeling
|
||||
|
if (!delayedExplorationChoices.empty()) { |
||||
|
modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices()); |
||||
|
delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false); |
||||
|
modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices)); |
||||
|
} |
||||
|
|
||||
|
// Create the final model.
|
||||
|
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents)); |
||||
|
status = Status::ModelFinished; |
||||
|
STORM_LOG_DEBUG( |
||||
|
"Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::dropUnexploredStates() { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
||||
|
|
||||
|
STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); |
||||
|
// Find the states (and corresponding choices) that were not explored.
|
||||
|
// These correspond to "empty" MDP transitions
|
||||
|
storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); |
||||
|
std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); |
||||
|
MdpStateType nextRelevantIndex = 0; |
||||
|
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
||||
|
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
||||
|
// Check first row in group
|
||||
|
if (exploredMdpTransitions[rowIndex].empty()) { |
||||
|
relevantMdpChoices.set(rowIndex, false); |
||||
|
relevantMdpStates.set(groupIndex, false); |
||||
|
} else { |
||||
|
toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; |
||||
|
++nextRelevantIndex; |
||||
|
} |
||||
|
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
||||
|
// process remaining rows in group
|
||||
|
for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { |
||||
|
// Assert that all actions at the current state were consistently explored or unexplored.
|
||||
|
STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), |
||||
|
"Actions at 'old' MDP state " << groupIndex << " were only partly explored."); |
||||
|
if (exploredMdpTransitions[rowIndex].empty()) { |
||||
|
relevantMdpChoices.set(rowIndex, false); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (relevantMdpStates.full()) { |
||||
|
// All states are relevant so nothing to do
|
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// Translate various components to the "new" MDP state set
|
||||
|
storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); |
||||
|
{ // beliefIdToMdpStateMap
|
||||
|
for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { |
||||
|
if (relevantMdpStates.get(belIdToMdpStateIt->second)) { |
||||
|
// Translate current entry and move on to the next one.
|
||||
|
belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; |
||||
|
++belIdToMdpStateIt; |
||||
|
} else { |
||||
|
STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), |
||||
|
"Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); |
||||
|
// Delete current entry and move on to the next one.
|
||||
|
// This works because std::map::erase does not invalidate other iterators within the map!
|
||||
|
beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
{ // exploredMdpTransitions
|
||||
|
storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); |
||||
|
// Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map
|
||||
|
// It might payoff to do this when building the matrix.
|
||||
|
for (auto &transitions : exploredMdpTransitions) { |
||||
|
std::map<MdpStateType, ValueType> newTransitions; |
||||
|
for (auto const &entry : transitions) { |
||||
|
STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); |
||||
|
newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); |
||||
|
} |
||||
|
transitions = std::move(newTransitions); |
||||
|
} |
||||
|
} |
||||
|
{ // exploredChoiceIndices
|
||||
|
MdpStateType newState = 0; |
||||
|
assert(exploredChoiceIndices[0] == 0u); |
||||
|
// Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched.
|
||||
|
for (auto const &oldState : relevantMdpStates) { |
||||
|
if (oldState != newState) { |
||||
|
assert(oldState > newState); |
||||
|
uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; |
||||
|
exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; |
||||
|
} |
||||
|
++newState; |
||||
|
} |
||||
|
exploredChoiceIndices.resize(newState + 1); |
||||
|
} |
||||
|
if (!mdpActionRewards.empty()) { |
||||
|
storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); |
||||
|
} |
||||
|
if (extraBottomState) { |
||||
|
extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; |
||||
|
} |
||||
|
if (extraTargetState) { |
||||
|
extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; |
||||
|
} |
||||
|
targetStates = targetStates % relevantMdpStates; |
||||
|
truncatedStates = truncatedStates % relevantMdpStates; |
||||
|
initialMdpState = toRelevantStateIndexMap[initialMdpState]; |
||||
|
|
||||
|
storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); |
||||
|
storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); |
||||
|
storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
std::shared_ptr<storm::models::sparse::Mdp<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdp() const { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); |
||||
|
return exploredMdp; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpStates() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return mdpStateToBeliefIdMap.size(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpChoices() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return exploredMdpTransitions.size(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getStartOfCurrentRowGroup() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return exploredChoiceIndices[getCurrentMdpState()]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getLowerValueBoundAtCurrentState() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return lowerValueBounds[getCurrentMdpState()]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getUpperValueBoundAtCurrentState() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return upperValueBounds[getCurrentMdpState()]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::computeLowerValueBoundAtBelief(BeliefId const &beliefId) const { |
||||
|
STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available."); |
||||
|
auto it = pomdpValueBounds.lower.begin(); |
||||
|
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
||||
|
for (++it; it != pomdpValueBounds.lower.end(); ++it) { |
||||
|
result = std::max(result, beliefManager->getWeightedSum(beliefId, *it)); |
||||
|
} |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::computeUpperValueBoundAtBelief(BeliefId const &beliefId) const { |
||||
|
STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available."); |
||||
|
auto it = pomdpValueBounds.upper.begin(); |
||||
|
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
||||
|
for (++it; it != pomdpValueBounds.upper.end(); ++it) { |
||||
|
result = std::min(result, beliefManager->getWeightedSum(beliefId, *it)); |
||||
|
} |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeValuesOfExploredMdp(storm::solver::OptimizationDirection const &dir) { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); |
||||
|
auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); |
||||
|
auto task = createStandardCheckTask(property); |
||||
|
|
||||
|
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task)); |
||||
|
if (res) { |
||||
|
values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
||||
|
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()), |
||||
|
"Computed values are smaller than the lower bound."); |
||||
|
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()), |
||||
|
"Computed values are larger than the upper bound."); |
||||
|
} else { |
||||
|
STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); |
||||
|
STORM_LOG_ERROR("No result obtained while checking."); |
||||
|
} |
||||
|
status = Status::ModelChecked; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasComputedValues() const { |
||||
|
return status == Status::ModelChecked; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
std::vector<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getValuesOfExploredMdp() const { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
return values; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getComputedValueAtInitialState() const { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); |
||||
|
return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefId(MdpStateType exploredMdpState) const { |
||||
|
STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); |
||||
|
return mdpStateToBeliefIdMap[exploredMdpState]; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, |
||||
|
std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
||||
|
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, |
||||
|
std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) { |
||||
|
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); |
||||
|
for (auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
||||
|
auto const &beliefId = getBeliefId(entry.getColumn()); |
||||
|
if (beliefId != beliefManager->noId()) { |
||||
|
auto const &obs = beliefManager->getBeliefObservation(beliefId); |
||||
|
SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); |
||||
|
auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); |
||||
|
if (!obsInsertion.second) { |
||||
|
// There already is an entry for this observation, so join the two informations
|
||||
|
obsInsertion.first->second.join(info); |
||||
|
} |
||||
|
beliefManager->joinSupport(beliefId, obsInsertion.first->second.support); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, |
||||
|
storm::storage::BitVector const &observationSet) { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
||||
|
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
||||
|
for (auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
||||
|
auto const &beliefId = getBeliefId(entry.getColumn()); |
||||
|
if (observationSet.get(beliefManager->getBeliefObservation(beliefId))) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsUpperBounds() { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
upperValueBounds = values; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsLowerBounds() { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
lowerValueBounds = values; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference) { |
||||
|
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
||||
|
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid in if no MDP is available."); |
||||
|
STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before."); |
||||
|
STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), |
||||
|
"Tried to compute states that are reachable under an optimal scheduler but this has already been done before."); |
||||
|
|
||||
|
// First find the choices that are optimal
|
||||
|
optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false); |
||||
|
auto const &choiceIndices = exploredMdp->getNondeterministicChoiceIndices(); |
||||
|
auto const &transitions = exploredMdp->getTransitionMatrix(); |
||||
|
auto const &targetStates = exploredMdp->getStates("target"); |
||||
|
for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) { |
||||
|
if (targetStates.get(mdpState)) { |
||||
|
// Target states can be skipped.
|
||||
|
continue; |
||||
|
} else { |
||||
|
auto const &stateValue = values[mdpState]; |
||||
|
for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) { |
||||
|
ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values); |
||||
|
if (exploredMdp->hasRewardModel()) { |
||||
|
choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice); |
||||
|
} |
||||
|
ValueType absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue)); |
||||
|
if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) { |
||||
|
optimalChoices->set(globalChoice, true); |
||||
|
} |
||||
|
} |
||||
|
STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice."); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Then, find the states that are reachable via these choices
|
||||
|
optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, |
||||
|
optimalChoices.get()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::noState() const { |
||||
|
return std::numeric_limits<MdpStateType>::max(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
std::shared_ptr<storm::logic::Formula const> |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardProperty(storm::solver::OptimizationDirection const &dir, bool computeRewards) { |
||||
|
std::string propertyString = computeRewards ? "R" : "P"; |
||||
|
propertyString += storm::solver::minimize(dir) ? "min" : "max"; |
||||
|
propertyString += "=? [F \"target\"]"; |
||||
|
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString); |
||||
|
return storm::api::extractFormulasFromProperties(propertyVector).front(); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
storm::modelchecker::CheckTask<storm::logic::Formula, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> |
||||
|
BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property) { |
||||
|
//Note: The property should not run out of scope after calling this because the task only stores the property by reference.
|
||||
|
// Therefore, this method needs the property by reference (and not const reference)
|
||||
|
auto task = storm::api::createTask<ValueType>(property, false); |
||||
|
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>(); |
||||
|
hint.setResultHint(values); |
||||
|
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint); |
||||
|
task.setHint(hintPtr); |
||||
|
return task; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentMdpState() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return currentMdpState; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentBeliefId() const { |
||||
|
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
||||
|
return getBeliefId(getCurrentMdpState()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddTransition(uint64_t const &row, MdpStateType const &column, ValueType const &value) { |
||||
|
STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); |
||||
|
if (row == exploredMdpTransitions.size()) { |
||||
|
exploredMdpTransitions.emplace_back(); |
||||
|
} |
||||
|
STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); |
||||
|
exploredMdpTransitions[row][column] = value; |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddRowGroupIndex() { |
||||
|
exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdpState(BeliefId const &beliefId) const { |
||||
|
if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { |
||||
|
return beliefIdToMdpStateMap.at(beliefId); |
||||
|
} else { |
||||
|
return noState(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
void BeliefMdpExplorer<PomdpType, BeliefValueType>::insertValueHints(ValueType const &lowerBound, ValueType const &upperBound) { |
||||
|
lowerValueBounds.push_back(lowerBound); |
||||
|
upperValueBounds.push_back(upperBound); |
||||
|
// Take the middle value as a hint
|
||||
|
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2)); |
||||
|
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); |
||||
|
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); |
||||
|
} |
||||
|
|
||||
|
template<typename PomdpType, typename BeliefValueType> |
||||
|
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getOrAddMdpState(BeliefId const &beliefId) { |
||||
|
exploredBeliefIds.grow(beliefId + 1, false); |
||||
|
if (exploredBeliefIds.get(beliefId)) { |
||||
|
return beliefIdToMdpStateMap[beliefId]; |
||||
|
} else { |
||||
|
// This state needs exploration
|
||||
|
exploredBeliefIds.set(beliefId, true); |
||||
|
|
||||
|
// If this is a restart of the exploration, we still might have an MDP state for the belief
|
||||
|
if (exploredMdp) { |
||||
|
auto findRes = beliefIdToMdpStateMap.find(beliefId); |
||||
|
if (findRes != beliefIdToMdpStateMap.end()) { |
||||
|
mdpStatesToExplore.push_back(findRes->second); |
||||
|
return findRes->second; |
||||
|
} |
||||
|
} |
||||
|
// At this point we need to add a new MDP state
|
||||
|
MdpStateType result = getCurrentNumberOfMdpStates(); |
||||
|
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); |
||||
|
mdpStateToBeliefIdMap.push_back(beliefId); |
||||
|
beliefIdToMdpStateMap[beliefId] = result; |
||||
|
insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); |
||||
|
mdpStatesToExplore.push_back(result); |
||||
|
return result; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template |
||||
|
class BeliefMdpExplorer<storm::models::sparse::Pomdp<double>>; |
||||
|
|
||||
|
template |
||||
|
class BeliefMdpExplorer<storm::models::sparse::Pomdp<storm::RationalNumber>>; |
||||
|
} |
||||
|
} |
||||
|
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue