Browse Source

BeliefMdpExplorer: Implemented extraction of optimal scheduler choices and reachable states under these choices

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
45832d3de3
  1. 77
      src/storm-pomdp/builder/BeliefMdpExplorer.h

77
src/storm-pomdp/builder/BeliefMdpExplorer.h

@@ -61,6 +61,8 @@ namespace storm {
exploredMdpTransitions.clear(); exploredMdpTransitions.clear();
exploredChoiceIndices.clear(); exploredChoiceIndices.clear();
mdpActionRewards.clear(); mdpActionRewards.clear();
optimalMdpChoices = boost::none;
optimalChoicesReachableMdpStates = boost::none;
exploredMdp = nullptr; exploredMdp = nullptr;
internalAddRowGroupIndex(); // Mark the start of the first row group internalAddRowGroupIndex(); // Mark the start of the first row group
@@ -230,6 +232,31 @@ namespace storm {
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
} }
/*!
* Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check.
* This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
*/
bool currentStateIsOptimalSchedulerReachable() {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior");
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
}
/*!
 * Retrieves whether the given action at the current state was optimal in the most recent check.
 * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
 * @param localActionIndex the index of the action within the row group of the current state
 * @return true iff the corresponding global choice was flagged as optimal
 */
bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
    // Fixed copy/paste defect: this message previously referred to 'currentStateIsOptimalSchedulerReachable'.
    STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionAtCurrentStateWasOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
    // Translate the local (within-row-group) action index into a global choice/row index.
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    return optimalChoices->get(row);
}
/*! /*!
* Inserts transitions and rewards at the given action as in the MDP of the previous exploration. * Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
* Does NOT set whether the state is truncated and/or target. * Does NOT set whether the state is truncated and/or target.
@@ -285,6 +312,10 @@ namespace storm {
dropUnexploredStates(); dropUnexploredStates();
} }
// The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore.
optimalChoices = boost::none;
optimalChoicesReachableMdpStates = boost::none;
// Create the transition matrix // Create the transition matrix
uint64_t entryCount = 0; uint64_t entryCount = 0;
for (auto const& row : exploredMdpTransitions) { for (auto const& row : exploredMdpTransitions) {
@@ -558,6 +589,48 @@ namespace storm {
lowerValueBounds = values; lowerValueBounds = values;
} }
/*!
 * Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler.
 * States that are only reachable via target states will not be in this set.
 * May only be called after a check (status ModelChecked) and only once per checked MDP.
 * @param ancillaryChoicesEpsilon if the difference of a 1-step value of a choice is only epsilon away from the optimal value, the choice will be included.
 * @param relativeDifference if set, we consider the relative difference to detect ancillary choices
 */
void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    // Fixed broken message text ("invalid in if no MDP").
    STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP is available.");
    STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before.");
    STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute states that are reachable under an optimal scheduler but this has already been done before.");

    // First find the choices that are optimal
    optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false);
    auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
    auto const& transitions = exploredMdp->getTransitionMatrix();
    auto const& targetStates = exploredMdp->getStates("target");
    for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
        if (targetStates.get(mdpState)) {
            // Target states can be skipped: no choice has to be made there.
            continue;
        }
        auto const& stateValue = values[mdpState];
        for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
            // 1-step value of this choice: expected successor value plus (if present) the action reward.
            ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
            if (exploredMdp->hasRewardModel()) {
                choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
            }
            // Mark the choice as optimal if its 1-step value is within epsilon of the state's (optimal) value.
            ValueType absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue));
            if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
                optimalChoices->set(globalChoice, true);
            }
        }
        // Sanity check: at least one choice within this state's row group must have been flagged.
        // (Choices of later states are not set yet, so any set bit >= the group start lies in this group.)
        STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice.");
    }
    // Then, find the states that are reachable via these choices
    optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get());
}
private: private:
MdpStateType noState() const { MdpStateType noState() const {
return std::numeric_limits<MdpStateType>::max(); return std::numeric_limits<MdpStateType>::max();
@@ -672,12 +745,14 @@ namespace storm {
// Final Mdp // Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp; std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value related information
// Value and scheduler related information
std::vector<ValueType> const& pomdpLowerValueBounds; std::vector<ValueType> const& pomdpLowerValueBounds;
std::vector<ValueType> const& pomdpUpperValueBounds; std::vector<ValueType> const& pomdpUpperValueBounds;
std::vector<ValueType> lowerValueBounds; std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds; std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed
boost::optional<storm::storage::BitVector> optimalMdpChoices;
boost::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates;
// The current status of this explorer // The current status of this explorer
Status status; Status status;

Loading…
Cancel
Save