You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

354 lines
19 KiB

#pragma once
#include <memory>
#include <vector>
#include <deque>
#include <map>
#include <boost/optional.hpp>
#include "storm/api/properties.h"
#include "storm/api/verification.h"
#include "storm/storage/BitVector.h"
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm/utility/SignalHandler.h"
namespace storm {
namespace builder {
template<typename PomdpType>
class BeliefMdpExplorer {
public:
// Type aliases used throughout the explorer.
using ValueType = typename PomdpType::ValueType;
using BeliefManagerType = storm::storage::BeliefManager<PomdpType>;
using BeliefId = typename BeliefManagerType::BeliefId;
// Index of a state of the explored MDP.
using MdpStateType = uint64_t;
/*!
 * Creates an explorer that builds an MDP over the beliefs provided by the given belief manager.
 * @param beliefManager The manager that hands out the beliefs (shared ownership).
 * @param pomdpLowerValueBounds Values passed to BeliefManager::getWeightedSum in order to obtain a lower value bound for a belief.
 * @param pomdpUpperValueBounds Values passed to BeliefManager::getWeightedSum in order to obtain an upper value bound for a belief.
 * @note Both bound vectors are stored by reference and are not copied. The caller has to keep them alive for as long as this explorer is used.
 */
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(std::move(beliefManager)), startOfCurrentRowGroup(0), currentRowCount(0), initialMdpState(0), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
    // The counters are zero-initialized so that the public getters are well defined even before startNewExploration() has been called.
}
/*!
 * Resets the explorer and starts a new exploration at the initial belief of the belief manager.
 * All bookkeeping of a previous exploration (state maps, value hints, matrix builder, rewards, pending queue, pending row entries and target/truncated marks) is discarded.
 * @param extraTargetStateValue If given, an additional MDP state with a single self-loop is created. It is marked as target and its lower bound, upper bound and value hint are set to this value.
 * @param extraBottomStateValue If given, an additional MDP state with a single self-loop is created (not marked as target). Its lower bound, upper bound and value hint are set to this value.
 * @note The bottom state (if requested) is created first, then the target state (if requested), then the state for the initial belief.
 */
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
    // Reset data from potential previous explorations
    mdpStateToBeliefIdMap.clear();
    beliefIdToMdpStateMap.clear();
    beliefIdsWithMdpState.clear();
    beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
    // A previous exploration might have been abandoned before it was finished. Make sure that neither
    // queued beliefs, nor buffered row entries, nor target/truncated marks leak into the new exploration.
    beliefIdsToExplore.clear();
    mdpTransitionsBuilderCurrentRowEntries.clear();
    targetStates.clear();
    truncatedStates.clear();
    lowerValueBounds.clear();
    upperValueBounds.clear();
    values.clear();
    mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
    currentRowCount = 0;
    startOfCurrentRowGroup = 0;
    mdpActionRewards.clear();
    exploredMdp = nullptr;

    // Creates a state that is not associated with any belief and whose only choice is a self-loop with probability one.
    auto addExtraState = [this](ValueType const& stateValue) -> MdpStateType {
        MdpStateType const state = getCurrentNumberOfMdpStates();
        mdpStateToBeliefIdMap.push_back(beliefManager->noId());
        insertValueHints(stateValue, stateValue);
        startOfCurrentRowGroup = currentRowCount;
        mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
        mdpTransitionsBuilder.addNextValue(currentRowCount, state, storm::utility::one<ValueType>());
        ++currentRowCount;
        return state;
    };

    // Add some states with special treatment (if requested)
    if (extraBottomStateValue) {
        extraBottomState = addExtraState(extraBottomStateValue.get());
    } else {
        extraBottomState = boost::none;
    }
    if (extraTargetStateValue) {
        extraTargetState = addExtraState(extraTargetStateValue.get());
        targetStates.grow(getCurrentNumberOfMdpStates(), false);
        targetStates.set(extraTargetState.get(), true);
    } else {
        extraTargetState = boost::none;
    }

    // Set up the initial state.
    initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
/*!
 * @return true iff the exploration queue still contains at least one belief.
 */
bool hasUnexploredState() const {
    bool const queueIsEmpty = beliefIdsToExplore.empty();
    return !queueIsEmpty;
}
/*!
 * Takes the next belief from the exploration queue and prepares the matrix builder for the choices of that belief.
 * @pre hasUnexploredState() holds. Calling this with an empty queue is an error.
 * @return the id of the belief that is explored next.
 */
BeliefId exploreNextState() {
    // Accessing the front of an empty queue is undefined behavior, so catch a wrong call order early.
    STORM_LOG_ASSERT(!beliefIdsToExplore.empty(), "Tried to explore the next state but there is no unexplored state left.");
    // Set up the matrix builder: flush the buffered entries of the previous row and open a new row group.
    finishCurrentRow();
    startOfCurrentRowGroup = currentRowCount;
    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
    // The first row of the new group now counts as the current row.
    ++currentRowCount;
    // Pop from the queue.
    BeliefId const nextBelief = beliefIdsToExplore.front();
    beliefIdsToExplore.pop_front();
    return nextBelief;
}
/*!
 * Adds transitions from the given choice of the current state to the extra bottom and/or extra target state.
 * A transition is only added if the corresponding value is not zero.
 * @param localActionIndex Index of the choice relative to the current row group.
 * @param targetStateValue Value of the transition to the extra target state.
 * @param bottomStateValue Value of the transition to the extra bottom state.
 */
void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
    uint64_t const matrixRow = startOfCurrentRowGroup + localActionIndex;
    // The entries are buffered by internalAddTransition, which takes care of the column ordering.
    bool const goesToBottom = !storm::utility::isZero(bottomStateValue);
    if (goesToBottom) {
        STORM_LOG_ASSERT(static_cast<bool>(extraBottomState), "Requested a transition to the extra bottom state but there is none.");
        internalAddTransition(matrixRow, *extraBottomState, bottomStateValue);
    }
    bool const goesToTarget = !storm::utility::isZero(targetStateValue);
    if (goesToTarget) {
        STORM_LOG_ASSERT(static_cast<bool>(extraTargetState), "Requested a transition to the extra target state but there is none.");
        internalAddTransition(matrixRow, *extraTargetState, targetStateValue);
    }
}
void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, getCurrentMdpState(), value);
}
/*!
* Adds the next transition to the given successor belief
* @param localActionIndex
* @param transitionTarget
* @param value
* @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
* @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
*/
bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
MdpStateType column;
if (ignoreNewBeliefs) {
column = getMdpState(transitionTarget);
if (column == noState()) {
return false;
}
} else {
column = getOrAddMdpState(transitionTarget);
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, column, value);
return true;
}
void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
if (currentRowCount >= mdpActionRewards.size()) {
mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
}
void setCurrentStateIsTarget() {
targetStates.grow(getCurrentNumberOfMdpStates(), false);
targetStates.set(getCurrentMdpState(), true);
}
void setCurrentStateIsTruncated() {
truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
truncatedStates.set(getCurrentMdpState(), true);
}
void finishExploration() {
// Create the tranistion matrix
finishCurrentRow();
auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
// Create a standard labeling
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
mdpLabeling.addLabel("init");
mdpLabeling.addLabelToState("init", initialMdpState);
targetStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("target", std::move(targetStates));
truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("truncated", std::move(truncatedStates));
// Create a standard reward model (if rewards are available)
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
if (!mdpActionRewards.empty()) {
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
}
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
}
/*!
 * @return the MDP that was assembled by finishExploration().
 * @pre The exploration has been finished.
 */
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
    STORM_LOG_ASSERT(exploredMdp != nullptr, "Tried to get the explored MDP but exploration was not finished yet.");
    std::shared_ptr<storm::models::sparse::Mdp<ValueType>> result = exploredMdp;
    return result;
}
/*!
 * @return the number of MDP states created so far (one entry of the state-to-belief map per state).
 */
MdpStateType getCurrentNumberOfMdpStates() const {
    MdpStateType const numberOfStates = mdpStateToBeliefIdMap.size();
    return numberOfStates;
}
/*!
 * @return the number of rows (choices) of the transition matrix created so far.
 */
MdpStateType getCurrentNumberOfMdpChoices() const {
    MdpStateType const numberOfChoices = currentRowCount;
    return numberOfChoices;
}
/*!
 * @return the lower value bound stored for the currently explored MDP state.
 */
ValueType getLowerValueBoundAtCurrentState() const {
    MdpStateType const current = getCurrentMdpState();
    return lowerValueBounds[current];
}
/*!
 * @return the upper value bound stored for the currently explored MDP state.
 */
ValueType getUpperValueBoundAtCurrentState() const {
    MdpStateType const current = getCurrentMdpState();
    return upperValueBounds[current];
}
/*!
 * Computes a lower value bound for the given belief.
 * @param beliefId The considered belief.
 * @return the weighted sum (as computed by the belief manager) of the POMDP lower value bounds for this belief.
 */
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
    ValueType lowerBound = beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
    return lowerBound;
}
/*!
 * Computes an upper value bound for the given belief.
 * @param beliefId The considered belief.
 * @return the weighted sum (as computed by the belief manager) of the POMDP upper value bounds for this belief.
 */
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
    ValueType upperBound = beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
    return upperBound;
}
/*!
 * Model checks the explored MDP with respect to reaching a "target" state and stores the resulting values.
 * Rewards are computed if the explored MDP has a reward model, probabilities otherwise.
 * @param dir The optimization direction.
 * @return the stored value vector. If no result was obtained, an error is logged and the previously stored values are returned unchanged.
 * @pre The exploration has been finished.
 */
std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
    STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
    // The formula has to be a named (non-const) lvalue because the check task is created from a reference to it.
    auto formula = createStandardProperty(dir, exploredMdp->hasRewardModel());
    auto checkTask = createStandardCheckTask(formula);
    std::unique_ptr<storm::modelchecker::CheckResult> checkResult(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, checkTask));
    if (!checkResult) {
        STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
        STORM_LOG_ERROR("No result obtained while checking.");
        return values;
    }
    values = std::move(checkResult->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
    return values;
}
/*!
 * @return the stored value at the first initial state of the explored MDP.
 * @pre The exploration has been finished.
 */
ValueType const& getComputedValueAtInitialState() const {
    STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
    auto const initialState = exploredMdp->getInitialStates().getNextSetIndex(0);
    return values[initialState];
}
private:
/*!
 * @return the sentinel (largest representable index) that stands for "no MDP state".
 */
MdpStateType noState() const {
    MdpStateType const sentinel = std::numeric_limits<MdpStateType>::max();
    return sentinel;
}
/*!
 * Creates the formula that is checked on the explored MDP: an optimal reachability query for the label "target".
 * @param dir Selects between a "min" and a "max" query.
 * @param computeRewards If true, an "R" (reward) operator is used, otherwise a "P" (probability) operator.
 * @return the first formula obtained from parsing the property string.
 */
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
    std::string const operatorName = computeRewards ? "R" : "P";
    std::string const direction = storm::solver::minimize(dir) ? "min" : "max";
    std::string const propertyString = operatorName + direction + "=? [F \"target\"]";
    std::vector<storm::jani::Property> parsedProperties = storm::api::parseProperties(propertyString);
    auto formulas = storm::api::extractFormulasFromProperties(parsedProperties);
    return formulas.front();
}
/*!
 * Creates a check task for the given formula and attaches the currently stored values as result hint.
 * @param property The formula to check. It is taken by non-const reference on purpose, see the note below.
 * @return the check task including the hint.
 */
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
    //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
    // Therefore, this method needs the property by reference (and not const reference)
    auto checkTask = storm::api::createTask<ValueType>(property, false);
    // Build the hint directly on the heap and fill in the current value estimates.
    auto resultHint = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>();
    resultHint->setResultHint(values);
    checkTask.setHint(resultHint);
    return checkTask;
}
/*!
 * @return the index of the currently explored MDP state, i.e., the number of row groups of the matrix builder minus one.
 */
MdpStateType getCurrentMdpState() const {
    auto const numberOfRowGroups = mdpTransitionsBuilder.getCurrentRowGroupCount();
    return numberOfRowGroups - 1;
}
MdpStateType getCurrentBeliefId() const {
return mdpStateToBeliefIdMap[getCurrentMdpState()];
}
/*!
 * Buffers a matrix entry for the given row.
 * Entries of the current row are collected in an ordered map first so that they reach the matrix builder sorted by column.
 * If the given row lies beyond the current row, the buffered entries are flushed and the given row becomes the current one.
 * @param row Absolute row index. Must not be smaller than the current row.
 * @param column The successor MDP state.
 * @param value The value of the entry.
 */
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
    STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
    bool const startsNewRow = row >= currentRowCount;
    if (startsNewRow) {
        // Hand the entries of the old row over to the builder before switching rows.
        finishCurrentRow();
        currentRowCount = row + 1;
    }
    auto& rowEntries = mdpTransitionsBuilderCurrentRowEntries;
    STORM_LOG_ASSERT(rowEntries.find(column) == rowEntries.end(), "Trying to insert multiple transitions to the same state.");
    rowEntries[column] = value;
}
/*!
 * Passes all buffered entries of the current row (index currentRowCount - 1) to the matrix builder and empties the buffer.
 */
void finishCurrentRow() {
    auto& rowEntries = mdpTransitionsBuilderCurrentRowEntries;
    // The map is ordered by column, so the builder receives the entries in ascending column order.
    for (auto entryIt = rowEntries.begin(); entryIt != rowEntries.end(); ++entryIt) {
        mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entryIt->first, entryIt->second);
    }
    rowEntries.clear();
}
/*!
 * Looks up the MDP state of the given belief without creating one.
 * @param beliefId The considered belief.
 * @return the MDP state of the belief or noState() if the belief has no MDP state.
 */
MdpStateType getMdpState(BeliefId const& beliefId) const {
    bool const hasMdpState = beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId);
    if (!hasMdpState) {
        return noState();
    }
    return beliefIdToMdpStateMap.at(beliefId);
}
/*!
 * Appends the value information of a newly created MDP state.
 * @param lowerBound Lower value bound of the new state.
 * @param upperBound Upper value bound of the new state.
 * @note The value estimate of the new state is the middle between the two bounds.
 */
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
    // Take the middle value as a hint
    ValueType const two = storm::utility::convertNumber<ValueType, uint64_t>(2);
    ValueType const middle = (lowerBound + upperBound) / two;
    lowerValueBounds.push_back(lowerBound);
    upperValueBounds.push_back(upperBound);
    values.push_back(middle);
    STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states.");
    STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
}
/*!
 * Returns the MDP state of the given belief. If the belief has no MDP state yet, a new one is created,
 * the belief is queued for exploration and value bounds / a value hint are stored for the new state.
 * @param beliefId The considered belief.
 * @return the (possibly new) MDP state of the belief.
 */
MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
    beliefIdsWithMdpState.grow(beliefId + 1, false);
    if (beliefIdsWithMdpState.get(beliefId)) {
        // Use at() (as in getMdpState) so that an inconsistent bookkeeping cannot silently insert a default entry.
        return beliefIdToMdpStateMap.at(beliefId);
    }
    // Add a new MDP state. Its index is the number of states before the insertion.
    beliefIdsWithMdpState.set(beliefId, true);
    MdpStateType const result = getCurrentNumberOfMdpStates();
    mdpStateToBeliefIdMap.push_back(beliefId);
    beliefIdToMdpStateMap[beliefId] = result;
    // This new belief needs exploration
    beliefIdsToExplore.push_back(beliefId);
    insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
    return result;
}
// Belief state related information
std::shared_ptr<BeliefManagerType> beliefManager;
std::vector<BeliefId> mdpStateToBeliefIdMap;  // Entry i is the belief of MDP state i (noId() for the extra states)
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
storm::storage::BitVector beliefIdsWithMdpState;  // Bit b is set iff belief b has an MDP state
// Exploration information
std::deque<BeliefId> beliefIdsToExplore;  // Beliefs that still need to be explored, in order of their creation
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;  // Buffered (column -> value) entries of the current row
std::vector<ValueType> mdpActionRewards;
uint64_t startOfCurrentRowGroup;
uint64_t currentRowCount;  // Number of rows created so far. The current row has index currentRowCount - 1
// Special states during exploration
boost::optional<MdpStateType> extraTargetState;
boost::optional<MdpStateType> extraBottomState;
storm::storage::BitVector targetStates;
storm::storage::BitVector truncatedStates;
MdpStateType initialMdpState;
// Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value related information
// Note: The two POMDP bound vectors are references to the vectors given to the constructor. They have to outlive this object.
std::vector<ValueType> const& pomdpLowerValueBounds;
std::vector<ValueType> const& pomdpUpperValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed
};
}
}