Browse Source
Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp.
main
Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp.
main
4 changed files with 443 additions and 192 deletions
-
354src/storm-pomdp/builder/BeliefMdpExplorer.h
-
248src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
-
9src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
-
24src/storm-pomdp/storage/BeliefManager.h
@ -0,0 +1,354 @@ |
|||
#pragma once |
|||
|
|||
#include <memory> |
|||
#include <vector> |
|||
#include <deque> |
|||
#include <map> |
|||
#include <boost/optional.hpp> |
|||
|
|||
#include "storm/api/properties.h" |
|||
#include "storm/api/verification.h" |
|||
|
|||
#include "storm/storage/BitVector.h" |
|||
#include "storm/utility/macros.h" |
|||
#include "storm-pomdp/storage/BeliefManager.h" |
|||
#include "storm/utility/SignalHandler.h" |
|||
|
|||
namespace storm { |
|||
namespace builder { |
|||
template<typename PomdpType> |
|||
class BeliefMdpExplorer { |
|||
public: |
|||
typedef typename PomdpType::ValueType ValueType; |
|||
typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType; |
|||
typedef typename BeliefManagerType::BeliefId BeliefId; |
|||
typedef uint64_t MdpStateType; |
|||
|
|||
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) { |
|||
// Intentionally left empty |
|||
} |
|||
|
|||
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) { |
|||
// Reset data from potential previous explorations |
|||
mdpStateToBeliefIdMap.clear(); |
|||
beliefIdToMdpStateMap.clear(); |
|||
beliefIdsWithMdpState.clear(); |
|||
beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false); |
|||
lowerValueBounds.clear(); |
|||
upperValueBounds.clear(); |
|||
values.clear(); |
|||
mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true); |
|||
currentRowCount = 0; |
|||
startOfCurrentRowGroup = 0; |
|||
mdpActionRewards.clear(); |
|||
exploredMdp = nullptr; |
|||
|
|||
// Add some states with special treatment (if requested) |
|||
if (extraBottomStateValue) { |
|||
extraBottomState = getCurrentNumberOfMdpStates(); |
|||
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
|||
insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); |
|||
|
|||
startOfCurrentRowGroup = currentRowCount; |
|||
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); |
|||
mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>()); |
|||
++currentRowCount; |
|||
} else { |
|||
extraBottomState = boost::none; |
|||
} |
|||
if (extraTargetStateValue) { |
|||
extraTargetState = getCurrentNumberOfMdpStates(); |
|||
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
|||
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); |
|||
|
|||
startOfCurrentRowGroup = currentRowCount; |
|||
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); |
|||
mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>()); |
|||
++currentRowCount; |
|||
|
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(extraTargetState.get(), true); |
|||
} else { |
|||
extraTargetState = boost::none; |
|||
} |
|||
|
|||
// Set up the initial state. |
|||
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); |
|||
} |
|||
|
|||
bool hasUnexploredState() const { |
|||
return !beliefIdsToExplore.empty(); |
|||
} |
|||
|
|||
BeliefId exploreNextState() { |
|||
// Set up the matrix builder |
|||
finishCurrentRow(); |
|||
startOfCurrentRowGroup = currentRowCount; |
|||
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); |
|||
++currentRowCount; |
|||
|
|||
// Pop from the queue. |
|||
auto result = beliefIdsToExplore.front(); |
|||
beliefIdsToExplore.pop_front(); |
|||
return result; |
|||
} |
|||
|
|||
void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) { |
|||
// We first insert the entries of the current row in a separate map. |
|||
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) |
|||
|
|||
uint64_t row = startOfCurrentRowGroup + localActionIndex; |
|||
if (!storm::utility::isZero(bottomStateValue)) { |
|||
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); |
|||
internalAddTransition(row, extraBottomState.get(), bottomStateValue); |
|||
} |
|||
if (!storm::utility::isZero(targetStateValue)) { |
|||
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); |
|||
internalAddTransition(row, extraTargetState.get(), targetStateValue); |
|||
} |
|||
} |
|||
|
|||
void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) { |
|||
uint64_t row = startOfCurrentRowGroup + localActionIndex; |
|||
internalAddTransition(row, getCurrentMdpState(), value); |
|||
} |
|||
|
|||
/*! |
|||
* Adds the next transition to the given successor belief |
|||
* @param localActionIndex |
|||
* @param transitionTarget |
|||
* @param value |
|||
* @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. |
|||
* @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. |
|||
*/ |
|||
bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { |
|||
// We first insert the entries of the current row in a separate map. |
|||
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) |
|||
MdpStateType column; |
|||
if (ignoreNewBeliefs) { |
|||
column = getMdpState(transitionTarget); |
|||
if (column == noState()) { |
|||
return false; |
|||
} |
|||
} else { |
|||
column = getOrAddMdpState(transitionTarget); |
|||
} |
|||
uint64_t row = startOfCurrentRowGroup + localActionIndex; |
|||
internalAddTransition(row, column, value); |
|||
return true; |
|||
} |
|||
|
|||
void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) { |
|||
if (currentRowCount >= mdpActionRewards.size()) { |
|||
mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>()); |
|||
} |
|||
uint64_t row = startOfCurrentRowGroup + localActionIndex; |
|||
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; |
|||
} |
|||
|
|||
void setCurrentStateIsTarget() { |
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
void setCurrentStateIsTruncated() { |
|||
truncatedStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
void finishExploration() { |
|||
// Create the tranistion matrix |
|||
finishCurrentRow(); |
|||
auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); |
|||
|
|||
// Create a standard labeling |
|||
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); |
|||
mdpLabeling.addLabel("init"); |
|||
mdpLabeling.addLabelToState("init", initialMdpState); |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("target", std::move(targetStates)); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("truncated", std::move(truncatedStates)); |
|||
|
|||
// Create a standard reward model (if rewards are available) |
|||
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels; |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards))); |
|||
} |
|||
|
|||
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); |
|||
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents)); |
|||
} |
|||
|
|||
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const { |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); |
|||
return exploredMdp; |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpStates() const { |
|||
return mdpStateToBeliefIdMap.size(); |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpChoices() const { |
|||
return currentRowCount; |
|||
} |
|||
|
|||
ValueType getLowerValueBoundAtCurrentState() const { |
|||
return lowerValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType getUpperValueBoundAtCurrentState() const { |
|||
return upperValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); |
|||
} |
|||
|
|||
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); |
|||
} |
|||
|
|||
std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); |
|||
auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); |
|||
auto task = createStandardCheckTask(property); |
|||
|
|||
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task)); |
|||
if (res) { |
|||
values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
|||
} else { |
|||
STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); |
|||
STORM_LOG_ERROR("No result obtained while checking."); |
|||
} |
|||
return values; |
|||
} |
|||
|
|||
ValueType const& getComputedValueAtInitialState() const { |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); |
|||
return values[exploredMdp->getInitialStates().getNextSetIndex(0)]; |
|||
} |
|||
|
|||
private: |
|||
MdpStateType noState() const { |
|||
return std::numeric_limits<MdpStateType>::max(); |
|||
} |
|||
|
|||
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { |
|||
std::string propertyString = computeRewards ? "R" : "P"; |
|||
propertyString += storm::solver::minimize(dir) ? "min" : "max"; |
|||
propertyString += "=? [F \"target\"]"; |
|||
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString); |
|||
return storm::api::extractFormulasFromProperties(propertyVector).front(); |
|||
} |
|||
|
|||
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) { |
|||
//Note: The property should not run out of scope after calling this because the task only stores the property by reference. |
|||
// Therefore, this method needs the property by reference (and not const reference) |
|||
auto task = storm::api::createTask<ValueType>(property, false); |
|||
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>(); |
|||
hint.setResultHint(values); |
|||
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint); |
|||
task.setHint(hintPtr); |
|||
return task; |
|||
} |
|||
|
|||
MdpStateType getCurrentMdpState() const { |
|||
return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1; |
|||
} |
|||
|
|||
MdpStateType getCurrentBeliefId() const { |
|||
return mdpStateToBeliefIdMap[getCurrentMdpState()]; |
|||
} |
|||
|
|||
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { |
|||
// We first insert the entries of the current row in a separate map. |
|||
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) |
|||
STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row."); |
|||
if (row >= currentRowCount) { |
|||
// We are going to start a new row, so insert the entries of the old one |
|||
finishCurrentRow(); |
|||
currentRowCount = row + 1; |
|||
} |
|||
STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state."); |
|||
mdpTransitionsBuilderCurrentRowEntries[column] = value; |
|||
} |
|||
|
|||
void finishCurrentRow() { |
|||
for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) { |
|||
mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second); |
|||
} |
|||
mdpTransitionsBuilderCurrentRowEntries.clear(); |
|||
} |
|||
|
|||
MdpStateType getMdpState(BeliefId const& beliefId) const { |
|||
if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) { |
|||
return beliefIdToMdpStateMap.at(beliefId); |
|||
} else { |
|||
return noState(); |
|||
} |
|||
} |
|||
|
|||
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { |
|||
lowerValueBounds.push_back(lowerBound); |
|||
upperValueBounds.push_back(upperBound); |
|||
// Take the middle value as a hint |
|||
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2)); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); |
|||
} |
|||
|
|||
MdpStateType getOrAddMdpState(BeliefId const& beliefId) { |
|||
beliefIdsWithMdpState.grow(beliefId + 1, false); |
|||
if (beliefIdsWithMdpState.get(beliefId)) { |
|||
return beliefIdToMdpStateMap[beliefId]; |
|||
} else { |
|||
// Add a new MDP state |
|||
beliefIdsWithMdpState.set(beliefId, true); |
|||
MdpStateType result = getCurrentNumberOfMdpStates(); |
|||
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); |
|||
mdpStateToBeliefIdMap.push_back(beliefId); |
|||
beliefIdToMdpStateMap[beliefId] = result; |
|||
// This new belief needs exploration |
|||
beliefIdsToExplore.push_back(beliefId); |
|||
|
|||
insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); |
|||
return result; |
|||
} |
|||
} |
|||
|
|||
// Belief state related information |
|||
std::shared_ptr<BeliefManagerType> beliefManager; |
|||
std::vector<BeliefId> mdpStateToBeliefIdMap; |
|||
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap; |
|||
storm::storage::BitVector beliefIdsWithMdpState; |
|||
|
|||
// Exploration information |
|||
std::deque<uint64_t> beliefIdsToExplore; |
|||
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder; |
|||
std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries; |
|||
std::vector<ValueType> mdpActionRewards; |
|||
uint64_t startOfCurrentRowGroup; |
|||
uint64_t currentRowCount; |
|||
|
|||
// Special states during exploration |
|||
boost::optional<MdpStateType> extraTargetState; |
|||
boost::optional<MdpStateType> extraBottomState; |
|||
storm::storage::BitVector targetStates; |
|||
storm::storage::BitVector truncatedStates; |
|||
MdpStateType initialMdpState; |
|||
|
|||
// Final Mdp |
|||
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp; |
|||
|
|||
// Value related information |
|||
std::vector<ValueType> const& pomdpLowerValueBounds; |
|||
std::vector<ValueType> const& pomdpUpperValueBounds; |
|||
std::vector<ValueType> lowerValueBounds; |
|||
std::vector<ValueType> upperValueBounds; |
|||
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed |
|||
|
|||
}; |
|||
} |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue