29 changed files with 1688 additions and 2100 deletions
-
16resources/3rdparty/CMakeLists.txt
-
0resources/cmake/find_modules/FindGUROBI.cmake
-
0resources/cmake/find_modules/FindHWLOC.cmake
-
2resources/cmake/find_modules/FindZ3.cmake
-
9src/storm-cli-utilities/model-handling.h
-
4src/storm-gspn/storage/gspn/GspnBuilder.cpp
-
4src/storm-gspn/storage/gspn/GspnBuilder.h
-
2src/storm-gspn/storage/gspn/Place.cpp
-
2src/storm-gspn/storage/gspn/Place.h
-
10src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp
-
3src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h
-
6src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp
-
1src/storm-pomdp-cli/settings/modules/POMDPSettings.h
-
203src/storm-pomdp-cli/storm-pomdp.cpp
-
76src/storm-pomdp/analysis/QualitativeAnalysis.cpp
-
670src/storm-pomdp/builder/BeliefMdpExplorer.h
-
1602src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
-
228src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
-
117src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
-
354src/storm-pomdp/storage/BeliefGrid.h
-
436src/storm-pomdp/storage/BeliefManager.h
-
5src/storm/models/sparse/StandardRewardModel.cpp
-
9src/storm/models/sparse/StandardRewardModel.h
-
2src/storm/settings/SettingsManager.cpp
-
12src/storm/storage/Distribution.cpp
-
5src/storm/storage/Distribution.h
-
2src/storm/storage/dd/cudd/InternalCuddBdd.cpp
-
2src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
-
6src/storm/utility/vector.h
@ -0,0 +1,670 @@ |
|||
#pragma once |
|||
|
|||
#include <memory> |
|||
#include <vector> |
|||
#include <deque> |
|||
#include <map> |
|||
#include <boost/optional.hpp> |
|||
|
|||
#include "storm-parsers/api/properties.h" |
|||
#include "storm/api/properties.h" |
|||
#include "storm/api/verification.h" |
|||
|
|||
#include "storm/storage/BitVector.h" |
|||
#include "storm/storage/SparseMatrix.h" |
|||
#include "storm/utility/macros.h" |
|||
#include "storm-pomdp/storage/BeliefManager.h" |
|||
#include "storm/utility/SignalHandler.h" |
|||
#include "storm/modelchecker/results/CheckResult.h" |
|||
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" |
|||
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" |
|||
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" |
|||
|
|||
|
|||
namespace storm {
    namespace builder {

        /*!
         * Incrementally explores and builds an MDP whose states correspond to beliefs of a POMDP.
         * Beliefs are created and stored by the given BeliefManager; this class maintains the mapping
         * between belief ids and MDP states, builds the transition matrix / labeling / rewards, and can
         * model check the finished MDP. An exploration can be restarted to refine a previously built MDP.
         */
        template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
        class BeliefMdpExplorer {
        public:
            typedef typename PomdpType::ValueType ValueType;
            typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
            typedef typename BeliefManagerType::BeliefId BeliefId;
            // Index type for states (and choices) of the MDP under exploration.
            typedef uint64_t MdpStateType;

            // Life cycle of an explorer: Uninitialized -> Exploring -> ModelFinished -> ModelChecked.
            // restartExploration() moves back from ModelFinished/ModelChecked to Exploring.
            enum class Status {
                Uninitialized,
                Exploring,
                ModelFinished,
                ModelChecked
            };
|||
            /*!
             * Creates an explorer that builds an MDP over the beliefs of the given belief manager.
             * @param beliefManager creates and stores the beliefs that are explored
             * @param pomdpLowerValueBounds lower bound on the value of each POMDP state (weighted by beliefs to obtain value hints)
             * @param pomdpUpperValueBounds upper bound on the value of each POMDP state (weighted by beliefs to obtain value hints)
             */
            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) {
                // Intentionally left empty
            }
            // Explorers are movable (declaring the move constructor implicitly disables copying).
            BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
|||
|
|||
BeliefManagerType const& getBeliefManager() const { |
|||
return *beliefManager; |
|||
} |
|||
|
|||
            /*!
             * Starts a fresh exploration, discarding the data of any previous exploration.
             * Optionally creates two special absorbing states before the regular states:
             * @param extraTargetStateValue if given, an extra target state (single self-loop) is created and its value hints are set to this value.
             * @param extraBottomStateValue if given, an extra bottom state (single self-loop, not a target) is created and its value hints are set to this value.
             * Afterwards, the initial belief is inserted into the exploration queue.
             */
            void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
                status = Status::Exploring;
                // Reset data from potential previous explorations
                // NOTE(review): targetStates and truncatedStates are not reset here (unlike in restartExploration) —
                // presumably a fresh explorer object is used per exploration; verify before reusing one instance.
                mdpStateToBeliefIdMap.clear();
                beliefIdToMdpStateMap.clear();
                exploredBeliefIds.clear();
                exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
                mdpStatesToExplore.clear();
                lowerValueBounds.clear();
                upperValueBounds.clear();
                values.clear();
                exploredMdpTransitions.clear();
                exploredChoiceIndices.clear();
                mdpActionRewards.clear();
                exploredMdp = nullptr;
                internalAddRowGroupIndex(); // Mark the start of the first row group

                // Add some states with special treatment (if requested)
                if (extraBottomStateValue) {
                    currentMdpState = getCurrentNumberOfMdpStates();
                    extraBottomState = currentMdpState;
                    // The extra states do not correspond to any belief.
                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                    insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());

                    // The bottom state is absorbing: one choice with a probability-one self-loop.
                    internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>());
                    internalAddRowGroupIndex();
                } else {
                    extraBottomState = boost::none;
                }
                if (extraTargetStateValue) {
                    currentMdpState = getCurrentNumberOfMdpStates();
                    extraTargetState = currentMdpState;
                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                    insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());

                    // The target state is absorbing as well.
                    internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>());
                    internalAddRowGroupIndex();

                    // Flag the extra target state as a target.
                    targetStates.grow(getCurrentNumberOfMdpStates(), false);
                    targetStates.set(extraTargetState.get(), true);
                } else {
                    extraTargetState = boost::none;
                }
                currentMdpState = noState();

                // Set up the initial state.
                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
            }
|||
|
|||
            /*!
             * Restarts the exploration to allow re-exploring each state.
             * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty.
             * This method inserts the initial state of the MDP in the exploration queue.
             * While re-exploring, the reference to the old MDP remains valid.
             */
            void restartExploration() {
                STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status.");
                status = Status::Exploring;
                // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid.
                exploredBeliefIds.clear();
                exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
                // Choices are emptied but keep the row count and row grouping of the old MDP.
                exploredMdpTransitions.clear();
                exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
                exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
                mdpActionRewards.clear();
                if (exploredMdp->hasRewardModel()) {
                    // Can be overwritten during exploration
                    mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
                }
                // State flags are rebuilt from scratch during re-exploration.
                targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
                truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
                mdpStatesToExplore.clear();

                // The extra states are not changed
                if (extraBottomState) {
                    currentMdpState = extraBottomState.get();
                    restoreOldBehaviorAtCurrentState(0);
                }
                if (extraTargetState) {
                    currentMdpState = extraTargetState.get();
                    restoreOldBehaviorAtCurrentState(0);
                    targetStates.set(extraTargetState.get(), true);
                }
                currentMdpState = noState();

                // Set up the initial state.
                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
            }
|||
|
|||
bool hasUnexploredState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return !mdpStatesToExplore.empty(); |
|||
} |
|||
|
|||
            /*!
             * Pops the next state from the exploration queue and makes it the "currently explored" state.
             * Callers must ensure hasUnexploredState() before calling this.
             * @return the belief id that corresponds to the popped MDP state.
             */
            BeliefId exploreNextState() {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                // Mark the end of the previously explored row group.
                // (States with old behavior keep the row grouping of the previous MDP, so no index is added for them.)
                if (currentMdpState != noState() && !currentStateHasOldBehavior()) {
                    internalAddRowGroupIndex();
                }

                // Pop from the queue.
                currentMdpState = mdpStatesToExplore.front();
                mdpStatesToExplore.pop_front();

                return mdpStateToBeliefIdMap[currentMdpState];
            }
|||
|
|||
            /*!
             * Adds transitions from the given action of the current state to the extra target and/or bottom state.
             * A transition is only inserted if the corresponding value is non-zero; the respective extra state must exist then.
             * @param localActionIndex index of the action within the row group of the current state
             * @param targetStateValue probability mass directed to the extra target state
             * @param bottomStateValue probability mass directed to the extra bottom state
             */
            void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                if (!storm::utility::isZero(bottomStateValue)) {
                    STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
                    internalAddTransition(row, extraBottomState.get(), bottomStateValue);
                }
                if (!storm::utility::isZero(targetStateValue)) {
                    STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
                    internalAddTransition(row, extraTargetState.get(), targetStateValue);
                }
            }
|||
|
|||
            /*!
             * Adds a transition from the given action of the current state back to the current state itself.
             * @param localActionIndex index of the action within the row group of the current state (default: first action)
             * @param value probability of the self-loop (default: one)
             */
            void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                internalAddTransition(row, getCurrentMdpState(), value);
            }
|||
|
|||
            /*!
             * Adds the next transition to the given successor belief
             * @param localActionIndex index of the action within the row group of the current state
             * @param transitionTarget id of the belief the transition leads to
             * @param value the transition probability
             * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
             * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
             */
            bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");

                MdpStateType column;
                if (ignoreNewBeliefs) {
                    // Only link to beliefs that already have an MDP state; do not trigger further exploration.
                    column = getExploredMdpState(transitionTarget);
                    if (column == noState()) {
                        return false;
                    }
                } else {
                    // May create a fresh MDP state and enqueue the belief for exploration.
                    column = getOrAddMdpState(transitionTarget);
                }
                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                internalAddTransition(row, column, value);
                return true;
            }
|||
|
|||
void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; |
|||
} |
|||
|
|||
void setCurrentStateIsTarget() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
void setCurrentStateIsTruncated() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
truncatedStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
bool currentStateHasOldBehavior() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); |
|||
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); |
|||
} |
|||
|
|||
            /*!
             * Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
             * Does NOT set whether the state is truncated and/or target.
             * Will add "old" states that have not been considered before into the exploration queue
             * @param localActionIndex index of the action within the row group of the current state
             */
            void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) {
                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any.");
                STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");

                uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex;
                STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index.");

                // Insert the transitions
                for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) {
                    internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue());
                    // Check whether exploration is needed
                    auto beliefId = getBeliefId(transition.getColumn());
                    if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
                        if (!exploredBeliefIds.get(beliefId)) {
                            // This belief needs exploration
                            exploredBeliefIds.set(beliefId, true);
                            mdpStatesToExplore.push_back(transition.getColumn());
                        }
                    }
                }

                // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values
            }
|||
|
|||
void finishExploration() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
|||
|
|||
// Complete the exploration |
|||
// Finish the last row grouping in case the last explored state was new |
|||
if (!currentStateHasOldBehavior()) { |
|||
internalAddRowGroupIndex(); |
|||
} |
|||
// Resize state- and choice based vectors to the correct size |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
|
|||
// We are not exploring anymore |
|||
currentMdpState = noState(); |
|||
|
|||
// If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). |
|||
// We get rid of these before rebuilding the model |
|||
if (exploredMdp) { |
|||
dropUnexploredStates(); |
|||
} |
|||
|
|||
// Create the tranistion matrix |
|||
uint64_t entryCount = 0; |
|||
for (auto const& row : exploredMdpTransitions) { |
|||
entryCount += row.size(); |
|||
} |
|||
storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); |
|||
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
|||
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
|||
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
|||
builder.newRowGroup(rowIndex); |
|||
for (; rowIndex < groupEnd; ++rowIndex) { |
|||
for (auto const& entry : exploredMdpTransitions[rowIndex]) { |
|||
builder.addNextValue(rowIndex, entry.first, entry.second); |
|||
} |
|||
} |
|||
} |
|||
auto mdpTransitionMatrix = builder.build(); |
|||
|
|||
// Create a standard labeling |
|||
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); |
|||
mdpLabeling.addLabel("init"); |
|||
mdpLabeling.addLabelToState("init", initialMdpState); |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("target", std::move(targetStates)); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("truncated", std::move(truncatedStates)); |
|||
|
|||
// Create a standard reward model (if rewards are available) |
|||
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels; |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
mdpRewardModels.emplace("default", |
|||
storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards))); |
|||
} |
|||
|
|||
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); |
|||
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents)); |
|||
status = Status::ModelFinished; |
|||
STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); |
|||
|
|||
} |
|||
|
|||
            /*!
             * Removes all MDP states (and their choices) that were not (re-)explored in the current round,
             * i.e. states whose rows are all empty, and re-indexes every state- and choice-based data structure
             * accordingly. Requires that a previously explored MDP exists and the exploration queue is empty.
             */
            void dropUnexploredStates() {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states.");

                STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available.");
                // Find the states (and corresponding choices) that were not explored.
                // These correspond to "empty" MDP transitions
                storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true);
                // Maps an old state index to its new (compacted) index; unexplored states keep noState().
                std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
                MdpStateType nextRelevantIndex = 0;
                for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
                    uint64_t rowIndex = exploredChoiceIndices[groupIndex];
                    // Check first row in group
                    if (exploredMdpTransitions[rowIndex].empty()) {
                        relevantMdpChoices.set(rowIndex, false);
                        relevantMdpStates.set(groupIndex, false);
                    } else {
                        toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
                        ++nextRelevantIndex;
                    }
                    uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
                    // process remaining rows in group
                    for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
                        // Assert that all actions at the current state were consistently explored or unexplored.
                        STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored.");
                        if (exploredMdpTransitions[rowIndex].empty()) {
                            relevantMdpChoices.set(rowIndex, false);
                        }
                    }
                }

                if (relevantMdpStates.full()) {
                    // All states are relevant so nothing to do
                    return;
                }

                // Translate various components to the "new" MDP state set
                storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates);
                { // beliefIdToMdpStateMap
                    for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
                        if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
                            // Translate current entry and move on to the next one.
                            belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
                            ++belIdToMdpStateIt;
                        } else {
                            STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
                            // Delete current entry and move on to the next one.
                            // This works because std::map::erase does not invalidate other iterators within the map!
                            beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
                        }
                    }
                }
                { // exploredMdpTransitions
                    storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices);
                    // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map
                    // It might payoff to do this when building the matrix.
                    for (auto& transitions : exploredMdpTransitions) {
                        std::map<MdpStateType, ValueType> newTransitions;
                        for (auto const& entry : transitions) {
                            STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state.");
                            // The translated keys arrive in increasing order, so hinting at end() keeps insertion cheap.
                            newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
                        }
                        transitions = std::move(newTransitions);
                    }
                }
                { // exploredChoiceIndices
                    MdpStateType newState = 0;
                    assert(exploredChoiceIndices[0] == 0u);
                    // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched.
                    for (auto const& oldState : relevantMdpStates) {
                        if (oldState != newState) {
                            assert(oldState > newState);
                            uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState];
                            exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize;
                        }
                        ++newState;
                    }
                    exploredChoiceIndices.resize(newState + 1);
                }
                if (!mdpActionRewards.empty()) {
                    storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices);
                }
                // Re-map the special states (they are never dropped, see the assertion-free translation above).
                if (extraBottomState) {
                    extraBottomState = toRelevantStateIndexMap[extraBottomState.get()];
                }
                if (extraTargetState) {
                    extraTargetState = toRelevantStateIndexMap[extraTargetState.get()];
                }
                // Restrict the state flag vectors to the relevant states.
                targetStates = targetStates % relevantMdpStates;
                truncatedStates = truncatedStates % relevantMdpStates;
                initialMdpState = toRelevantStateIndexMap[initialMdpState];

                storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates);
                storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates);
                storm::utility::vector::filterVectorInPlace(values, relevantMdpStates);
            }
|||
|
|||
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); |
|||
return exploredMdp; |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpStates() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap.size(); |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpChoices() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredMdpTransitions.size(); |
|||
} |
|||
|
|||
MdpStateType getStartOfCurrentRowGroup() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredChoiceIndices[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType getLowerValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return lowerValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType getUpperValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return upperValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); |
|||
} |
|||
|
|||
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); |
|||
} |
|||
|
|||
            /*!
             * Model checks the explored MDP (reachability probability or expected reward w.r.t. the "target" label)
             * and stores the per-state results in 'values'. Afterwards the status is ModelChecked.
             * @param dir the optimization direction (minimize or maximize)
             */
            void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
                STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
                auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
                auto task = createStandardCheckTask(property);

                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
                if (res) {
                    values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
                    // Debug-only sanity checks: computed values should lie within the known bounds.
                    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()), "Computed values are smaller than the lower bound.");
                    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()), "Computed values are larger than the upper bound.");
                } else {
                    // An empty result is only expected when the check was aborted (e.g. by a termination signal).
                    STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
                    STORM_LOG_ERROR("No result obtained while checking.");
                }
                status = Status::ModelChecked;
            }
|||
|
|||
bool hasComputedValues() const { |
|||
return status == Status::ModelChecked; |
|||
} |
|||
|
|||
std::vector<ValueType> const& getValuesOfExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
return values; |
|||
} |
|||
|
|||
ValueType const& getComputedValueAtInitialState() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); |
|||
return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; |
|||
} |
|||
|
|||
MdpStateType getBeliefId(MdpStateType exploredMdpState) const { |
|||
STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap[exploredMdpState]; |
|||
} |
|||
|
|||
struct SuccessorObservationInformation { |
|||
SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { |
|||
// Intentionally left empty. |
|||
} |
|||
|
|||
void join(SuccessorObservationInformation other) { |
|||
observationProbability += other.observationProbability; |
|||
maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); |
|||
successorWithObsCount += other.successorWithObsCount; |
|||
} |
|||
|
|||
ValueType observationProbability; /// The probability we move to the corresponding observation. |
|||
ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. |
|||
uint64_t successorWithObsCount; /// The number of successors with this observation |
|||
}; |
|||
|
|||
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
|||
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); |
|||
} |
|||
|
|||
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) { |
|||
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); |
|||
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
|||
auto const& beliefId = getBeliefId(entry.getColumn()); |
|||
if (beliefId != beliefManager->noId()) { |
|||
auto const& obs = beliefManager->getBeliefObservation(beliefId); |
|||
SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); |
|||
auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); |
|||
if (!obsInsertion.second) { |
|||
// There already is an entry for this observation, so join the two informations |
|||
obsInsertion.first->second.join(info); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
private: |
|||
MdpStateType noState() const { |
|||
return std::numeric_limits<MdpStateType>::max(); |
|||
} |
|||
|
|||
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { |
|||
std::string propertyString = computeRewards ? "R" : "P"; |
|||
propertyString += storm::solver::minimize(dir) ? "min" : "max"; |
|||
propertyString += "=? [F \"target\"]"; |
|||
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString); |
|||
return storm::api::extractFormulasFromProperties(propertyVector).front(); |
|||
} |
|||
|
|||
            /*!
             * Creates the check task used to analyze the explored MDP. The current 'values' vector is attached
             * as a result hint to speed up model checking.
             * Note: The property should not run out of scope after calling this because the task only stores the property by reference.
             * Therefore, this method needs the property by reference (and not const reference).
             */
            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
                auto task = storm::api::createTask<ValueType>(property, false);
                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
                hint.setResultHint(values);
                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
                task.setHint(hintPtr);
                return task;
            }
|||
|
|||
MdpStateType getCurrentMdpState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return currentMdpState; |
|||
} |
|||
|
|||
MdpStateType getCurrentBeliefId() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return getBeliefId(getCurrentMdpState()); |
|||
} |
|||
|
|||
            /*!
             * Records the transition (row, column, value) in the transition data under construction.
             * Rows must be filled in ascending order; a new row is appended when 'row' equals the current row count.
             * Inserting two transitions with the same row and column is not allowed.
             */
            void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
                STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row.");
                if (row == exploredMdpTransitions.size()) {
                    // Start a new (empty) row.
                    exploredMdpTransitions.emplace_back();
                }
                STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state.");
                exploredMdpTransitions[row][column] = value;
            }
|||
|
|||
/// Opens a new row group by recording the index of the next free choice
/// (the first choice of the state about to be explored).
void internalAddRowGroupIndex() {
    exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices());
}
|||
|
|||
/// Returns the MDP state associated with the given belief id, or noState() if the
/// belief has not been explored (or is unknown to this explorer).
MdpStateType getExploredMdpState(BeliefId const& beliefId) const {
    // Guard clause: unknown or not-yet-explored beliefs have no MDP state.
    if (beliefId >= exploredBeliefIds.size() || !exploredBeliefIds.get(beliefId)) {
        return noState();
    }
    return beliefIdToMdpStateMap.at(beliefId);
}
|||
|
|||
/// Registers the value bounds for a newly created MDP state and seeds the value
/// hint with the midpoint of the two bounds.
/// @param lowerBound lower value bound for the new state
/// @param upperBound upper value bound for the new state
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
    lowerValueBounds.push_back(lowerBound);
    upperValueBounds.push_back(upperBound);
    // Take the middle value as a hint
    values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
    // Fixed typo in the assertion message ("then" -> "than").
    STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size than number of available states.");
    STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
}
|||
|
|||
/// Returns the MDP state for the given belief id, creating a fresh state (and
/// scheduling it for exploration) if the belief has not been handled in the current
/// exploration round. On a restarted exploration, states of a previously built MDP
/// are reused instead of allocating new ones.
MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
    // Make sure the bit vector can index this belief id.
    exploredBeliefIds.grow(beliefId + 1, false);
    if (exploredBeliefIds.get(beliefId)) {
        return beliefIdToMdpStateMap[beliefId];
    } else {
        // This state needs exploration
        exploredBeliefIds.set(beliefId, true);

        // If this is a restart of the exploration, we still might have an MDP state for the belief
        if (exploredMdp) {
            auto findRes = beliefIdToMdpStateMap.find(beliefId);
            if (findRes != beliefIdToMdpStateMap.end()) {
                mdpStatesToExplore.push_back(findRes->second);
                return findRes->second;
            }
        }
        // At this point we need to add a new MDP state
        MdpStateType result = getCurrentNumberOfMdpStates();
        assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
        mdpStateToBeliefIdMap.push_back(beliefId);
        beliefIdToMdpStateMap[beliefId] = result;
        // Initialize the value bounds (and the value hint) for the new state.
        insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
        mdpStatesToExplore.push_back(result);
        return result;
    }
}
|||
|
|||
// Belief state related information
std::shared_ptr<BeliefManagerType> beliefManager;
std::vector<BeliefId> mdpStateToBeliefIdMap;            // position i holds the belief id of MDP state i
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap; // inverse mapping of mdpStateToBeliefIdMap
storm::storage::BitVector exploredBeliefIds;            // bit i set iff belief i was scheduled for (or finished) exploration

// Exploration information
std::deque<uint64_t> mdpStatesToExplore;                // FIFO queue of MDP states awaiting exploration
std::vector<std::map<MdpStateType, ValueType>> exploredMdpTransitions; // one (column -> value) map per matrix row
std::vector<MdpStateType> exploredChoiceIndices;        // row group indices of the MDP under construction
std::vector<ValueType> mdpActionRewards;                // one reward per explored choice (only filled when rewards are computed)
uint64_t currentMdpState;                               // the state currently being explored

// Special states during exploration
boost::optional<MdpStateType> extraTargetState;         // artificial sink representing guaranteed goal (if present)
boost::optional<MdpStateType> extraBottomState;         // artificial sink representing guaranteed non-goal (if present)
storm::storage::BitVector targetStates;
storm::storage::BitVector truncatedStates;              // states at which exploration was cut off
MdpStateType initialMdpState;

// Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp; // set once the MDP has been built; reused on restarts

// Value related information
std::vector<ValueType> const& pomdpLowerValueBounds;    // per-POMDP-state lower bounds (not owned)
std::vector<ValueType> const& pomdpUpperValueBounds;    // per-POMDP-state upper bounds (not owned)
std::vector<ValueType> lowerValueBounds;                // lower bound per explored MDP state
std::vector<ValueType> upperValueBounds;                // upper bound per explored MDP state
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed

// The current status of this explorer
Status status;
};
}
}
1602
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,117 @@ |
|||
#pragma once |
|||
|
|||
#include "storm-pomdp/analysis/FormulaInformation.h" |
|||
|
|||
#include "storm/api/verification.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/models/sparse/StandardRewardModel.h" |
|||
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" |
|||
#include "storm/storage/Scheduler.h" |
|||
|
|||
#include "storm/utility/macros.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
#include "storm/exceptions/NotSupportedException.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
/// Computes cheap ("trivial") per-state lower and upper bounds on POMDP values:
/// one bound comes from model checking the fully observable underlying MDP, the
/// other from evaluating a heuristically derived observation-based memoryless
/// (randomized) scheduler on the POMDP.
template <typename PomdpType>
class TrivialPomdpValueBoundsModelChecker {
public:
    typedef typename PomdpType::ValueType ValueType;
    TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
        // Intentionally left empty
    }

    /// Per-POMDP-state lower and upper value bounds.
    struct ValueBounds {
        std::vector<ValueType> lower;
        std::vector<ValueType> upper;
    };
    /// Convenience overload that derives the formula information itself.
    ValueBounds getValueBounds(storm::logic::Formula const& formula) {
        return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
    }

    /// Computes the bounds for a non-nested reachability probability or expected
    /// reward formula.
    /// @throws NotSupportedException for any other property type.
    /// @throws UnexpectedException if model checking yields no / an unexpected result.
    ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) {
        STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");
        // Compute the values on the fully observable MDP
        // We need an actual MDP here so that the apply scheduler method below will work.
        // Also, the api call in the next line will require a copy anyway.
        auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
        auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
        STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
        STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
        std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

        // Create some positional scheduler for the POMDP
        storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
        // For each state, we heuristically find a good distribution over output actions.
        std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices());
        if (info.isNonNestedExpectedRewardFormula()) {
            std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
            pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards);
        } else {
            pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues);
        }
        auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
        // Build one randomized choice distribution per observation, aggregated over
        // all states sharing that observation.
        for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
            auto obsStates = pomdp.getStatesWithObservation(obs);
            storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution;
            for (auto const &state : obsStates) {
                ValueType const& stateValue = fullyObservableResult[state];
                assert(stateValue >= storm::utility::zero<ValueType>());
                for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
                    ValueType const& choiceValue = fullyObservableChoiceValues[choice];
                    assert(choiceValue >= storm::utility::zero<ValueType>());
                    // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
                    ValueType choiceRating;
                    if (stateValue < choiceValue) {
                        choiceRating = choiceValue - stateValue;
                        if (!storm::utility::isZero(choiceValue)) {
                            choiceRating /= choiceValue;
                        }
                    } else {
                        choiceRating = stateValue - choiceValue;
                        if (!storm::utility::isZero(stateValue)) {
                            choiceRating /= stateValue;
                        }
                    }
                    assert(choiceRating <= storm::utility::one<ValueType>());
                    assert(choiceRating >= storm::utility::zero<ValueType>());
                    // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice
                    // Only choices rated better than 0.5 contribute probability mass.
                    if (choiceRating <= storm::utility::convertNumber<ValueType>(0.5)) {
                        choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating);
                    }
                }
            }
            choiceDistribution.normalize();
            for (auto const& state : obsStates) {
                pomdpScheduler.setChoice(choiceDistribution, state);
            }
        }
        STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
        // Evaluate the heuristic scheduler on the POMDP (via the underlying MDP).
        auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);

        auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
        STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained.");
        STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
        std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

        // Finally prepare the result
        // The fully observable MDP over-approximates the achievable optimum while any
        // concrete scheduler under-approximates it; which is 'lower' depends on the direction.
        ValueBounds result;
        if (info.minimize()) {
            result.lower = std::move(fullyObservableResult);
            result.upper = std::move(pomdpSchedulerResult);
        } else {
            result.lower = std::move(pomdpSchedulerResult);
            result.upper = std::move(fullyObservableResult);
        }
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
        return result;
    }

private:
    PomdpType const& pomdp; // not owned; must outlive this checker
};
|||
} |
|||
} |
|||
} |
@ -1,354 +0,0 @@ |
|||
#pragma once |
|||
|
|||
#include <map> |
|||
#include <boost/optional.hpp> |
|||
//#include <boost/container/flat_map.hpp> |
|||
|
|||
#include "storm/utility/macros.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
|
|||
namespace storm { |
|||
namespace storage { |
|||
|
|||
/// Manages beliefs (probability distributions over POMDP states sharing one
/// observation) and their Freudenthal-grid triangulations.
/// NOTE(review): superseded by BeliefManager; kept here for reference.
template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t>
// TODO: Change name. This actually does not only manage grid points.
class BeliefGrid {
public:

    typedef typename PomdpType::ValueType ValueType;
    //typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType
    typedef std::map<StateType, BeliefValueType> BeliefType;
    typedef uint64_t BeliefId;

    /// @param pomdp the managed POMDP (kept by reference; must outlive this object)
    /// @param precision numerical precision of the internal comparator
    BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
        // Intentionally left empty
    }

    /// A convex combination of grid points: belief ~ sum_i weights[i] * gridPoints[i].
    struct Triangulation {
        std::vector<BeliefId> gridPoints;
        std::vector<BeliefValueType> weights;
        uint64_t size() const {
            return weights.size();
        }
    };

    /// Returns the belief stored under the given id.
    BeliefType const& getGridPoint(BeliefId const& id) const {
        return gridPoints[id];
    }

    /// Looks up the id of a known grid point; throws if it was never added.
    BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
        auto idIt = gridPointToIdMap.find(gridPoint);
        STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
        return idIt->second;
    }

    /// Renders a belief as "{ state: probability, ... }".
    std::string toString(BeliefType const& belief) const {
        std::stringstream str;
        str << "{ ";
        bool first = true;
        for (auto const& entry : belief) {
            if (first) {
                first = false;
            } else {
                str << ", ";
            }
            str << entry.first << ": " << entry.second;
        }
        str << " }";
        return str.str();
    }

    /// Renders a triangulation as a multi-line weighted sum of grid points.
    std::string toString(Triangulation const& t) const {
        std::stringstream str;
        str << "(\n";
        for (uint64_t i = 0; i < t.size(); ++i) {
            str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n";
        }
        str <<")\n";
        return str.str();
    }

    /// Entry-wise comparison of two beliefs: identical supports and probabilities
    /// equal up to the comparator's precision.
    bool isEqual(BeliefType const& first, BeliefType const& second) const {
        if (first.size() != second.size()) {
            return false;
        }
        auto secondIt = second.begin();
        for (auto const& firstEntry : first) {
            if (firstEntry.first != secondIt->first) {
                return false;
            }
            if (!cc.isEqual(firstEntry.second, secondIt->second)) {
                return false;
            }
            ++secondIt;
        }
        return true;
    }

    /// Sanity check (for assertions): support states share one observation,
    /// probabilities are in (0, 1] and sum to one.
    bool assertBelief(BeliefType const& belief) const {
        // NOTE(review): zero<ValueType> is used although sum is a BeliefValueType; the
        // types coincide by default — confirm for diverging template instantiations.
        BeliefValueType sum = storm::utility::zero<ValueType>();
        boost::optional<uint32_t> observation;
        for (auto const& entry : belief) {
            // NOTE(review): entryObservation is uintmax_t but compared with a uint32_t;
            // presumably observations always fit in 32 bits — confirm.
            uintmax_t entryObservation = pomdp.getObservation(entry.first);
            if (observation) {
                if (observation.get() != entryObservation) {
                    STORM_LOG_ERROR("Beliefsupport contains different observations.");
                    return false;
                }
            } else {
                observation = entryObservation;
            }
            if (cc.isZero(entry.second)) {
                // We assume that beliefs only consider their support.
                STORM_LOG_ERROR("Zero belief probability.");
                return false;
            }
            if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
                STORM_LOG_ERROR("Negative belief probability.");
                return false;
            }
            if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
                STORM_LOG_ERROR("Belief probability greater than one.");
                return false;
            }
            sum += entry.second;
        }
        if (!cc.isOne(sum)) {
            STORM_LOG_ERROR("Belief does not sum up to one.");
            return false;
        }
        return true;
    }

    /// Sanity check (for assertions): the triangulation is a valid convex
    /// combination of grid points that re-combines to the given belief.
    bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const {
        if (triangulation.weights.size() != triangulation.gridPoints.size()) {
            STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
            return false;
        }
        if (triangulation.size() == 0) {
            STORM_LOG_ERROR("Empty triangulation.");
            return false;
        }
        BeliefType triangulatedBelief;
        BeliefValueType weightSum = storm::utility::zero<BeliefValueType>();
        for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
            if (cc.isZero(triangulation.weights[i])) {
                STORM_LOG_ERROR("Zero weight in triangulation.");
                return false;
            }
            if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
                STORM_LOG_ERROR("Negative weight in triangulation.");
                return false;
            }
            if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
                // NOTE(review): this error path falls through without returning false,
                // unlike the sibling checks — confirm whether that is intended.
                STORM_LOG_ERROR("Weight greater than one in triangulation.");
            }
            weightSum += triangulation.weights[i];
            BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
            for (auto const& pointEntry : gridPoint) {
                BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second;
                triangulatedValue += triangulation.weights[i] * pointEntry.second;
            }
        }
        if (!cc.isOne(weightSum)) {
            STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
            return false;
        }
        if (!assertBelief(triangulatedBelief)) {
            // NOTE(review): also falls through without returning false — confirm.
            STORM_LOG_ERROR("Triangulated belief is not a belief.");
        }
        if (!isEqual(belief, triangulatedBelief)) {
            STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << ".");
            return false;
        }
        return true;
    }

    /// Returns the id of the belief for the (unique) initial POMDP state,
    /// adding it to the grid if necessary.
    BeliefId getInitialBelief() {
        STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                         "POMDP contains more than one initial state");
        STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
                         "POMDP does not contain an initial state");
        BeliefType belief;
        belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();

        STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
        return getOrAddGridPointId(belief);
    }

    /// Returns the (unique) observation of the states in the belief's support.
    uint32_t getBeliefObservation(BeliefType belief) {
        STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
        return pomdp.getObservation(belief.begin()->first);
    }

    uint32_t getBeliefObservation(BeliefId beliefId) {
        return getBeliefObservation(getGridPoint(beliefId));
    }


    /// Triangulates the belief over the Freudenthal grid of the given resolution,
    /// i.e. expresses it as a convex combination of grid points.
    Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
        //TODO this can also be simplified using the sparse vector interpretation
        //TODO Enable chaching for this method?
        STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");

        auto nrStates = pomdp.getNumberOfStates();

        // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
        // Variable names are based on the paper
        // TODO avoid reallocations for these vectors
        std::vector<BeliefValueType> x(nrStates);
        std::vector<BeliefValueType> v(nrStates);
        std::vector<BeliefValueType> d(nrStates);
        auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);

        // x[i] = resolution * sum of belief mass on states >= i; v = floor(x); d = fractional parts.
        for (size_t i = 0; i < nrStates; ++i) {
            for (auto const &probEntry : belief) {
                if (probEntry.first >= i) {
                    x[i] += convResolution * probEntry.second;
                }
            }
            v[i] = storm::utility::floor(x[i]);
            d[i] = x[i] - v[i];
        }

        // Permutation sorting the fractional parts.
        auto p = storm::utility::vector::getSortedIndices(d);

        // q-vectors of the enclosing simplex corners.
        std::vector<std::vector<BeliefValueType>> qs(nrStates, std::vector<BeliefValueType>(nrStates));
        for (size_t i = 0; i < nrStates; ++i) {
            if (i == 0) {
                for (size_t j = 0; j < nrStates; ++j) {
                    qs[i][j] = v[j];
                }
            } else {
                for (size_t j = 0; j < nrStates; ++j) {
                    if (j == p[i - 1]) {
                        qs[i][j] = qs[i - 1][j] + storm::utility::one<BeliefValueType>();
                    } else {
                        qs[i][j] = qs[i - 1][j];
                    }
                }
            }
        }

        Triangulation result;
        // The first weight is 1-sum(other weights). We therefore process the js in reverse order
        BeliefValueType firstWeight = storm::utility::one<BeliefValueType>();
        for (size_t j = nrStates; j > 0;) {
            --j;
            // First create the weights. The weights vector will be reversed at the end.
            // NOTE(review): 'weight' is declared as ValueType although it holds
            // BeliefValueType values; the types coincide by default — confirm.
            ValueType weight;
            if (j == 0) {
                weight = firstWeight;
            } else {
                weight = d[p[j - 1]] - d[p[j]];
                firstWeight -= weight;
            }
            if (!cc.isZero(weight)) {
                result.weights.push_back(weight);
                BeliefType gridPoint;
                auto const& qsj = qs[j];
                // Recover the grid point from successive differences of q.
                for (size_t i = 0; i < nrStates - 1; ++i) {
                    BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1];
                    if (!cc.isZero(gridPointEntry)) {
                        gridPoint[i] = gridPointEntry / convResolution;
                    }
                }
                if (!cc.isZero(qsj[nrStates - 1])) {
                    gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
                }
                result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
            }
        }

        STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result));
        return result;
    }

    /// Adds 'value' to distr[state], creating the entry if absent.
    template<typename DistributionType>
    void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
        auto insertionRes = distr.emplace(state, value);
        if (!insertionRes.second) {
            insertionRes.first->second += value;
        }
    }

    /// Number of grid points stored (ids are 0 .. result-1).
    BeliefId getNumberOfGridPointIds() const {
        return gridPoints.size();
    }

    /// Computes the successor distribution of the given belief under the given
    /// action; if resolutions are provided, each successor belief is additionally
    /// triangulated onto the grid (per-observation resolution).
    std::map<BeliefId, ValueType> expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
        std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
        // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds

        BeliefType gridPoint = getGridPoint(gridPointId);

        // Find the probability we go to each observation
        BeliefType successorObs; // This is actually not a belief but has the same type
        for (auto const& pointEntry : gridPoint) {
            uint64_t state = pointEntry.first;
            for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
                if (!storm::utility::isZero(pomdpTransition.getValue())) {
                    auto obs = pomdp.getObservation(pomdpTransition.getColumn());
                    addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
                }
            }
        }

        // Now for each successor observation we find and potentially triangulate the successor belief
        for (auto const& successor : successorObs) {
            BeliefType successorBelief;
            for (auto const& pointEntry : gridPoint) {
                uint64_t state = pointEntry.first;
                for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
                    if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
                        // Bayesian update: condition on observing successor.first.
                        ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
                        addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
                    }
                }
            }
            STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");

            if (observationTriangulationResolutions) {
                Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
                for (size_t j = 0; j < triangulation.size(); ++j) {
                    addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
                }
            } else {
                addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
            }
        }

        return destinations;

    }

    /// Expand with per-observation grid triangulation of the successors.
    std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
        return expandInternal(gridPointId, actionIndex, observationResolutions);
    }

    /// Expand without triangulating the successors.
    std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
        return expandInternal(gridPointId, actionIndex);
    }

private:

    /// Returns the id of the given grid point, registering it first if unknown.
    BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
        auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
        if (insertioRes.second) {
            // There actually was an insertion, so add the new grid state
            gridPoints.push_back(gridPoint);
        }
        // Return the id
        return insertioRes.first->second;
    }

    PomdpType const& pomdp; // not owned; must outlive this object

    std::vector<BeliefType> gridPoints;              // id -> belief
    std::map<BeliefType, BeliefId> gridPointToIdMap; // belief -> id (inverse mapping)
    storm::utility::ConstantsComparator<ValueType> cc;


};
|||
} |
|||
} |
@ -0,0 +1,436 @@ |
|||
#pragma once |
|||
|
|||
#include <unordered_map> |
|||
#include <boost/optional.hpp> |
|||
#include <boost/container/flat_map.hpp> |
|||
#include "storm/adapters/RationalNumberAdapter.h" |
|||
#include "storm/utility/macros.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
|
|||
namespace storm { |
|||
namespace storage { |
|||
|
|||
template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t> |
|||
class BeliefManager { |
|||
public: |
|||
|
|||
typedef typename PomdpType::ValueType ValueType; |
|||
typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation) |
|||
typedef uint64_t BeliefId; |
|||
|
|||
/// Creates a belief manager for the given POMDP and registers the initial belief.
/// @param pomdp the managed POMDP (kept by reference; must outlive this manager)
/// @param precision numerical precision of the internal comparator
BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
    initialBeliefId = computeInitialBelief();
}
|||
|
|||
/// Enables reward computation by caching the action-based total reward vector of
/// the named reward model (or of the POMDP's unique reward model if no name is given).
void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) {
    if (!rewardModelName) {
        // No name given: fall back to the unique reward model of the POMDP.
        setRewardModel(pomdp.getUniqueRewardModelName());
        return;
    }
    auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get());
    pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix());
}
|||
|
|||
/// Disables reward computation by clearing the cached action reward vector.
void unsetRewardModel() {
    pomdpActionRewardVector.clear();
}
|||
|
|||
/// A convex combination of grid-point beliefs: belief ~ sum_i weights[i] * gridPoints[i].
struct Triangulation {
    std::vector<BeliefId> gridPoints;      // ids of the participating grid-point beliefs
    std::vector<BeliefValueType> weights;  // matching convex-combination weights
    // Number of (grid point, weight) pairs.
    uint64_t size() const {
        return weights.size();
    }
};
|||
|
|||
/// Sentinel id denoting "no belief".
BeliefId noId() const {
    return std::numeric_limits<BeliefId>::max();
}
|||
|
|||
/// Checks whether the two belief ids refer to (numerically) equal beliefs.
bool isEqual(BeliefId const& first, BeliefId const& second) const {
    return isEqual(getBelief(first), getBelief(second));
}
|||
|
|||
/// Renders the belief with the given id as a string.
std::string toString(BeliefId const& beliefId) const {
    return toString(getBelief(beliefId));
}
|||
|
|||
|
|||
/// Renders a triangulation as a multi-line weighted sum of its grid-point beliefs.
std::string toString(Triangulation const& t) const {
    std::stringstream out;
    out << "(\n";
    for (uint64_t index = 0; index < t.size(); ++index) {
        out << "\t" << t.weights[index] << " * \t" << toString(getBelief(t.gridPoints[index])) << "\n";
    }
    out << ")\n";
    return out.str();
}
|||
|
|||
template <typename SummandsType> |
|||
ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { |
|||
ValueType result = storm::utility::zero<ValueType>(); |
|||
for (auto const& entry : getBelief(beliefId)) { |
|||
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
|
|||
/// Returns the id of the belief corresponding to the initial state of the POMDP
/// (computed once in the constructor).
BeliefId const& getInitialBelief() const {
    return initialBeliefId;
}
|||
|
|||
/// Returns the expected immediate reward for taking the given (observation-local)
/// action in the given belief, i.e. sum_s b(s) * r(s, action).
/// A reward model must have been set via setRewardModel() beforehand.
ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
    auto const& belief = getBelief(beliefId);
    STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
    ValueType expectedReward = storm::utility::zero<ValueType>();
    auto const& rowGroupIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
    for (auto const& beliefEntry : belief) {
        // Translate the observation-local action into a global choice index.
        uint64_t const globalChoice = rowGroupIndices[beliefEntry.first] + localActionIndex;
        STORM_LOG_ASSERT(globalChoice < rowGroupIndices[beliefEntry.first + 1], "Invalid local action index.");
        STORM_LOG_ASSERT(globalChoice < pomdpActionRewardVector.size(), "Invalid choice index.");
        expectedReward += beliefEntry.second * pomdpActionRewardVector[globalChoice];
    }
    return expectedReward;
}
|||
|
|||
/// Returns the (unique) observation of the states in the support of the belief
/// with the given id.
uint32_t getBeliefObservation(BeliefId beliefId) {
    return getBeliefObservation(getBelief(beliefId));
}
|||
|
|||
/// Returns the number of actions available in the given belief. All states in a
/// belief's support share one observation and thus the same number of choices, so
/// querying the first support state suffices.
uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
    // Bind by const reference: the previous 'auto belief = ...' copied the whole flat_map.
    auto const& belief = getBelief(beliefId);
    return pomdp.getNumberOfChoices(belief.begin()->first);
}
|||
|
|||
/// Triangulates the belief with the given id over the Freudenthal grid of the
/// given resolution.
Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
    return triangulateBelief(getBelief(beliefId), resolution);
}
|||
|
|||
template<typename DistributionType> |
|||
void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { |
|||
auto insertionRes = distr.emplace(state, value); |
|||
if (!insertionRes.second) { |
|||
insertionRes.first->second += value; |
|||
} |
|||
} |
|||
|
|||
/// Number of beliefs currently stored (valid ids are 0 .. result-1).
BeliefId getNumberOfBeliefIds() const {
    return beliefs.size();
}
|||
|
|||
/// Expands the belief with the given action and triangulates each successor belief
/// onto the grid using the per-observation resolutions.
/// @return (belief id, probability) pairs of the resulting distribution
std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
    return expandInternal(beliefId, actionIndex, observationResolutions);
}
|||
|
|||
/// Expands the belief with the given action without triangulating the successors.
/// @return (belief id, probability) pairs of the resulting distribution
std::vector<std::pair<BeliefId, ValueType>> expand(BeliefId const& beliefId, uint64_t actionIndex) {
    return expandInternal(beliefId, actionIndex);
}
|||
|
|||
private: |
|||
|
|||
/// Returns the belief stored under the given id.
/// The id must stem from this manager and must not be the noId() sentinel.
BeliefType const& getBelief(BeliefId const& id) const {
    // Fixed typo in the assertion message ("non-existend" -> "non-existent").
    STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
    STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
    return beliefs[id];
}
|||
|
|||
/// Looks up the id of a known belief.
/// @throws UnexpectedException if the belief was never added to this manager.
BeliefId getId(BeliefType const& belief) const {
    auto idIt = beliefToIdMap.find(belief);
    STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief.");
    return idIt->second;
}
|||
|
|||
/// Renders a belief as "{ state: probability, ... }".
std::string toString(BeliefType const& belief) const {
    std::stringstream out;
    out << "{ ";
    // Separator-prefix pattern: empty before the first entry, ", " afterwards.
    const char* separator = "";
    for (auto const& beliefEntry : belief) {
        out << separator << beliefEntry.first << ": " << beliefEntry.second;
        separator = ", ";
    }
    out << " }";
    return out.str();
}
|||
|
|||
/// Entry-wise comparison of two beliefs: both must have identical support states
/// (in order) and probabilities equal up to the comparator's precision.
bool isEqual(BeliefType const& first, BeliefType const& second) const {
    if (first.size() != second.size()) {
        return false;
    }
    auto firstIt = first.begin();
    auto secondIt = second.begin();
    while (firstIt != first.end()) {
        // States are compared first so a support mismatch short-circuits the value check.
        bool const sameState = (firstIt->first == secondIt->first);
        if (!sameState || !cc.isEqual(firstIt->second, secondIt->second)) {
            return false;
        }
        ++firstIt;
        ++secondIt;
    }
    return true;
}
|||
|
|||
/// Sanity check (intended for assertions): verifies that the given map is a valid
/// belief, i.e. it refers only to existing POMDP states, all support states share
/// one observation, and all probabilities are in (0, 1] and sum to one (up to the
/// comparator's precision). Logs an error and returns false on the first violation.
bool assertBelief(BeliefType const& belief) const {
    // NOTE(review): zero<ValueType> is used although sum is a BeliefValueType; the
    // two types coincide by default — confirm for diverging template instantiations.
    BeliefValueType sum = storm::utility::zero<ValueType>();
    boost::optional<uint32_t> observation;
    for (auto const& entry : belief) {
        if (entry.first >= pomdp.getNumberOfStates()) {
            STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << ".");
            return false;
        }
        uint64_t entryObservation = pomdp.getObservation(entry.first);
        if (observation) {
            if (observation.get() != entryObservation) {
                STORM_LOG_ERROR("Beliefsupport contains different observations.");
                return false;
            }
        } else {
            observation = entryObservation;
        }
        if (cc.isZero(entry.second)) {
            // We assume that beliefs only consider their support.
            STORM_LOG_ERROR("Zero belief probability.");
            return false;
        }
        if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
            STORM_LOG_ERROR("Negative belief probability.");
            return false;
        }
        if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
            STORM_LOG_ERROR("Belief probability greater than one.");
            return false;
        }
        sum += entry.second;
    }
    if (!cc.isOne(sum)) {
        STORM_LOG_ERROR("Belief does not sum up to one.");
        return false;
    }
    return true;
}
|||
|
|||
/// Checks whether the given triangulation is valid for the given belief: weights and grid
/// points match in number, all weights lie in (0, 1] and sum up to one, and the weighted sum
/// of the grid points reproduces the original belief.
/// Logs an error and returns false otherwise. Intended for use inside STORM_LOG_ASSERT.
bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const {
    if (triangulation.weights.size() != triangulation.gridPoints.size()) {
        STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
        return false;
    }
    if (triangulation.size() == 0) {
        STORM_LOG_ERROR("Empty triangulation.");
        return false;
    }
    BeliefType triangulatedBelief;
    BeliefValueType weightSum = storm::utility::zero<BeliefValueType>();
    for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
        if (cc.isZero(triangulation.weights[i])) {
            STORM_LOG_ERROR("Zero weight in triangulation.");
            return false;
        }
        if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
            STORM_LOG_ERROR("Negative weight in triangulation.");
            return false;
        }
        if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
            STORM_LOG_ERROR("Weight greater than one in triangulation.");
            // Fix: previously fell through without reporting the failure to the caller.
            return false;
        }
        weightSum += triangulation.weights[i];
        // Accumulate the weighted grid point into the reconstructed belief.
        BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]);
        for (auto const& pointEntry : gridPoint) {
            // Fix: default-construct the entry with zero<BeliefValueType>() (was zero<ValueType>()).
            BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<BeliefValueType>()).first->second;
            triangulatedValue += triangulation.weights[i] * pointEntry.second;
        }
    }
    if (!cc.isOne(weightSum)) {
        STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
        return false;
    }
    if (!assertBelief(triangulatedBelief)) {
        STORM_LOG_ERROR("Triangulated belief is not a belief.");
        // Fix: previously fell through without reporting the failure to the caller.
        return false;
    }
    if (!isEqual(belief, triangulatedBelief)) {
        STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << ".");
        return false;
    }
    return true;
}
|||
|
|||
/// Returns the observation shared by all states in the support of the given belief.
/// Since assertBelief establishes that all support states have the same observation,
/// looking at the first entry suffices.
/// Fix: take the belief by const reference (was by value, copying the whole map for a read-only lookup).
uint32_t getBeliefObservation(BeliefType const& belief) {
    STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
    return pomdp.getObservation(belief.begin()->first);
}
|||
|
|||
struct FreudenthalDiff { |
|||
FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; |
|||
StateType dimension; // i |
|||
BeliefValueType diff; // d[i] |
|||
bool operator>(FreudenthalDiff const& other) const { |
|||
if (diff != other.diff) { |
|||
return diff > other.diff; |
|||
} else { |
|||
return dimension < other.dimension; |
|||
} |
|||
} |
|||
}; |
|||
|
|||
/// Computes the Freudenthal triangulation of the given belief at the given resolution,
/// following Lovejoy's construction: the result is a set of grid-point beliefs (entries are
/// multiples of 1/resolution) with positive weights such that the weighted sum of the grid
/// points equals the input belief. Exploits sparse belief support for efficiency.
/// @param belief a valid belief (see assertBelief)
/// @param resolution the grid resolution
/// @return the triangulation (grid point ids and matching weights)
/// Fixes: weight accumulation now consistently uses one<BeliefValueType>() (was one<ValueType>());
/// the belief is taken by const reference (was by value) since it is only read.
Triangulation triangulateBelief(BeliefType const& belief, uint64_t resolution) {
    STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
    StateType numEntries = belief.size();
    Triangulation result;

    // Quickly triangulate Dirac beliefs
    if (numEntries == 1u) {
        result.weights.push_back(storm::utility::one<BeliefValueType>());
        result.gridPoints.push_back(getOrAddBeliefId(belief));
    } else {
        auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
        // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
        // Variable names are mostly based on the paper
        // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()).
        // Initialize diffs and the first row of the 'qs' matrix (aka v)
        std::set<FreudenthalDiff, std::greater<FreudenthalDiff>> sorted_diffs; // d (and p?) in the paper
        std::vector<BeliefValueType> qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v
        qsRow.reserve(numEntries);
        std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
        toOriginalIndicesMap.reserve(numEntries);
        BeliefValueType x = convResolution;
        for (auto const& entry : belief) {
            qsRow.push_back(storm::utility::floor(x)); // v
            sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
            toOriginalIndicesMap.push_back(entry.first);
            x -= entry.second * convResolution;
        }
        // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
        qsRow.push_back(storm::utility::zero<BeliefValueType>());

        result.weights.reserve(numEntries);
        result.gridPoints.reserve(numEntries);
        auto currentSortedDiff = sorted_diffs.begin();
        auto previousSortedDiff = sorted_diffs.end();
        --previousSortedDiff;
        for (StateType i = 0; i < numEntries; ++i) {
            // Compute the weight for the grid points
            BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff;
            if (i == 0) {
                // The first weight is a bit different
                // Fix: use one<BeliefValueType>() (was one<ValueType>()) for a consistent value type.
                weight += storm::utility::one<BeliefValueType>();
            } else {
                // 'compute' the next row of the qs matrix
                qsRow[previousSortedDiff->dimension] += storm::utility::one<BeliefValueType>();
            }
            if (!cc.isZero(weight)) {
                result.weights.push_back(weight);
                // Compute the grid point
                BeliefType gridPoint;
                for (StateType j = 0; j < numEntries; ++j) {
                    BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
                    if (!cc.isZero(gridPointEntry)) {
                        gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution;
                    }
                }
                result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
            }
            previousSortedDiff = currentSortedDiff++;
        }
    }
    STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result));
    return result;
}
|||
|
|||
/// Expands the belief with the given id by taking the given action and returns the resulting
/// distribution over successor beliefs.
/// First, the probability of reaching each successor observation is collected; then, per
/// observation, the successor belief is computed via the Bayesian belief update (conditioning
/// on that observation).
/// @param beliefId the id of the belief to expand
/// @param actionIndex the action (choice index) taken in the belief
/// @param observationTriangulationResolutions if given, each successor belief is triangulated
///        using the resolution stored for its observation, and the triangulation's grid points
///        are returned (weighted by the observation probability) instead of the exact successors.
/// @return pairs of successor belief id and the probability of moving to that successor.
std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
    std::vector<std::pair<BeliefId, ValueType>> destinations;

    BeliefType belief = getBelief(beliefId);

    // Find the probability we go to each observation
    BeliefType successorObs; // This is actually not a belief but has the same type
    for (auto const& pointEntry : belief) {
        uint64_t state = pointEntry.first;
        for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
            if (!storm::utility::isZero(pomdpTransition.getValue())) {
                auto obs = pomdp.getObservation(pomdpTransition.getColumn());
                addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
            }
        }
    }

    // Now for each successor observation we find and potentially triangulate the successor belief
    for (auto const& successor : successorObs) {
        BeliefType successorBelief;
        for (auto const& pointEntry : belief) {
            uint64_t state = pointEntry.first;
            for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
                if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
                    // Bayes: normalize by the probability of observing this observation.
                    ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
                    addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
                }
            }
        }
        STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");

        // Insert the destination. We know that destinations have to be disjoined since they have different observations
        if (observationTriangulationResolutions) {
            Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
            for (size_t j = 0; j < triangulation.size(); ++j) {
                // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times
                destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
            }
        } else {
            destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second);
        }
    }

    return destinations;

}
|||
|
|||
/// Constructs the initial belief, i.e., the Dirac distribution on the unique initial state
/// of the POMDP, and registers it.
/// @return the id of the initial belief
BeliefId computeInitialBelief() {
    STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                     "POMDP contains more than one initial state");
    STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
                     "POMDP does not contain an initial state");
    BeliefType belief;
    // Fix: belief entries have type BeliefValueType, so use one<BeliefValueType>() (was one<ValueType>()).
    belief[*pomdp.getInitialStates().begin()] = storm::utility::one<BeliefValueType>();

    STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
    return getOrAddBeliefId(belief);
}
|||
|
|||
/// Looks up the id of the given belief, registering it under a fresh id first if it has not
/// been seen before.
BeliefId getOrAddBeliefId(BeliefType const& belief) {
    BeliefId candidateId = beliefs.size();
    auto insertionResult = beliefToIdMap.emplace(belief, candidateId);
    if (insertionResult.second) {
        // The belief was unknown, so store it under the freshly assigned id.
        beliefs.push_back(belief);
    }
    return insertionResult.first->second;
}
|||
|
|||
struct BeliefHash { |
|||
std::size_t operator()(const BeliefType& belief) const { |
|||
std::size_t seed = 0; |
|||
// Assumes that beliefs are ordered |
|||
for (auto const& entry : belief) { |
|||
boost::hash_combine(seed, entry.first); |
|||
boost::hash_combine(seed, entry.second); |
|||
} |
|||
return seed; |
|||
} |
|||
}; |
|||
|
|||
// The POMDP whose beliefs are managed.
PomdpType const& pomdp;
// Per-choice rewards of the pomdp — presumably populated elsewhere when a reward
// objective is considered; NOTE(review): confirm at the initialization site.
std::vector<ValueType> pomdpActionRewardVector;

// All beliefs created so far; the index into this vector is the belief's BeliefId.
std::vector<BeliefType> beliefs;
// Inverse mapping of 'beliefs', used by getOrAddBeliefId to avoid storing duplicates.
std::unordered_map<BeliefType, BeliefId, BeliefHash> beliefToIdMap;
// Id of the initial belief — presumably assigned from computeInitialBelief(); confirm at init site.
BeliefId initialBeliefId;

// Comparator for approximate comparisons of belief probabilities and weights.
storm::utility::ConstantsComparator<ValueType> cc;
|||
|
|||
|
|||
}; |
|||
} |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue