Browse Source
implemented the pomdp unfolding to convert k-memory-bounded pomdps to memoryless pomdps
tempestpy_adaptions
implemented the pomdp unfolding to convert k-memory-bounded pomdps to memoryless pomdps
tempestpy_adaptions
TimQu
7 years ago
3 changed files with 235 additions and 34 deletions
-
83src/storm-pomdp-cli/storm-pomdp.cpp
-
147src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp
-
37src/storm-pomdp/transformer/PomdpMemoryUnfolder.h
@ -0,0 +1,147 @@ |
|||
#include <storm/exceptions/NotSupportedException.h>
|
|||
#include "storm-pomdp/transformer/PomdpMemoryUnfolder.h"
|
|||
#include "storm/storage/sparse/ModelComponents.h"
|
|||
|
|||
#include "storm/exceptions/NotSupportedException.h"
|
|||
|
|||
namespace storm { |
|||
namespace transformer { |
|||
|
|||
|
|||
template<typename ValueType> |
|||
PomdpMemoryUnfolder<ValueType>::PomdpMemoryUnfolder(storm::models::sparse::Pomdp<ValueType> const& pomdp, uint64_t numMemoryStates) : pomdp(pomdp), numMemoryStates(numMemoryStates) { |
|||
// intentionally left empty
|
|||
} |
|||
|
|||
|
|||
template<typename ValueType> |
|||
std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> PomdpMemoryUnfolder<ValueType>::transform() const { |
|||
storm::storage::sparse::ModelComponents<ValueType> components; |
|||
components.transitionMatrix = transformTransitions(); |
|||
components.stateLabeling = transformStateLabeling(); |
|||
components.observabilityClasses = transformObservabilityClasses(); |
|||
for (auto const& rewModel : pomdp.getRewardModels()) { |
|||
components.rewardModels.emplace(rewModel.first, transformRewardModel(rewModel.second)); |
|||
} |
|||
|
|||
return std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components)); |
|||
} |
|||
|
|||
|
|||
template<typename ValueType> |
|||
storm::storage::SparseMatrix<ValueType> PomdpMemoryUnfolder<ValueType>::transformTransitions() const { |
|||
storm::storage::SparseMatrix<ValueType> const& origTransitions = pomdp.getTransitionMatrix(); |
|||
storm::storage::SparseMatrixBuilder<ValueType> builder(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates, |
|||
pomdp.getNumberOfStates() * numMemoryStates, |
|||
origTransitions.getEntryCount() * numMemoryStates * numMemoryStates, |
|||
true, |
|||
false, |
|||
pomdp.getNumberOfStates() * numMemoryStates); |
|||
|
|||
uint64_t row = 0; |
|||
for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { |
|||
for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { |
|||
builder.newRowGroup(row); |
|||
for (uint64_t origRow = origTransitions.getRowGroupIndices()[modelState]; origRow < origTransitions.getRowGroupIndices()[modelState + 1]; ++origRow) { |
|||
for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) { |
|||
for (auto const& entry : origTransitions.getRow(origRow)) { |
|||
builder.addNextValue(row, getUnfoldingState(entry.getColumn(), memStatePrime), entry.getValue()); |
|||
} |
|||
++row; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return builder.build(); |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
storm::models::sparse::StateLabeling PomdpMemoryUnfolder<ValueType>::transformStateLabeling() const { |
|||
storm::models::sparse::StateLabeling labeling(pomdp.getNumberOfStates() * numMemoryStates); |
|||
for (auto const& labelName : pomdp.getStateLabeling().getLabels()) { |
|||
storm::storage::BitVector newStates(pomdp.getNumberOfStates() * numMemoryStates, false); |
|||
for (auto const& modelState : pomdp.getStateLabeling().getStates(labelName)) { |
|||
for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { |
|||
newStates.set(getUnfoldingState(modelState, memState)); |
|||
} |
|||
} |
|||
labeling.addLabel(labelName, std::move(newStates)); |
|||
} |
|||
return labeling; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
std::vector<uint32_t> PomdpMemoryUnfolder<ValueType>::transformObservabilityClasses() const { |
|||
std::vector<uint32_t> observations; |
|||
observations.reserve(pomdp.getNumberOfStates() * numMemoryStates); |
|||
for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { |
|||
for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { |
|||
observations.push_back(getUnfoldingObersvation(pomdp.getObservation(modelState), memState)); |
|||
} |
|||
} |
|||
return observations; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
storm::models::sparse::StandardRewardModel<ValueType> PomdpMemoryUnfolder<ValueType>::transformRewardModel(storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) const { |
|||
boost::optional<std::vector<ValueType>> stateRewards, actionRewards; |
|||
if (rewardModel.hasStateRewards()) { |
|||
stateRewards = std::vector<ValueType>(); |
|||
stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates); |
|||
for (auto const& stateReward : rewardModel.getStateRewardVector()) { |
|||
for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { |
|||
stateRewards->push_back(stateReward); |
|||
} |
|||
} |
|||
} |
|||
if (rewardModel.hasStateActionRewards()) { |
|||
actionRewards = std::vector<ValueType>(); |
|||
stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates); |
|||
for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { |
|||
for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { |
|||
for (uint64_t origRow = pomdp.getTransitionMatrix().getRowGroupIndices()[modelState]; origRow < pomdp.getTransitionMatrix().getRowGroupIndices()[modelState + 1]; ++origRow) { |
|||
ValueType const& actionReward = rewardModel.getStateActionReward(origRow); |
|||
for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) { |
|||
actionRewards->push_back(actionReward); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
STORM_LOG_THROW(rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported."); |
|||
return storm::models::sparse::StandardRewardModel<ValueType>(std::move(stateRewards), std::move(actionRewards)); |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint64_t PomdpMemoryUnfolder<ValueType>::getUnfoldingState(uint64_t modelState, uint32_t memoryState) const { |
|||
return modelState * numMemoryStates + memoryState; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint64_t PomdpMemoryUnfolder<ValueType>::getModelState(uint64_t unfoldingState) const { |
|||
return unfoldingState / numMemoryStates; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint32_t PomdpMemoryUnfolder<ValueType>::getMemoryState(uint64_t unfoldingState) const { |
|||
return unfoldingState % numMemoryStates; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint32_t PomdpMemoryUnfolder<ValueType>::getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const { |
|||
return modelObservation * numMemoryStates + memoryState; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint32_t PomdpMemoryUnfolder<ValueType>::getModelObersvation(uint32_t unfoldingObservation) const { |
|||
return unfoldingObservation / numMemoryStates; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
uint32_t PomdpMemoryUnfolder<ValueType>::getMemoryStateFromObservation(uint32_t unfoldingObservation) const { |
|||
return unfoldingObservation % numMemoryStates; |
|||
} |
|||
|
|||
// Explicit instantiation: the member templates above are defined in this source file,
// so the instantiation for storm::RationalNumber is generated here.
template class PomdpMemoryUnfolder<storm::RationalNumber>;
|||
} |
|||
} |
@ -0,0 +1,37 @@ |
|||
#pragma once |
|||
|
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/models/sparse/StandardRewardModel.h" |
|||
|
|||
namespace storm { |
|||
namespace transformer { |
|||
|
|||
template<typename ValueType> |
|||
class PomdpMemoryUnfolder { |
|||
|
|||
public: |
|||
|
|||
PomdpMemoryUnfolder(storm::models::sparse::Pomdp<ValueType> const& pomdp, uint64_t numMemoryStates); |
|||
|
|||
std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> transform() const; |
|||
|
|||
private: |
|||
storm::storage::SparseMatrix<ValueType> transformTransitions() const; |
|||
storm::models::sparse::StateLabeling transformStateLabeling() const; |
|||
std::vector<uint32_t> transformObservabilityClasses() const; |
|||
storm::models::sparse::StandardRewardModel<ValueType> transformRewardModel(storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) const; |
|||
|
|||
uint64_t getUnfoldingState(uint64_t modelState, uint32_t memoryState) const; |
|||
uint64_t getModelState(uint64_t unfoldingState) const; |
|||
uint32_t getMemoryState(uint64_t unfoldingState) const; |
|||
|
|||
uint32_t getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const; |
|||
uint32_t getModelObersvation(uint32_t unfoldingObservation) const; |
|||
uint32_t getMemoryStateFromObservation(uint32_t unfoldingObservation) const; |
|||
|
|||
|
|||
storm::models::sparse::Pomdp<ValueType> const& pomdp; |
|||
uint32_t numMemoryStates; |
|||
}; |
|||
} |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue