@ -1,5 +1,7 @@
# include "storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.h"
# include "storm/utility/vector.h"
namespace storm {
namespace transformer {
@ -16,57 +18,89 @@ namespace storm {
std : : shared_ptr < RawPolynomialCache > cache ;
} ;
template < typename ValueType >
std : : shared_ptr < storm : : models : : sparse : : Model < storm : : RationalFunction > > ApplyFiniteSchedulerToPomdp < ValueType > : : transform ( ) const {
uint64_t nrStates = pomdp . getNumberOfStates ( ) ;
std : : unordered_map < uint32_t , std : : vector < storm : : RationalFunction > > parameters ;
bool nondeterminism = false ;
storm : : storage : : SparseMatrixBuilder < storm : : RationalFunction > smb ( nrStates , nrStates , 0 , ! nondeterminism , false , nrStates ) ;
template < typename ValueType >
std : : unordered_map < uint32_t , std : : vector < storm : : RationalFunction > > ApplyFiniteSchedulerToPomdp < ValueType > : : getObservationChoiceWeights ( ) const {
std : : unordered_map < uint32_t , std : : vector < storm : : RationalFunction > > res ;
RationalFunctionConstructor ratFuncConstructor ;
for ( uint64_t state = 0 ; state < nrStates ; + + state ) {
if ( nondeterminism ) {
smb . newRowGroup ( state ) ;
}
for ( uint64_t state = 0 ; state < pomdp . getNumberOfStates ( ) ; + + state ) {
auto observation = pomdp . getObservation ( state ) ;
auto it = pa ram eter s. find ( observation ) ;
std : : vector < storm : : RationalFunction > localWeights ;
if ( it = = parameters . end ( ) ) {
storm : : RationalFunction lastWeight ( 1 ) ;
auto it = res . find ( observation ) ;
if ( it = = res . end ( ) ) {
std : : vector < storm : : RationalFunction > weights ;
storm : : RationalFunction lastWeight = storm : : utility : : one < storm : : RationalFunction > ( ) ;
for ( uint64_t a = 0 ; a < pomdp . getNumberOfChoices ( state ) - 1 ; + + a ) {
std : : string varName = " p " + std : : to_string ( observation ) + " _ " + std : : to_string ( a ) ;
localW eights. push_back ( ratFuncConstructor . translate ( carl : : freshRealVariable ( varName ) ) ) ;
lastWeight - = localW eights. back ( ) ;
w eights. push_back ( ratFuncConstructor . translate ( carl : : freshRealVariable ( varName ) ) ) ;
lastWeight - = w eights. back ( ) ;
}
localWeights . push_back ( lastWeight ) ;
parameters . emplace ( observation , localWeights ) ;
} else {
STORM_LOG_ASSERT ( it - > second . size ( ) = = pomdp . getNumberOfChoices ( state ) , " Number of choices must be equal for every state with same number of actions " ) ;
localWeights = it - > second ;
weights . push_back ( lastWeight ) ;
res . emplace ( observation , weights ) ;
}
STORM_LOG_ASSERT ( it = = res . end ( ) | | it - > second . size ( ) = = pomdp . getNumberOfChoices ( state ) , " Number of choices must be equal for every state with same number of actions " ) ;
}
return res ;
}
template < typename ValueType >
std : : shared_ptr < storm : : models : : sparse : : Model < storm : : RationalFunction > > ApplyFiniteSchedulerToPomdp < ValueType > : : transform ( ) const {
storm : : storage : : sparse : : ModelComponents < storm : : RationalFunction > modelComponents ;
uint64_t nrStates = pomdp . getNumberOfStates ( ) ;
std : : unordered_map < uint32_t , std : : vector < storm : : RationalFunction > > observationChoiceWeights = getObservationChoiceWeights ( ) ;
storm : : storage : : SparseMatrixBuilder < storm : : RationalFunction > smb ( nrStates , nrStates , 0 , true ) ;
for ( uint64_t state = 0 ; state < nrStates ; + + state ) {
auto const & weights = observationChoiceWeights . at ( pomdp . getObservation ( state ) ) ;
std : : map < uint64_t , storm : : RationalFunction > weightedTransitions ;
for ( uint64_t action = 0 ; action < pomdp . getNumberOfChoices ( state ) ; + + action ) {
for ( auto const & entry : pomdp . getTransitionMatrix ( ) . getRow ( state , action ) ) {
auto it = weightedTransitions . find ( entry . getColumn ( ) ) ;
if ( it = = weightedTransitions . end ( ) ) {
weightedTransitions [ entry . getColumn ( ) ] = storm : : utility : : convertNumber < storm : : RationalFunctionCoefficient > ( entry . getValue ( ) ) * localWeights [ action ] ; //carl::rationalize<storm::RationalFunctionCoefficient>(entry.getValue()) * localWeights[action];
weightedTransitions [ entry . getColumn ( ) ] = storm : : utility : : convertNumber < storm : : RationalFunction > ( entry . getValue ( ) ) * w eights[ action ] ;
} else {
it - > second + = storm : : utility : : convertNumber < storm : : RationalFunctionCoefficient > ( entry . getValue ( ) ) * localWeights [ action ] ;
it - > second + = storm : : utility : : convertNumber < storm : : RationalFunction > ( entry . getValue ( ) ) * w eights[ action ] ;
}
}
}
for ( auto const & entry : weightedTransitions ) {
smb . addNextValue ( state , entry . first , entry . second ) ;
}
}
// TODO rewards.
storm : : storage : : sparse : : ModelComponents < storm : : RationalFunction > modelComponents ( smb . build ( ) , pomdp . getStateLabeling ( ) ) ;
modelComponents . transitionMatrix = smb . build ( ) ;
for ( auto const & pomdpRewardModel : pomdp . getRewardModels ( ) ) {
std : : vector < storm : : RationalFunction > stateRewards ;
if ( pomdpRewardModel . second . hasStateRewards ( ) ) {
stateRewards = storm : : utility : : vector : : convertNumericVector < storm : : RationalFunction > ( pomdpRewardModel . second . getStateRewardVector ( ) ) ;
} else {
stateRewards . resize ( nrStates , storm : : utility : : zero < storm : : RationalFunction > ( ) ) ;
}
if ( pomdpRewardModel . second . hasStateActionRewards ( ) ) {
std : : vector < ValueType > pomdpActionRewards = pomdpRewardModel . second . getStateActionRewardVector ( ) ;
for ( uint64_t state = 0 ; state < nrStates ; + + state ) {
auto & stateReward = stateRewards [ state ] ;
auto const & weights = observationChoiceWeights . at ( pomdp . getObservation ( state ) ) ;
uint64_t offset = pomdp . getTransitionMatrix ( ) . getRowGroupIndices ( ) [ state ] ;
for ( uint64_t action = 0 ; action < pomdp . getNumberOfChoices ( state ) ; + + action ) {
if ( ! storm : : utility : : isZero ( pomdpActionRewards [ offset + action ] ) ) {
stateReward + = storm : : utility : : convertNumber < storm : : RationalFunction > ( pomdpActionRewards [ offset + action ] ) * weights [ action ] ;
}
}
}
}
storm : : models : : sparse : : StandardRewardModel < storm : : RationalFunction > rewardModel ( std : : move ( stateRewards ) ) ;
modelComponents . rewardModels . emplace ( pomdpRewardModel . first , std : : move ( rewardModel ) ) ;
}
modelComponents . stateLabeling = pomdp . getStateLabeling ( ) ;
return std : : make_shared < storm : : models : : sparse : : Dtmc < storm : : RationalFunction > > ( modelComponents ) ;
}
template class ApplyFiniteSchedulerToPomdp < storm : : RationalNumber > ;