@@ -179,7 +179,6 @@ namespace storm {
                    }
                }
                finished = !improvement;
                storm::utility::Stopwatch backupTimer(true);
                // back up
                result_backup = result;
@@ -231,6 +230,249 @@ namespace storm {
                    POMDPCheckResult<ValueType>{overApprox, underApprox});
        }
        // TODO: This function reuses a lot of code from the probability computation; refactor to minimize code duplication!
        template<typename ValueType, typename RewardModelType>
        std::unique_ptr<POMDPCheckResult<ValueType>>
        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                             std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
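            // Grid-based over-approximation of expected reachability rewards (following the Lovejoy approach used for probabilities):
            // 1. discretize the belief space with a fixed grid,
            // 2. pre-compute successor beliefs and expected rewards for every grid point and action,
            // 3. run value iteration on the grid, interpolating off-grid successor beliefs, and
            // 4. interpolate the value of the initial belief from the converged grid values.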
            storm::utility::Stopwatch beliefGridTimer(true);
            uint64_t maxIterations = 100;
            bool finished = false;
            uint64_t iteration = 0;
            RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel();
            std::vector<storm::pomdp::Belief<ValueType>> beliefList;
            std::vector<bool> beliefIsTarget;
            uint64_t nextId = 0;
            // Initial belief always has ID 0
            storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
            ++nextId;
            beliefList.push_back(initialBelief);
            beliefIsTarget.push_back(
                    targetObservations.find(initialBelief.observation) != targetObservations.end());
            std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
            constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget,
                                nextId);
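            // constructBeliefGrid may append newly created beliefs to beliefList, so re-sync the next free ID with the list size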
            nextId = beliefList.size();
            beliefGridTimer.stop();
            storm::utility::Stopwatch overApproxTimer(true);
            // Belief ID -> Value
            std::map<uint64_t, ValueType> result;
            std::map<uint64_t, ValueType> result_backup;
            // Belief ID -> ActionIndex
            std::map<uint64_t, uint64_t> chosenActions;
            // Belief ID -> Action -> (Observation -> Probability)
            std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
            // current ID -> Action -> (Observation -> next ID)
            std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
            // current ID -> Action -> Reward
            std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
            storm::utility::Stopwatch nextBeliefGeneration(true);
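            // Pre-compute, for every non-target belief in the grid and every available action, the distribution over
            // successor observations, the corresponding successor beliefs, and the expected immediate reward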
            for (size_t i = 0; i < beliefGrid.size(); ++i) {
                auto currentBelief = beliefGrid[i];
                bool isTarget = beliefIsTarget[currentBelief.id];
                result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
                result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
                if (!isTarget) {
                    // TODO: put this in an extra function
                    storm::utility::Stopwatch beliefWatch(true);
                    // All states with the same observation share the parameters we need here, so we simply pick some state as the representative
                    uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
                    uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
                    std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
                    std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
                    std::vector<ValueType> actionRewardsInState(numChoices);
                    for (uint64_t action = 0; action < numChoices; ++action) {
                        storm::utility::Stopwatch aopWatch(true);
                        std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
                                pomdp, currentBelief, action);
                        aopWatch.stop();
                        //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl)
                        std::map<uint32_t, uint64_t> actionObservationBelieves;
                        for (auto iter = actionObservationProbabilities.begin();
                             iter != actionObservationProbabilities.end(); ++iter) {
                            uint32_t observation = iter->first;
                            storm::utility::Stopwatch callWatch(true);
                            // THIS CALL IS SLOW
                            // TODO speed this up
                            actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
                                                                                                        beliefList,
                                                                                                        beliefIsTarget,
                                                                                                        targetObservations,
                                                                                                        currentBelief,
                                                                                                        action,
                                                                                                        observation,
                                                                                                        nextId);
                            nextId = beliefList.size();
                            callWatch.stop();
                            //STORM_PRINT("Overall: " << callWatch << std::endl)
                        }
                        observationProbabilitiesInAction[action] = actionObservationProbabilities;
                        nextBelievesInAction[action] = actionObservationBelieves;
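                        // Expected immediate reward of this action in the current belief, i.e. the belief-weighted
                        // state-action reward (see getRewardAfterAction below)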
                        actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
                                                                            currentBelief);
                    }
                    observationProbabilities.emplace(
                            std::make_pair(currentBelief.id, observationProbabilitiesInAction));
                    nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
                    beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
                    beliefWatch.stop();
                    //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl)
                }
            }
            nextBeliefGeneration.stop();
            // Use caching to avoid recomputing the sub-simplices and lambdas
            std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
            std::map<uint64_t, std::vector<ValueType>> lambdaCache;
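            // A successor belief generally does not lie on the grid. Following the Lovejoy approximation, it is
            // expressed as a convex combination of the vertices of the enclosing sub-simplex; the lambdas are the
            // corresponding weights. Both are cached per belief ID since they are reused in every iteration.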
            STORM_PRINT("Nr Grid Beliefs " << beliefGrid.size() << std::endl)
            STORM_PRINT("Time generation of next beliefs: " << nextBeliefGeneration << std::endl)
            // Value Iteration
            while (!finished && iteration < maxIterations) {
                storm::utility::Stopwatch iterationTimer(true);
                STORM_LOG_DEBUG("Iteration " << iteration + 1);
                bool improvement = false;
                for (size_t i = 0; i < beliefGrid.size(); ++i) {
                    storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
                    bool isTarget = beliefIsTarget[currentBelief.id];
                    if (!isTarget) {
                        // we can take any state with the observation as they have the same number of choices
                        uint64_t numChoices = pomdp.getNumberOfChoices(
                                pomdp.getStatesWithObservation(currentBelief.observation).front());
                        // Initialize the values for the value iteration
                        ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                    : -storm::utility::infinity<ValueType>();
                        // Use the maximum representable index as "no action chosen yet" (infinity() is not meaningful for integer types)
                        uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max();
                        ValueType currentValue;
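                        // Bellman backup on the grid: for each action a, compute
                        //   Q(b, a) = r(b, a) + sum_z P(z | b, a) * sum_j lambda_j * V_old(v_j),
                        // where the v_j are the grid vertices of the sub-simplex containing the successor belief
                        // reached from b under action a and observation z; then take the min/max over all actions.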
                        for (uint64_t action = 0; action < numChoices; ++action) {
                            storm::utility::Stopwatch actionWatch(true);
                            currentValue = beliefActionRewards[currentBelief.id][action];
                            storm::utility::Stopwatch loopTimer(true);
                            for (auto iter = observationProbabilities[currentBelief.id][action].begin();
                                 iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
                                storm::utility::Stopwatch subsimplexTime(true);
                                uint32_t observation = iter->first;
                                storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
                                // Compute the sub-simplex and lambdas according to the Lovejoy paper to approximate the next belief;
                                // cache the values so they do not have to be recomputed in later iterations
                                std::vector<std::vector<ValueType>> subSimplex;
                                std::vector<ValueType> lambdas;
                                if (subSimplexCache.count(nextBelief.id) > 0) {
                                    subSimplex = subSimplexCache[nextBelief.id];
                                    lambdas = lambdaCache[nextBelief.id];
                                } else {
                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
                                            nextBelief.probabilities, gridResolution);
                                    subSimplex = temp.first;
                                    lambdas = temp.second;
                                    subSimplexCache[nextBelief.id] = subSimplex;
                                    lambdaCache[nextBelief.id] = lambdas;
                                }
                                subsimplexTime.stop();
                                //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl)
                                storm::utility::Stopwatch sumTime(true);
                                auto sum = storm::utility::zero<ValueType>();
                                for (size_t j = 0; j < lambdas.size(); ++j) {
                                    if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
                                        sum += lambdas[j] * result_backup.at(
                                                getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
                                    }
                                }
                                currentValue += iter->second * sum;
                                sumTime.stop();
                                //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl)
                            }
                            loopTimer.stop();
                            //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl)
                            // Update the selected actions
                            if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
                                (!min &&
                                 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                                cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                                chosenValue = currentValue;
                                chosenActionIndex = action;
                            }
                            actionWatch.stop();
                            //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl)
                        }
                        result[currentBelief.id] = chosenValue;
                        chosenActions[currentBelief.id] = chosenActionIndex;
                        // Check whether the iteration changed the value for this belief (in either direction), i.e. the fixpoint has not been reached yet
                        if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
                            cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                            improvement = true;
                        }
                    }
                }
                finished = !improvement;
                // back up
                result_backup = result;
                ++iteration;
                iterationTimer.stop();
                STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
            }
            STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
            beliefGrid.push_back(initialBelief);
            beliefIsTarget.push_back(
                    targetObservations.find(initialBelief.observation) != targetObservations.end());
            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
                    initialBelief.probabilities, gridResolution);
            std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
            std::vector<ValueType> initLambdas = temp.second;
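            // The over-approximation for the initial belief is obtained by interpolating the converged grid values
            // at the vertices of the sub-simplex containing the initial belief, weighted by the lambdas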
            auto overApprox = storm::utility::zero<ValueType>();
            for (size_t j = 0; j < initLambdas.size(); ++j) {
                if (initLambdas[j] != storm::utility::zero<ValueType>()) {
                    overApprox += initLambdas[j] *
                                  result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation,
                                                                    initSubSimplex[j])];
                }
            }
            overApproxTimer.stop();
            /*
            // Now onto the under-approximation
            bool useMdp = false; //true;
            storm::utility::Stopwatch underApproxTimer(true);
            ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                              result, chosenActions, gridResolution, initialBelief.id, min) :
                                             computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result,
                                                                               chosenActions, gridResolution, initialBelief.id, min);
            underApproxTimer.stop();
            STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
                        << "Time Overapproximation: " << overApproxTimer
                        << std::endl
                        << "Time Underapproximation: " << underApproxTimer
                        << std::endl);
            */
            STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
            //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
            return std::make_unique<POMDPCheckResult<ValueType>>(
                    POMDPCheckResult<ValueType>{overApprox, storm::utility::zero<ValueType>()});
        }
        template<typename ValueType, typename RewardModelType>
        ValueType
        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -831,6 +1073,16 @@ namespace storm {
            }
        }
        template<typename ValueType, typename RewardModelType>
        ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                  uint64_t action, storm::pomdp::Belief<ValueType> belief) {
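            // The expected immediate reward of taking 'action' in 'belief' is the belief-weighted sum of the
            // state-action rewards: r(b, a) = sum_s b(s) * r(s, a)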
            auto result = storm::utility::zero<ValueType>();
            for (size_t i = 0; i < belief.probabilities.size(); ++i) {
                result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
            }
            return result;
        }
        template
        class ApproximatePOMDPModelchecker<double>;