tempest/examples/pmdp/reporter4/reporter4.pm


								// GRID WORLD MODEL OF A SEMIAUTONOMOUS EXPLORING ROBOT

								// Sebastian Junges, RWTH Aachen University

								// As described in

								// Junges, Jansen, Dehnert, Topcu, Katoen:

								// Safety Constrained Reinforcement Learning

								// Proc. of TACAS’16


								// Model type: Markov decision process.

								mdp


								//PARAMETERS

								// Difference in reliability of each channel between the worst and the best position.
								// Used twice: pLMin/pHMin are scaled by (1 - diff), and module robot adds
								// diff * xLoc/Xsize (sendL) resp. diff * yLoc/Ysize (sendH) to the success probability.
								// Declared without a value here; the trailing comments keep a sample value.

								const double pLDiff;//=0.1;

								const double pHDiff;//=0.1;

								// Scaling factors for the minimum reliability of the channels
								// (pLMin = pL*(1-pLDiff), pHMin = pH*(1-pHDiff)). Also declared without a value.

								const double pL;//=8/9;

								const double pH;//=1;


								//CONSTANTS

								// The minimum reliabilities: the position-independent part of the success
								// probability of sendL (pLMin) and sendH (pHMin). Module robot adds the
								// position-dependent terms pLDiff * xLoc/Xsize and pHDiff * yLoc/Ysize on top.

								const double pLMin=pL*(1-pLDiff);

								const double pHMin=pH*(1-pHDiff);


								// Grid size: Xsize bounds the moves that increment xLoc, Ysize those that
								// increment yLoc; (Xsize, Ysize) is the target position (see formula T).

								const int Xsize;

								const int Ysize;

								// Number of tries before an error: upper bound of the `tries` counter.
								// sendL/sendH are only enabled while tries < MAXTRIES.

								const int MAXTRIES;

								// Upper bound of the `unreported` counter, which is incremented (saturating
								// at B) by every move not preceded by a successful send. The states with
								// unreported = B are labelled "Crash".
								// (Original note: "Ball within the robot has to move.")

								const int B;


								// Target condition: the robot is at position (Xsize, Ysize).
								formula T = (xLoc = Xsize & yLoc = Ysize);


								// The robot moves over the grid until the target formula T holds and may, before
								// each move, try to send a report over one of two unreliable channels.
								//
								//   xLoc, yLoc   current position; [up] increments xLoc, [right] increments yLoc
								//   unreported   moves made without a preceding successful send since the last
								//                reported move; saturates at B
								//   hasSendNow   true from a successful send until the next move
								//   tries        failed send attempts since the last successful send; it is NOT
								//                reset by moving, and sends are disabled once it reaches MAXTRIES
								module robot

								  // Each coordinate is bounded by the grid dimension its guards and the target
								  // formula T compare it against (xLoc vs. Xsize, yLoc vs. Ysize). With the
								  // bounds the other way round, a non-square grid lets a move push a coordinate
								  // outside its declared range.
								  xLoc : [1..Xsize] init 1;
								  yLoc : [1..Ysize] init 1;

								  unreported : [0..B] init 0;
								  hasSendNow : bool init false;
								  tries : [0..MAXTRIES] init 0;

								  // Moves. A move directly after a successful send clears the unreported counter
								  // and consumes the send; any other move increases the counter (capped at B).
								  [up] xLoc < Xsize & !T & hasSendNow -> 1:(xLoc'=xLoc+1) & (unreported'=0) & (hasSendNow'=false);
								  [up] xLoc < Xsize & !T & !hasSendNow -> 1:(xLoc'=xLoc+1) & (unreported'=min(unreported+1, B));
								  [right] yLoc < Ysize & !T & hasSendNow -> 1:(yLoc'=yLoc+1) & (unreported'=0) & (hasSendNow'=false);
								  [right] yLoc < Ysize & !T & !hasSendNow -> 1:(yLoc'=yLoc+1) & (unreported'=min(unreported+1, B));

								  // Send attempts. Success sets hasSendNow and resets tries; failure counts a try.
								  // The success probability of sendL grows with xLoc, that of sendH with yLoc.
								  [sendL] !hasSendNow & !T & tries < MAXTRIES -> (pLMin + pLDiff * xLoc/Xsize):(hasSendNow'=true) & (tries'=0) + (1 - pLMin - pLDiff * xLoc/Xsize):(tries'=tries+1);
								  [sendH] !hasSendNow & !T & tries < MAXTRIES -> (pHMin + pHDiff * yLoc/Ysize):(hasSendNow'=true) & (tries'=0) + (1 - pHMin - pHDiff * yLoc/Ysize):(tries'=tries+1);

								  // Self-loop at the target so that target states are not deadlocks.
								  [done] T -> 1:true;

								endmodule


								// Reward structure "sendbased": position-dependent reward for every send
								// attempt plus a small constant reward for every move.
								rewards "sendbased"

								  // Send attempts: sendH is capped at 24, sendL is clamped to [10, 20].
								  [sendH] true : min(24, xLoc + yLoc + 13);
								  [sendL] true : min(20, max(10, xLoc - yLoc + 11));

								  // Moves.
								  [right] true : 0.03;
								  [up] true : 0.03;

								endrewards


								// Reward structure "sendbased_lower": same move rewards as "sendbased", but
								// constant send rewards that never exceed the position-dependent ones
								// (sendL is at least 10 there; sendH is at least 13 + 1 + 1 = 15).
								rewards "sendbased_lower"

								  // Send attempts.
								  [sendH] true : 12;
								  [sendL] true : 10;

								  // Moves.
								  [right] true : 0.03;
								  [up] true : 0.03;

								endrewards


								// Reward structure "sendbased_upper": same move rewards as "sendbased", but
								// constant send rewards equal to the caps of the position-dependent ones
								// (20 for sendL, 24 for sendH).
								rewards "sendbased_upper"

								  // Send attempts.
								  [sendH] true : 24;
								  [sendL] true : 20;

								  // Moves.
								  [right] true : 0.03;
								  [up] true : 0.03;

								endrewards


								// "Target": the robot has reached position (Xsize, Ysize), i.e. formula T holds.
								label "Target" = T;

								// "Crash": the unreported-move counter has hit its bound B.
								label "Crash" = unreported=B;