You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							73 lines
						
					
					
						
							2.2 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							73 lines
						
					
					
						
							2.2 KiB
						
					
					
				
								// GRID WORLD MODEL OF A SEMIAUTONOMOUS EXPLORING ROBOT
							 | 
						|
								// Sebastian Junges, RWTH Aachen University
							 | 
						|
								// As described in 
							 | 
						|
								// Junges, Jansen, Dehnert, Topcu, Katoen: 
							 | 
						|
								// Safety Constrained Reinforcement Learning
							 | 
						|
								// Proc. of TACAS’16
							 | 
						|
								
							 | 
						|
								mdp
							 | 
						|
								
							 | 
						|
								//PARAMETERS
							 | 
						|
								//The difference of the reliability of the channels between the worst and at the best position
							 | 
						|
								const double pLDiff;//=0.1;
							 | 
						|
								const double pHDiff;//=0.1;
							 | 
						|
								//Scaling factor for the minimum reliability of the channels
							 | 
						|
								const double pL;//=8/9;
							 | 
						|
								const double pH;//=1;
							 | 
						|
								
							 | 
						|
								//CONSTANTS
							 | 
						|
								//The minimum reliablities
							 | 
						|
								const double pLMin=pL*(1-pLDiff);
							 | 
						|
								const double pHMin=pH*(1-pHDiff);
							 | 
						|
								
							 | 
						|
								// Grid size
							 | 
						|
								const int Xsize = 6;
							 | 
						|
								const int Ysize = 6;
							 | 
						|
								// Number of tries before an error
							 | 
						|
								const int MAXTRIES = 2;
							 | 
						|
								// Ball within the robot has to move.
							 | 
						|
								const int B=2;
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								formula T = (xLoc = Xsize & yLoc = Ysize);
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								module robot
							 | 
						|
								  xLoc : [1..Ysize] init 1;
							 | 
						|
								  yLoc : [1..Xsize] init 1;
							 | 
						|
								  unreported : [0..B] init 0;
							 | 
						|
								  hasSendNow : bool init false;
							 | 
						|
								  tries : [0..MAXTRIES] init 0;
							 | 
						|
								
							 | 
						|
								  [up] xLoc < Xsize & !T  & hasSendNow  -> 1:(xLoc'=xLoc+1) & (unreported' = 0) & (hasSendNow'=false);
							 | 
						|
								  [up] xLoc < Xsize & !T  & !hasSendNow -> 1:(xLoc'=xLoc+1) & (unreported'=min(unreported+1, B));
							 | 
						|
								  [right] yLoc < Ysize & !T  & hasSendNow  -> 1:(yLoc'=yLoc+1) & (unreported' = 0)& (hasSendNow'=false);
							 | 
						|
								  [right] yLoc < Ysize & !T  & !hasSendNow -> 1:(yLoc'=yLoc+1) & (unreported'=min(unreported+1,B));
							 | 
						|
								  [sendL] !hasSendNow & !T & tries < MAXTRIES -> (pLMin + pLDiff * xLoc/Xsize):(hasSendNow'=true) & (tries'=0) + (1 - pLMin - pLDiff * xLoc/Xsize): (tries'=tries+1);
							 | 
						|
								  [sendH] !hasSendNow & !T & tries < MAXTRIES -> (pHMin + pHDiff * yLoc/Ysize):(hasSendNow'=true) & (tries'=0) + (1 - pHMin - pHDiff * yLoc/Ysize): (tries'=tries+1);
							 | 
						|
								  [done] T -> 1:true;
							 | 
						|
								endmodule
							 | 
						|
								
							 | 
						|
								rewards "sendbased"
							 | 
						|
								  [up] true: 0.03;
							 | 
						|
								  [right] true: 0.03;
							 | 
						|
									[sendL] true: max(10, min(11 + xLoc - yLoc, 20));
							 | 
						|
									[sendH] true: min(13 + xLoc + yLoc, 24);
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								rewards "sendbased_lower"
							 | 
						|
								  [up] true: 0.03;
							 | 
						|
								  [right] true: 0.03;
							 | 
						|
									[sendL] true: 10;
							 | 
						|
									[sendH] true: 12;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								rewards "sendbased_upper"
							 | 
						|
								  [up] true: 0.03;
							 | 
						|
								  [right] true: 0.03;
							 | 
						|
									[sendL] true: 20;
							 | 
						|
									[sendH] true: 24;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								label "Target" = T;
							 | 
						|
								label "Crash" = unreported=B;
							 |