You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							124 lines
						
					
					
						
							5.3 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							124 lines
						
					
					
						
							5.3 KiB
						
					
					
				
								// GRID WORLD MODEL OF ROBOT AND JANITOR
							 | 
						|
								// Hakan Younes/gxn/dxp 04/05/04
							 | 
						|
								// Variant by Sebastian Junges, RWTH Aachen University
							 | 
						|
								// As described in 
							 | 
						|
								// Junges, Jansen, Dehnert, Topcu, Katoen: 
							 | 
						|
								// Safety Constrained Reinforcement Learning
							 | 
						|
								// Proc. of TACAS’16
							 | 
						|
								
							 | 
						|
								mdp
							 | 
						|
								
							 | 
						|
								// PARAMETERS: Probability that the janitor moves North, South, East or West
							 | 
						|
								const double jProbN;
							 | 
						|
								const double jProbE;
							 | 
						|
								const double jProbS = 1 - jProbN;
							 | 
						|
								const double jProbW = 1 - jProbE;
							 | 
						|
								
							 | 
						|
								// CONSTANTS
							 | 
						|
								const int xSize = 6; // size of the grid
							 | 
						|
								const int ySize = 6;
							 | 
						|
								const int jXmin = 1; // janitor area
							 | 
						|
								const int jXmax = 6;
							 | 
						|
								const int jYmin = 1;
							 | 
						|
								const int jYmax = 6;
							 | 
						|
								const int maxSteps = 15;
							 | 
						|
								
							 | 
						|
								formula T = (xR = xSize & yR = ySize);
							 | 
						|
								formula C = (xR = xJ & yR = yJ);
							 | 
						|
								
							 | 
						|
								module counter
							 | 
						|
									count : [0..maxSteps] init 0;
							 | 
						|
									[forward] (count<maxSteps & !C) -> 1:(count'=count+1);
							 | 
						|
									[turnLeft] (count<maxSteps & !C) -> 1:(count'=count+1);
							 | 
						|
									[turnRight] (count<maxSteps & !C) -> 1:(count'=count+1);
							 | 
						|
									[forward] (count<maxSteps & C) -> 1:(count'=maxSteps);
							 | 
						|
									[turnLeft] (count<maxSteps & C) -> 1:(count'=maxSteps);
							 | 
						|
									[turnRight] (count<maxSteps & C) -> 1:(count'=maxSteps);
							 | 
						|
									[forward] (count=maxSteps) -> 1:true;
							 | 
						|
									[turnLeft] (count=maxSteps) -> 1:true;
							 | 
						|
									[turnRight] (count=maxSteps) -> 1:true;
							 | 
						|
								endmodule
							 | 
						|
								
							 | 
						|
								module robot
							 | 
						|
									xR : [1..xSize] init 1;
							 | 
						|
									yR : [1..ySize] init 1;
							 | 
						|
									d: [0..3] init 0;
							 | 
						|
								
							 | 
						|
									  [forward] (d=0 & yR < ySize & !T) -> 1:(yR'=yR+1);
							 | 
						|
								   	[forward] (d=2 & yR > 1 & !T) -> 1:(yR'=yR-1);
							 | 
						|
								 		[forward] (d=1 & xR < xSize & !T) -> 1:(xR'=xR+1);
							 | 
						|
								 		[forward] (d=3 & xR > 1 & !T) -> 1:(xR'=xR-1);
							 | 
						|
								  	//[backward] (d=0 & yR > 1 & !T) -> 1:(yR'=yR-1);
							 | 
						|
								  	//[backward] (d=2 & yR < ySize & !T) -> 1:(yR'=yR+1);
							 | 
						|
								  	//[backward] (d=1 & xR > 1 & !T) -> 1:(xR'=xR-1);
							 | 
						|
								  	//[backward] (d=3 & xR < xSize & !T) -> 1:(xR'=xR+1);
							 | 
						|
								  	[turnLeft] (d>0 & !T) -> 1:(d'=d-1);
							 | 
						|
								  	[turnLeft] (d=0 & !T) -> 1:(d'=3);
							 | 
						|
								  	[turnRight] (d=3 & !T) -> 1:(d'=0);
							 | 
						|
								  	[turnRight] (d<3 & !T) -> 1:(d'=d+1);
							 | 
						|
									// [turnLeft] (d=1) -> 1:(d’=0);
							 | 
						|
									// [turnRight] (d=0) -> 1:(d’=1);
							 | 
						|
										[done] T -> 1:true;
							 | 
						|
								endmodule
							 | 
						|
								
							 | 
						|
								// The Janitor moves probabilistically within the grid.
							 | 
						|
								module janitor
							 | 
						|
									xJ : [jXmin..jXmax] init jXmax; // x position of janitor
							 | 
						|
									yJ : [jYmin..jYmax] init jYmin; // y position of janitor
							 | 
						|
								
							 | 
						|
									[forward] (yJ=jYmax & xJ=jXmax) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[forward] (yJ=jYmax & xJ=jXmin) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[forward] (yJ=jYmax & xJ<jXmax & xJ > jXmin) -> 0.3333:(yJ'=yJ-1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[forward] (yJ=jYmin & xJ=jXmax) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[forward] (yJ=jYmin & xJ=jXmin) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[forward] (yJ=jYmin & xJ<jXmax & xJ>jXmin) -> 0.3333:(yJ'=yJ+1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[forward] (yJ<jYmax & yJ>jYmin & xJ=jXmax) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ-1);
							 | 
						|
									[forward] (yJ<jYmax & yJ>jYmin & xJ=jXmin) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ+1);
							 | 
						|
									[forward] (yJ<jYmax & yJ>jYmin & xJ<jXmax & xJ>jXmin) -> 0.5*jProbS:(yJ'=yJ-1) + 0.5*jProbN:(yJ'=yJ+1) + 0.5*jProbW:(xJ'=xJ-1) + 0.5*jProbE:(xJ'=xJ+1);
							 | 
						|
								
							 | 
						|
								
							 | 
						|
									[turnLeft] (yJ=jYmax & xJ=jXmax) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[turnLeft] (yJ=jYmax & xJ=jXmin) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[turnLeft] (yJ=jYmax & xJ<jXmax & xJ > jXmin) -> 0.3333:(yJ'=yJ-1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[turnLeft] (yJ=jYmin & xJ=jXmax) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[turnLeft] (yJ=jYmin & xJ=jXmin) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[turnLeft] (yJ=jYmin & xJ<jXmax & xJ>jXmin) -> 0.3333:(yJ'=yJ+1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[turnLeft] (yJ<jYmax & yJ>jYmin & xJ=jXmax) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ-1);
							 | 
						|
									[turnLeft] (yJ<jYmax & yJ>jYmin & xJ=jXmin) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ+1);
							 | 
						|
									[turnLeft] (yJ<jYmax & yJ>jYmin & xJ<jXmax & xJ>jXmin) -> 0.5*jProbS:(yJ'=yJ-1) + 0.5*jProbN:(yJ'=yJ+1) + 0.5*jProbW:(xJ'=xJ-1) + 0.5*jProbE:(xJ'=xJ+1);
							 | 
						|
								
							 | 
						|
									[turnRight] (yJ=jYmax & xJ=jXmax) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[turnRight] (yJ=jYmax & xJ=jXmin) -> 0.5:(yJ'=yJ-1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[turnRight] (yJ=jYmax & xJ<jXmax & xJ > jXmin) -> 0.3333:(yJ'=yJ-1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[turnRight] (yJ=jYmin & xJ=jXmax) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ-1);
							 | 
						|
									[turnRight] (yJ=jYmin & xJ=jXmin) -> 0.5:(yJ'=yJ+1) + 0.5:(xJ'=xJ+1);
							 | 
						|
									[turnRight] (yJ=jYmin & xJ<jXmax & xJ>jXmin) -> 0.3333:(yJ'=yJ+1) + 0.6667*jProbW:(xJ'=xJ-1) + 0.6667*jProbE:(xJ'=xJ+1);
							 | 
						|
									[turnRight] (yJ<jYmax & yJ>jYmin & xJ=jXmax) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ-1);
							 | 
						|
									[turnRight] (yJ<jYmax & yJ>jYmin & xJ=jXmin) -> 0.6667*jProbN:(yJ'=yJ+1) + 0.6667*jProbS:(yJ'=yJ-1) + 0.3333:(xJ'=xJ+1);
							 | 
						|
									[turnRight] (yJ<jYmax & yJ>jYmin & xJ<jXmax & xJ>jXmin) -> 0.5*jProbS:(yJ'=yJ-1) + 0.5*jProbN:(yJ'=yJ+1) + 0.5*jProbW:(xJ'=xJ-1) + 0.5*jProbE:(xJ'=xJ+1);
							 | 
						|
								
							 | 
						|
								endmodule
							 | 
						|
								
							 | 
						|
								rewards "mvbased"
							 | 
						|
									[forward] true: 5;
							 | 
						|
									//[backward] true: 10;
							 | 
						|
									[turnLeft] true: 25;
							 | 
						|
									[turnRight] true: 28;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								rewards "mvbased_upper"
							 | 
						|
									[forward] true: 30;
							 | 
						|
									//[backward] true: 35;
							 | 
						|
									[turnLeft] true: 40;
							 | 
						|
									[turnRight] true: 40;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								rewards "mvbased_lower"
							 | 
						|
									[forward] true: 4;
							 | 
						|
									//[backward] true: 4;
							 | 
						|
									[turnLeft] true: 8;
							 | 
						|
									[turnRight] true: 8;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								label "Target" = T & (count<maxSteps) & !C;
							 | 
						|
								label "Crash" = C;
							 |