You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							59 lines
						
					
					
						
							1.5 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							59 lines
						
					
					
						
							1.5 KiB
						
					
					
				
								
							 | 
						|
								
							 | 
						|
								// 3x3 grid
							 | 
						|
								// based on Littman, Cassandra and Kaelbling
							 | 
						|
								// Learning policies for partially observable environments: Scaling up  
							 | 
						|
								// Technical Report CS, Brown University
							 | 
						|
								
							 | 
						|
								pomdp
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								// only the target is observable which is in the south east corner
							 | 
						|
								observables
							 | 
						|
									o
							 | 
						|
								endobservables
							 | 
						|
								
							 | 
						|
								module grid
							 | 
						|
									
							 | 
						|
									x : [0..2]; // x coordinate
							 | 
						|
									y : [0..2]; // y coordinate
							 | 
						|
									o : [0..2]; // observables
							 | 
						|
									// 0 - initial observation
							 | 
						|
									// 1 - in the grid (not target)
							 | 
						|
									// 2 - observe target
							 | 
						|
									
							 | 
						|
									// initially randomly placed within the grid (not at the target)
							 | 
						|
									[] o=0 -> 1/8 : (o'=1) & (x'=0) & (y'=0)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=0) & (y'=1)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=0) & (y'=2)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=1) & (y'=0)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=1) & (y'=1)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=1) & (y'=2)
							 | 
						|
											// + 1/8 : (o'=1) & (x'=2) & (y'=0) the target
							 | 
						|
											+ 1/8 : (o'=1) & (x'=2) & (y'=1)
							 | 
						|
											+ 1/8 : (o'=1) & (x'=2) & (y'=2);
							 | 
						|
											
							 | 
						|
									// move around the grid
							 | 
						|
									[east] o=1 & !(x=1 & y=0) -> (x'=min(x+1,2)); // not reached target
							 | 
						|
									[east] o=1 & x=1 & y=0 -> (x'=min(x+1,2)) & (o'=2); // reached target
							 | 
						|
									[west] o=1 -> (x'=max(x-1,0)); // not reached target
							 | 
						|
									[north] o=1 -> (y'=min(y+1,2)); // not reached target (y' is never 0, o is unchanged)
							 | 
						|
									[south] o=1 & !(x=2 & y=1) -> (y'=max(y-1,0)); // not reached target
							 | 
						|
									[south] o=1 & x=2 & y=1 -> (y'=max(y-1,0)) & (o'=2); // reached target
							 | 
						|
									
							 | 
						|
									// reached target
							 | 
						|
									[done] o=2 -> true;
							 | 
						|
									
							 | 
						|
								endmodule
							 | 
						|
								
							 | 
						|
								// reward structure for number of steps to reach the target
							 | 
						|
								rewards
							 | 
						|
								        [east] true : 1;
							 | 
						|
								        [west] true : 1;
							 | 
						|
								        [north] true : 1;
							 | 
						|
								        [south] true : 1;
							 | 
						|
								endrewards
							 | 
						|
								
							 | 
						|
								// target observation
							 | 
						|
								label "goal" = o=2;
							 |