"""
Copied and adapted from https://github.com/mila-iqia/babyai.
Levels described in the Baby AI ICLR 2019 submission.
The instructions are a synthesis of those from `PutNext`, `Open`, `GoTo`, and `Pickup`.
"""

from __future__ import annotations

from minigrid.envs.babyai.core.levelgen import LevelGen


class Synth(LevelGen):
    """

    ## Description

    Union of all instructions from PutNext, Open, GoTo and PickUp.
    The agent may need to move objects around. The agent may have
    to unlock the door, but only if it is explicitly referred by
    the instruction.

    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open

    ## Mission Space

    "go to the {color} {type}"

    or

    "pick up a/the {color} {type}"

    or

    "open the {color} door"

    or

    "put the {color} {type} next to the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-Synth-v0`
    - `BabyAI-SynthS5R2-v0`

    """

    def __init__(
        self,
        room_size: int = 8,
        num_rows: int = 3,
        num_cols: int = 3,
        num_dists: int = 18,
        **kwargs,
    ) -> None:
        """Configure the level and delegate construction to `LevelGen`.

        Args:
            room_size: Forwarded unchanged to `LevelGen`.
            num_rows: Forwarded unchanged to `LevelGen`.
            num_cols: Forwarded unchanged to `LevelGen`.
            num_dists: Forwarded unchanged to `LevelGen`
                (presumably the number of distractor objects).
            **kwargs: Any further keyword arguments, forwarded to `LevelGen`.
        """
        # We add many distractors to increase the probability
        # of ambiguous locations within the same room
        super().__init__(
            room_size=room_size,
            num_rows=num_rows,
            num_cols=num_cols,
            num_dists=num_dists,
            # Only single "action" instructions, without location phrases.
            instr_kinds=["action"],
            locations=False,
            unblocking=True,
            implicit_unlock=False,
            **kwargs,
        )


class SynthLoc(LevelGen):
    """

    ## Description

    Like Synth, but a significant share of object descriptions involves
    location language like in PickUpLoc. No implicit unlocking.

    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc

    ## Mission Space

    "go to the {color} {type} {location}"

    or

    "pick up a/the {color} {type} {location}"

    or

    "open the {color} door {location}"

    or

    "put the {color} {type} {location} next to the {color} {type} {location}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    {location} can be " ", "in front of you", "behind you", "on your left"
    or "on your right"

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-SynthLoc-v0`
    """

    def __init__(self, **kwargs) -> None:
        """Configure the level and delegate construction to `LevelGen`.

        Args:
            **kwargs: Keyword arguments forwarded to `LevelGen`. Passing any
                of the options fixed below raises `TypeError` (duplicate
                keyword argument).
        """
        # NOTE(review): no `num_dists` is passed here, so the number of
        # distractors is whatever `LevelGen` defaults to (or what the caller
        # supplies through kwargs) -- confirm it is high enough to produce
        # ambiguous locations within the same room.
        super().__init__(
            instr_kinds=["action"],
            locations=True,
            unblocking=True,
            implicit_unlock=False,
            **kwargs,
        )


class SynthSeq(LevelGen):
    """

    ## Description

    Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
    No implicit unlocking.

    Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq

    ## Mission Space

    Action mission space:

    "go to the {color} {type} {location}"

    or

    "pick up a/the {color} {type} {location}"

    or

    "open the {color} door {location}"

    or

    "put the {color} {type} {location} next to the {color} {type} {location}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    {location} can be " ", "in front of you", "behind you", "on your left"
    or "on your right"

    And mission space:

    Two action missions concatenated with "and"

    Example:

    go to the green key
    and
    put the box next to the yellow ball

    Sequence mission space:

    Two missions, they can be action or and missions, concatenated with
    ", then" or "after you".

    Example:

    open a red door and go to the ball on your left
    after you
    put the grey ball next to a door

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-SynthSeq-v0`

    """

    def __init__(self, **kwargs) -> None:
        """Configure the level and delegate construction to `LevelGen`.

        Unlike `Synth`/`SynthLoc`, `instr_kinds` is not restricted here, so
        the instruction kinds come from `LevelGen`'s default (or from kwargs).

        Args:
            **kwargs: Keyword arguments forwarded to `LevelGen`. Passing any
                of the options fixed below raises `TypeError` (duplicate
                keyword argument).
        """
        # NOTE(review): no `num_dists` is passed here, so the number of
        # distractors is whatever `LevelGen` defaults to (or what the caller
        # supplies through kwargs) -- confirm it is high enough to produce
        # ambiguous locations within the same room.
        super().__init__(
            locations=True, unblocking=True, implicit_unlock=False, **kwargs
        )


class MiniBossLevel(LevelGen):
    """

    ## Description

    Command can be any sentence drawn from the Baby Language grammar.
    Union of all competencies. This level is a superset of all other levels.
    Compared to BossLevel this has a smaller room and a lower probability of
    locked rooms.

    ## Mission Space

    Action mission space:

    "go to the {color} {type} {location}"

    or

    "pick up a/the {color} {type} {location}"

    or

    "open the {color} door {location}"

    or

    "put the {color} {type} {location} next to the {color} {type} {location}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    {location} can be " ", "in front of you", "behind you", "on your left"
    or "on your right"

    And mission space:

    Two action missions concatenated with "and"

    Example:

    go to the green key
    and
    put the box next to the yellow ball

    Sequence mission space:

    Two missions, they can be action or and missions, concatenated with
    ", then" or "after you".

    Example:

    open a red door and go to the ball on your left
    after you
    put the grey ball next to a door

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-MiniBossLevel-v0`

    """

    def __init__(self, **kwargs) -> None:
        """Build a 2x2 grid of size-5 rooms and delegate to `LevelGen`.

        Args:
            **kwargs: Keyword arguments forwarded to `LevelGen`. The five
                options fixed below cannot be overridden; passing any of them
                raises `TypeError` (duplicate keyword argument).
        """
        super().__init__(
            num_cols=2,
            num_rows=2,
            room_size=5,
            num_dists=7,
            locked_room_prob=0.25,
            **kwargs,
        )


class BossLevel(LevelGen):
    """

    ## Description

    Command can be any sentence drawn from the Baby Language grammar.
    Union of all competencies. This level is a superset of all other levels.

    ## Mission Space

    Action mission space:

    "go to the {color} {type} {location}"

    or

    "pick up a/the {color} {type} {location}"

    or

    "open the {color} door {location}"

    or

    "put the {color} {type} {location} next to the {color} {type} {location}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    {location} can be " ", "in front of you", "behind you", "on your left"
    or "on your right"

    And mission space:

    Two action missions concatenated with "and"

    Example:

    go to the green key
    and
    put the box next to the yellow ball

    Sequence mission space:

    Two missions, they can be action or and missions, concatenated with
    ", then" or "after you".

    Example:

    open a red door and go to the ball on your left
    after you
    put the grey ball next to a door

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-BossLevel-v0`
    """

    def __init__(self, **kwargs) -> None:
        """Delegate to `LevelGen` without fixing any option.

        Args:
            **kwargs: Keyword arguments forwarded unchanged to `LevelGen`.
        """
        # Pure pass-through: this level keeps every `LevelGen` default.
        # The override is retained so the constructor stays keyword-only.
        super().__init__(**kwargs)


class BossLevelNoUnlock(LevelGen):
    """

    ## Description

    Command can be any sentence drawn from the Baby Language grammar.
    Union of all competencies. This level is a superset of all other levels.
    No implicit unlocking.

    ## Mission Space

    Action mission space:

    "go to the {color} {type} {location}"

    or

    "pick up a/the {color} {type} {location}"

    or

    "open the {color} door {location}"

    or

    "put the {color} {type} {location} next to the {color} {type} {location}"

    {color} is the color of the object. Can be "red", "green", "blue", "purple",
    "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    {location} can be " ", "in front of you", "behind you", "on your left"
    or "on your right"

    And mission space:

    Two action missions concatenated with "and"

    Example:

    go to the green key
    and
    put the box next to the yellow ball

    Sequence mission space:

    Two missions, they can be action or and missions, concatenated with
    ", then" or "after you".

    Example:

    open a red door and go to the ball on your left
    after you
    put the grey ball next to a door

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Drop an object            |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent achieves the task.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-BossLevelNoUnlock-v0`
    """

    def __init__(self, **kwargs) -> None:
        """Disable locked rooms and implicit unlocking, then delegate to `LevelGen`.

        Args:
            **kwargs: Keyword arguments forwarded to `LevelGen`. Passing
                `locked_room_prob` or `implicit_unlock` raises `TypeError`
                (duplicate keyword argument).
        """
        # locked_room_prob=0 presumably means no locked room is ever
        # generated -- verify against LevelGen.
        super().__init__(locked_room_prob=0, implicit_unlock=False, **kwargs)