You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							814 lines
						
					
					
						
							23 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							814 lines
						
					
					
						
							23 KiB
						
					
					
				
								"""
							 | 
						|
								Copied and adapted from https://github.com/mila-iqia/babyai.
							 | 
						|
								Levels described in the Baby AI ICLR 2019 submission, with the `Go to` instruction.
							 | 
						|
								"""
							 | 
						|
								from __future__ import annotations
							 | 
						|
								
							 | 
						|
								from minigrid.envs.babyai.core.levelgen import LevelGen
							 | 
						|
								from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
							 | 
						|
								from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToRedBallGrey(RoomGridLevel):
								    """

								    ## Description

								    Go to the red ball in a single room that also contains distractors.
								    Every distractor is recolored grey to reduce perceptual complexity.
								    This level has distractors but doesn't make use of language.

								    ## Mission Space

								    "go to the red ball"

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the red ball.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToRedBallGrey-v0`

								    """

								    def __init__(self, room_size=8, num_dists=7, **kwargs):
								        # Number of distractor objects that gen_mission adds to the room.
								        self.num_dists = num_dists
								        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

								    def gen_mission(self):
								        """Place the agent, the red ball and the grey distractors, then set the instruction."""
								        self.place_agent()
								        target, _ = self.add_object(0, 0, "ball", "red")

								        # Whatever color a distractor was generated with, it ends up grey.
								        for distractor in self.add_distractors(
								            num_distractors=self.num_dists, all_unique=False
								        ):
								            distractor.color = "grey"

								        # Make sure no unblocking is required
								        self.check_objs_reachable()

								        self.instrs = GoToInstr(ObjDesc(target.type, target.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToRedBall(RoomGridLevel):
								    """
								    ## Description

								    Go to the red ball in a single room that also contains distractors.
								    This level has distractors but doesn't make use of language.

								    ## Mission Space

								    "go to the red ball"

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the red ball.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToRedBall-v0`

								    """

								    def __init__(self, room_size=8, num_dists=7, **kwargs):
								        # Number of distractor objects that gen_mission adds to the room.
								        self.num_dists = num_dists
								        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

								    def gen_mission(self):
								        """Place the agent, the red ball and the distractors, then set the instruction."""
								        self.place_agent()
								        target, _ = self.add_object(0, 0, "ball", "red")
								        self.add_distractors(all_unique=False, num_distractors=self.num_dists)

								        # Make sure no unblocking is required
								        self.check_objs_reachable()

								        goal_desc = ObjDesc(target.type, target.color)
								        self.instrs = GoToInstr(goal_desc)
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToRedBallNoDists(GoToRedBall):
								    """

								    ## Description

								    Go to the red ball. The room contains no distractors.

								    ## Mission Space

								    "go to the red ball"

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the red ball.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToRedBallNoDists-v0`

								    """

								    def __init__(self, **kwargs):
								        # This variant pins the room size and the (zero) distractor count;
								        # all other options are forwarded to GoToRedBall unchanged.
								        pinned = {"room_size": 8, "num_dists": 0}
								        super().__init__(**pinned, **kwargs)
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToObj(RoomGridLevel):
								    """
								    ## Description

								    Go to an object, inside a single room with no doors, no distractors. The
								    naming convention `GoToObjS{X}` represents a room of size `X`.

								    ## Mission Space

								    "go to the {color} {type}"

								    {color} is the color of the object. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box" or "key".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the object.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToObj-v0`
								    - `BabyAI-GoToObjS4-v0`
								    - `BabyAI-GoToObjS6-v1`

								    Notice: `BabyAI-GoToObjS6-v0` should no longer be used due to a bug in the registry parameters.

								    """

								    def __init__(self, room_size=8, **kwargs):
								        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

								    def gen_mission(self):
								        """Place the agent and one object, and make that object the goal."""
								        self.place_agent()
								        # The single object created here is the target of the instruction.
								        target = self.add_distractors(num_distractors=1)[0]
								        self.instrs = GoToInstr(ObjDesc(target.type, target.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToLocal(RoomGridLevel):
								    """

								    ## Description

								    Go to an object, inside a single room with no doors. The room holds
								    `num_dists` objects and one of them is picked as the target. The
								    naming convention `GoToLocalS{X}N{Y}` represents a room of size `X` with
								    distractor number `Y`.

								    ## Mission Space

								    "go to the {color} {type}"

								    {color} is the color of the object. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box" or "key".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the object.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToLocal-v0`
								    - `BabyAI-GoToLocalS5N2-v0`
								    - `BabyAI-GoToLocalS6N2-v0`
								    - `BabyAI-GoToLocalS6N3-v0`
								    - `BabyAI-GoToLocalS6N4-v0`
								    - `BabyAI-GoToLocalS7N4-v0`
								    - `BabyAI-GoToLocalS7N5-v0`
								    - `BabyAI-GoToLocalS8N2-v0`
								    - `BabyAI-GoToLocalS8N3-v0`
								    - `BabyAI-GoToLocalS8N4-v0`
								    - `BabyAI-GoToLocalS8N5-v0`
								    - `BabyAI-GoToLocalS8N6-v0`
								    - `BabyAI-GoToLocalS8N7-v0`
								    """

								    def __init__(self, room_size=8, num_dists=8, **kwargs):
								        # Number of objects that gen_mission places in the room.
								        self.num_dists = num_dists
								        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

								    def gen_mission(self):
								        """Place the agent and the objects, then pick one object at random as the goal."""
								        self.place_agent()
								        candidates = self.add_distractors(
								            num_distractors=self.num_dists, all_unique=False
								        )
								        self.check_objs_reachable()
								        target = self._rand_elem(candidates)
								        self.instrs = GoToInstr(ObjDesc(target.type, target.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoTo(RoomGridLevel):
								    """

								    ## Description

								    Go to an object, the object may be in another room. Many distractors.

								    ## Mission Space

								    "go to a/the {color} {type}"

								    {color} is the color of the object. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box" or "key".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the object.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoTo-v0`
								    - `BabyAI-GoToOpen-v0`
								    - `BabyAI-GoToObjMaze-v0`
								    - `BabyAI-GoToObjMazeOpen-v0`
								    - `BabyAI-GoToObjMazeS4R2-v0`
								    - `BabyAI-GoToObjMazeS4-v0`
								    - `BabyAI-GoToObjMazeS5-v0`
								    - `BabyAI-GoToObjMazeS6-v0`
								    - `BabyAI-GoToObjMazeS7-v0`
								    """

								    def __init__(
								        self,
								        room_size=8,
								        num_rows=3,
								        num_cols=3,
								        num_dists=18,
								        doors_open=False,
								        **kwargs,
								    ):
								        # Both settings are read later by gen_mission.
								        self.doors_open = doors_open
								        self.num_dists = num_dists
								        super().__init__(
								            room_size=room_size, num_rows=num_rows, num_cols=num_cols, **kwargs
								        )

								    def gen_mission(self):
								        """Connect the rooms, scatter the objects and pick one of them as the goal."""
								        self.place_agent()
								        self.connect_all()
								        candidates = self.add_distractors(
								            num_distractors=self.num_dists, all_unique=False
								        )
								        self.check_objs_reachable()

								        target = self._rand_elem(candidates)
								        goal_desc = ObjDesc(target.type, target.color)
								        self.instrs = GoToInstr(goal_desc)

								        # If requested, open all the doors
								        if self.doors_open:
								            self.open_all_doors()
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToImpUnlock(RoomGridLevel):
								    """

								    ## Description

								    Go to an object, which may be in a locked room.
								    Competencies: Maze, GoTo, ImpUnlock
								    No unblocking.

								    ## Mission Space

								    "go to a/the {color} {type}"

								    {color} is the color of the object. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box" or "key".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the object.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToImpUnlock-v0`

								    """

								    def gen_mission(self):
								        """Build a level whose goal object sits in a locked room.

								        A random room gets a locked door, the matching key is put in a
								        different room, every other room receives two distractors, the agent
								        starts outside the locked room, and finally one object is added to
								        the locked room and used as the goal of the instruction.
								        """
								        # Add a locked door to a random room
								        locked_i = self._rand_int(0, self.num_cols)
								        locked_j = self._rand_int(0, self.num_rows)
								        door, _ = self.add_door(locked_i, locked_j, locked=True)
								        locked_room = self.get_room(locked_i, locked_j)

								        # Add the key to a different room.
								        # Room coordinates are compared by value (`==`), not identity
								        # (`is`): two integers holding the same number are not guaranteed
								        # to be the same object, so an identity test could let the key
								        # land inside the locked room.
								        while True:
								            key_i = self._rand_int(0, self.num_cols)
								            key_j = self._rand_int(0, self.num_rows)
								            if key_i == locked_i and key_j == locked_j:
								                continue
								            self.add_object(key_i, key_j, "key", door.color)
								            break

								        self.connect_all()

								        # Add distractors to all but the locked room.
								        # We do this to speed up the reachability test,
								        # which otherwise will reject all levels with
								        # objects in the locked room.
								        for i in range(self.num_cols):
								            for j in range(self.num_rows):
								                if i != locked_i or j != locked_j:
								                    self.add_distractors(i, j, num_distractors=2, all_unique=False)

								        # The agent must be placed after all the object to respect constraints
								        while True:
								            self.place_agent()
								            start_room = self.room_from_pos(*self.agent_pos)
								            # Ensure that we are not placing the agent in the locked room
								            if start_room is locked_room:
								                continue
								            break

								        self.check_objs_reachable()

								        # Add a single object to the locked room
								        # The instruction requires going to an object matching that description
								        (obj,) = self.add_distractors(
								            locked_i, locked_j, num_distractors=1, all_unique=False
								        )
								        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToSeq(LevelGen):
								    """

								    ## Description

								    A sequence of go-to-object commands.

								    Competencies: Maze, GoTo, Seq
								    No locked room (`locked_room_prob=0`).
								    No locations (`locations=False`).
								    No unblocking (`unblocking=False`).

								    ## Mission Space

								    "go to a/the {color} {type}" +
								    "and go to a/the {color} {type}" +
								    ", then go to a/the {color} {type}" +
								    "and go to a/the {color} {type}"

								    {color} is the color of the object. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box" or "key".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent completes the mission (goes to the requested objects).
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToSeq-v0`
								    - `BabyAI-GoToSeqS5R2-v0`

								    """

								    def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
								        """Configure the generator for go-to-only missions.

								        The sizing arguments are forwarded unchanged; the options that
								        define this level are fixed here. Any extra keyword arguments are
								        passed through to the parent constructor.
								        """
								        # Options that make this a "GoToSeq" level, independent of grid size.
								        level_options = {
								            "action_kinds": ["goto"],
								            "locked_room_prob": 0,
								            "locations": False,
								            "unblocking": False,
								        }
								        super().__init__(
								            room_size=room_size,
								            num_rows=num_rows,
								            num_cols=num_cols,
								            num_dists=num_dists,
								            **level_options,
								            **kwargs,
								        )
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToRedBlueBall(RoomGridLevel):
								    """

								    ## Description

								    Go to the red ball or to the blue ball.
								    The room holds exactly one ball that is red or blue, plus some distractors.
								    No distractor is ever a red ball or a blue ball.
								    Language is not required to solve this level.

								    ## Mission Space

								    "go to the {color} ball"

								    {color} is the color of the ball. Can be "red" or "blue".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the ball.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToRedBlueBall-v0`

								    """

								    def __init__(self, room_size=8, num_dists=7, **kwargs):
								        """Remember the distractor count and build a single-room grid."""
								        # Assigned before the parent constructor runs.
								        # NOTE(review): presumably so the attribute exists if the parent
								        # triggers mission generation during construction -- confirm.
								        self.num_dists = num_dists
								        super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)

								    def gen_mission(self):
								        """Place the agent, the distractors and one red or blue target ball.

								        Raises:
								            RejectSampling: if a sampled distractor is itself a red or
								                blue ball, since the target would no longer be unique.
								        """
								        self.place_agent()

								        distractors = self.add_distractors(
								            num_distractors=self.num_dists, all_unique=False
								        )

								        # The target must be the only red or blue ball in the room, so a
								        # layout whose distractors already include one is rejected.
								        ball_colors = ["red", "blue"]
								        if any(d.type == "ball" and d.color in ball_colors for d in distractors):
								            raise RejectSampling("can only have one blue or red ball")

								        target_color = self._rand_elem(ball_colors)
								        ball, _ = self.add_object(0, 0, "ball", target_color)

								        # Make sure no unblocking is required
								        self.check_objs_reachable()

								        self.instrs = GoToInstr(ObjDesc(ball.type, ball.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToDoor(RoomGridLevel):
								    """

								    ## Description

								    Go to a door
								    (of a given color, in the current room).
								    No distractors, no language variation.

								    ## Mission Space

								    "go to the {color} door"

								    {color} is the color of the door. Can be "red", "green", "blue", "purple",
								    "yellow" or "grey".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the door.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToDoor-v0`

								    """

								    def __init__(self, **kwargs):
								        """Build the level with a fixed room size of 7."""
								        super().__init__(room_size=7, **kwargs)

								    def gen_mission(self):
								        """Add four doors to room (1, 1) and ask for one of them by color."""
								        # `add_door` returns a pair; only the door object (first item) is kept.
								        doors = [self.add_door(1, 1)[0] for _ in range(4)]
								        self.place_agent(1, 1)

								        target = self._rand_elem(doors)
								        self.instrs = GoToInstr(ObjDesc("door", target.color))
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class GoToObjDoor(RoomGridLevel):
								    """

								    ## Description

								    Go to an object or door
								    (of a given type and color, in the current room).

								    ## Mission Space

								    "go to the {color} {type}"

								    {color} is the color of the object or door. Can be "red", "green", "blue",
								    "purple", "yellow" or "grey".

								    {type} is the type of the object. Can be "ball", "box", "key" or "door".

								    ## Action Space

								    | Num | Name         | Action            |
								    |-----|--------------|-------------------|
								    | 0   | left         | Turn left         |
								    | 1   | right        | Turn right        |
								    | 2   | forward      | Move forward      |
								    | 3   | pickup       | Pick up an object |
								    | 4   | drop         | Unused            |
								    | 5   | toggle       | Unused            |
								    | 6   | done         | Unused            |

								    ## Observation Encoding

								    - Each tile is encoded as a 3 dimensional tuple:
								        `(OBJECT_IDX, COLOR_IDX, STATE)`
								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
								        [minigrid/minigrid.py](minigrid/minigrid.py)
								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

								    ## Rewards

								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

								    ## Termination

								    The episode ends if any one of the following conditions is met:

								    1. The agent goes to the object or door.
								    2. Timeout (see `max_steps`).

								    ## Registered Configurations

								    - `BabyAI-GoToObjDoor-v0`

								    """

								    def __init__(self, **kwargs):
								        """Build the level with a fixed room size of 8."""
								        super().__init__(room_size=8, **kwargs)

								    def gen_mission(self):
								        """Fill room (1, 1) with 8 distractors and 4 doors, then target one.

								        The target is drawn from the combined pool of distractors and
								        doors, and the instruction names its type and color.
								        """
								        self.place_agent(1, 1)
								        candidates = self.add_distractors(
								            1, 1, num_distractors=8, all_unique=False
								        )

								        # `add_door` returns a pair; only the door object (first item) is kept.
								        candidates.extend(self.add_door(1, 1)[0] for _ in range(4))

								        self.check_objs_reachable()

								        target = self._rand_elem(candidates)
								        self.instrs = GoToInstr(ObjDesc(target.type, target.color))
							 |