You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

271 lines
8.5 KiB

from __future__ import annotations
from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC
from minigrid.core.mission import MissionSpace
from minigrid.core.roomgrid import RoomGrid
from minigrid.core.world_object import Ball, Box, Key
class ObstructedMazeEnv(RoomGrid):
    """
    ## Description

    The agent has to pick up a ball which is placed in a corner of a 3x3 maze.
    The doors are locked, the keys are hidden in boxes and doors are obstructed
    by balls. This environment can be solved without relying on language.

    ## Mission Space

    "pick up the {COLOR_NAMES[0]} ball"

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Unused                    |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent picks up the blue ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    "NDl" are the number of doors locked.
    "h" if the key is hidden in a box.
    "b" if the door is obstructed by a ball.
    "Q" number of quarters that will have doors and keys out of the 9 that the
    map already has.
    "Full" 3x3 maze with "h" and "b" options.
    "v1" prevents the key from being covered by the blocking ball. Only 2Dlhb, 1Q, 2Q, and Full are
    updated to v1. Other configurations won't face this issue because there is no blocking ball (1Dl,
    1Dlh, 2Dl, 2Dlh) or the only blocking ball is added before the key (1Dlhb).

    - `MiniGrid-ObstructedMaze-1Dl-v0`
    - `MiniGrid-ObstructedMaze-1Dlh-v0`
    - `MiniGrid-ObstructedMaze-1Dlhb-v0`
    - `MiniGrid-ObstructedMaze-2Dl-v0`
    - `MiniGrid-ObstructedMaze-2Dlh-v0`
    - `MiniGrid-ObstructedMaze-2Dlhb-v0`
    - `MiniGrid-ObstructedMaze-2Dlhb-v1`
    - `MiniGrid-ObstructedMaze-1Q-v0`
    - `MiniGrid-ObstructedMaze-1Q-v1`
    - `MiniGrid-ObstructedMaze-2Q-v0`
    - `MiniGrid-ObstructedMaze-2Q-v1`
    - `MiniGrid-ObstructedMaze-Full-v0`
    - `MiniGrid-ObstructedMaze-Full-v1`

    """

    def __init__(
        self,
        num_rows: int,
        num_cols: int,
        num_rooms_visited: int,
        max_steps: int | None = None,
        **kwargs,
    ):
        """
        Build the mission space and initialise the underlying room grid.

        Args:
            num_rows: number of room rows, forwarded to the parent class.
            num_cols: number of room columns, forwarded to the parent class.
            num_rooms_visited: only used to derive the default step budget.
            max_steps: episode step limit. When ``None`` it defaults to
                ``4 * num_rooms_visited * room_size**2``.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        room_size = 6

        if max_steps is None:
            max_steps = 4 * num_rooms_visited * room_size**2

        mission_space = MissionSpace(
            mission_func=self._gen_mission,
            ordered_placeholders=[[COLOR_NAMES[0]]],
        )

        super().__init__(
            mission_space=mission_space,
            room_size=room_size,
            num_rows=num_rows,
            num_cols=num_cols,
            max_steps=max_steps,
            **kwargs,
        )

        # Placeholder target; subclasses replace it with the ball they place
        # in `_gen_grid`.
        self.obj = Ball()

    @staticmethod
    def _gen_mission(color: str) -> str:
        """Return the mission sentence for a target ball of the given color."""
        return f"pick up the {color} ball"

    def _gen_grid(self, width, height):
        """
        Generate the base grid and pick the colors used by this environment.

        Sets `door_colors`, `ball_to_find_color`, `blocking_ball_color`,
        `box_color` and `mission`. Subclasses extend this to add doors,
        the target ball and the agent.
        """
        super()._gen_grid(width, height)

        # Define all possible colors for doors
        self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
        # Define the color of the ball to pick up
        self.ball_to_find_color = COLOR_NAMES[0]
        # Define the color of the balls that obstruct doors
        self.blocking_ball_color = COLOR_NAMES[1]
        # Define the color of boxes in which keys are hidden
        self.box_color = COLOR_NAMES[2]

        # Reuse the mission-space generator so both strings cannot drift apart.
        self.mission = self._gen_mission(self.ball_to_find_color)

    def step(self, action):
        """
        Run the parent step, then check for success.

        When the action is `pickup` and the agent ends up carrying the target
        object (`self.obj`), the reward is replaced by `self._reward()` and the
        episode is marked as terminated.

        Returns:
            The `(obs, reward, terminated, truncated, info)` tuple.
        """
        obs, reward, terminated, truncated, info = super().step(action)

        picked_up_target = (
            action == self.actions.pickup
            and self.carrying
            and self.carrying == self.obj
        )
        if picked_up_target:
            reward = self._reward()
            terminated = True

        return obs, reward, terminated, truncated, info

    def add_door(
        self,
        i,
        j,
        door_idx=0,
        color=None,
        locked=False,
        key_in_box=False,
        blocked=False,
    ):
        """
        Add a door. If the door must be locked, it also adds the key.
        If the key must be hidden, it is put in a box. If the door must
        be obstructed, it adds a ball in front of the door.

        Args:
            i, j: room coordinates, forwarded to the parent `add_door`.
            door_idx: index of the door within the room; also selects the
                direction vector used to position the blocking ball.
            color: door color, forwarded to the parent `add_door`.
            locked: whether the door is locked (a key is then placed in room
                `(i, j)`).
            key_in_box: whether that key is wrapped in a box.
            blocked: whether a ball is put next to the door.

        Returns:
            The `(door, door_pos)` pair returned by the parent `add_door`.
        """
        door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)

        if blocked:
            # One cell back from the door along the door's direction vector
            # (presumably the room side of the door - confirm against
            # RoomGrid).
            # NOTE(review): the cell is written directly, with no check of
            # what it already holds; the class docstring's "v1" remark
            # suggests a key can end up covered this way.
            vec = DIR_TO_VEC[door_idx]
            self.grid.set(
                door_pos[0] - vec[0],
                door_pos[1] - vec[1],
                Ball(self.blocking_ball_color),
            )

        if locked:
            obj = Key(door.color)
            if key_in_box:
                box = Box(self.box_color)
                box.contains = obj
                obj = box
            self.place_in_room(i, j, obj)

        return door, door_pos
class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
    """
    Maze made of one row and two columns of rooms, with a single locked
    door added to room (0, 0). The agent starts in room (0, 0) and the
    ball to find is placed in room (1, 0). By default the key is hidden in
    a box and the door is obstructed by a ball; both options can be turned
    off through `key_in_box` and `blocked`.
    """

    def __init__(self, key_in_box=True, blocked=True, **kwargs):
        # Stored ahead of the parent constructor so they are already
        # available whenever `_gen_grid` runs.
        self.blocked = blocked
        self.key_in_box = key_in_box
        super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, **kwargs)

    def _gen_grid(self, width, height):
        """Add the locked door, then the target ball, then the agent."""
        super()._gen_grid(width, height)

        door_options = dict(
            door_idx=0,
            color=self.door_colors[0],
            locked=True,
            key_in_box=self.key_in_box,
            blocked=self.blocked,
        )
        self.add_door(0, 0, **door_options)

        # Remember the ball so `step` can detect when it is picked up.
        target_ball, _ = self.add_object(
            1, 0, "ball", color=self.ball_to_find_color
        )
        self.obj = target_ball

        self.place_agent(0, 0)
class ObstructedMaze_Full(ObstructedMazeEnv):
    """
    A blue ball is hidden in one of the corners of a 3x3 maze. Doors
    are locked, doors are obstructed by a ball and keys are hidden in
    boxes.

    Only the first `num_quarters` side rooms get doors, and the ball is
    placed in one of the first `num_quarters` corner rooms, so with the
    default `num_quarters=4` any of the 4 corners can hold the ball.
    """

    def __init__(
        self,
        agent_room=(1, 1),
        key_in_box=True,
        blocked=True,
        num_quarters=4,
        num_rooms_visited=25,
        **kwargs,
    ):
        """
        Args:
            agent_room: room coordinates in which the agent is placed.
            key_in_box: whether keys of locked doors are hidden in boxes.
            blocked: whether locked doors are obstructed by a ball.
            num_quarters: how many side rooms (and corner candidates for the
                ball) are used, taken from the start of the fixed lists in
                `_gen_grid`.
            num_rooms_visited: forwarded to the parent constructor, where it
                sets the default step budget.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        self.agent_room = agent_room
        self.key_in_box = key_in_box
        self.blocked = blocked
        self.num_quarters = num_quarters

        super().__init__(
            num_rows=3, num_cols=3, num_rooms_visited=num_rooms_visited, **kwargs
        )

    def _gen_grid(self, width, height):
        """Add the doors, keys and obstacles, then the target ball and agent."""
        super()._gen_grid(width, height)

        middle_room = (1, 1)
        # Define positions of "side rooms" i.e. rooms that are neither
        # corners nor the center.
        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][: self.num_quarters]
        num_colors = len(self.door_colors)

        for i, side_room in enumerate(side_rooms):
            # Add a door between the center room and the side room
            self.add_door(
                *middle_room, door_idx=i, color=self.door_colors[i], locked=False
            )

            for k in (-1, 1):
                # Add a door to each side of the side room. The door index
                # wraps over the 4 door slots, the color index wraps over the
                # available door colors.
                self.add_door(
                    *side_room,
                    locked=True,
                    door_idx=(i + k) % 4,
                    color=self.door_colors[(i + k) % num_colors],
                    key_in_box=self.key_in_box,
                    blocked=self.blocked,
                )

        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
        ball_room = self._rand_elem(corners)

        # Remember the ball so `step` can detect when it is picked up.
        self.obj, _ = self.add_object(
            ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color
        )
        self.place_agent(*self.agent_room)
class ObstructedMaze_2Dl(ObstructedMaze_Full):
    """
    Single-quarter configuration of the full maze: the agent starts in
    room (2, 1), keys are not hidden in boxes and doors are not obstructed.
    """

    def __init__(self, **kwargs):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=False,
            blocked=False,
            num_quarters=1,
            num_rooms_visited=4,
            **kwargs,
        )
class ObstructedMaze_2Dlh(ObstructedMaze_Full):
    """
    Single-quarter configuration of the full maze: the agent starts in
    room (2, 1), keys are hidden in boxes and doors are not obstructed.
    """

    def __init__(self, **kwargs):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=True,
            blocked=False,
            num_quarters=1,
            num_rooms_visited=4,
            **kwargs,
        )
class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
    """
    Single-quarter configuration of the full maze: the agent starts in
    room (2, 1), keys are hidden in boxes and doors are obstructed by balls.
    """

    def __init__(self, **kwargs):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=True,
            blocked=True,
            num_quarters=1,
            num_rooms_visited=4,
            **kwargs,
        )