You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

174 lines
5.7 KiB

from __future__ import annotations
from minigrid.core.constants import COLOR_NAMES
from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Door, Goal, Key, Wall
from minigrid.minigrid_env import MiniGridEnv
class LockedRoom:
    """Bookkeeping record for one room of the locked-room layout.

    The geometry (``top``, ``size``, ``doorPos``) is fixed at construction.
    ``color`` and ``locked`` start out as ``None`` / ``False`` and are meant
    to be filled in afterwards by the code that builds the grid.
    """

    def __init__(self, top: tuple[int, int], size: tuple[int, int], doorPos: tuple[int, int]):
        """Store the room geometry and reset the per-episode attributes.

        Args:
            top: ``(x, y)`` of the room's top-left corner cell.
            size: ``(width, height)`` of the room.
            doorPos: ``(x, y)`` of the cell that holds the room's door.
        """
        self.top, self.size, self.doorPos = top, size, doorPos
        # Assigned later, once door colours and the locked room are chosen.
        self.color = None
        self.locked = False

    def rand_pos(self, env):
        """Ask ``env`` for a random position inside this room.

        The bounds handed to ``env._rand_pos`` are shifted one cell inwards
        from the room's corner on both axes; how ``env`` interprets the upper
        bounds (inclusive or exclusive) is up to ``env._rand_pos``.

        Args:
            env: Object exposing ``_rand_pos(x_low, x_high, y_low, y_high)``.

        Returns:
            Whatever ``env._rand_pos`` returns for the computed bounds.
        """
        left, upper = self.top
        room_w, room_h = self.size
        x_low, x_high = left + 1, left + room_w - 1
        y_low, y_high = upper + 1, upper + room_h - 1
        return env._rand_pos(x_low, x_high, y_low, y_high)
class LockedRoomEnv(MiniGridEnv):
    """
    ## Description

    The environment has six rooms, one of which is locked. The agent receives
    a textual mission string as input, telling it which room to go to in order
    to get the key that opens the locked room. It then has to go into the locked
    room in order to reach the final goal. This environment is extremely
    difficult to solve with vanilla reinforcement learning alone.

    ## Mission Space

    "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"

    {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
    "blue", "purple", "yellow" or "grey".

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Unused                    |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/core/constants.py](minigrid/core/constants.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent reaches the goal.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `MiniGrid-LockedRoom-v0`

    """

    def __init__(self, size=19, max_steps: int | None = None, **kwargs):
        """Create the environment.

        Args:
            size: Side length of the square grid; used for both width and
                height.
            max_steps: Step budget per episode. Defaults to ``10 * size``
                when ``None``.
            **kwargs: Forwarded unchanged to ``MiniGridEnv.__init__``.
        """
        self.size = size

        if max_steps is None:
            max_steps = 10 * size

        # Three colour placeholders: locked-room key, key room, locked door.
        mission_space = MissionSpace(
            mission_func=self._gen_mission,
            ordered_placeholders=[COLOR_NAMES] * 3,
        )
        super().__init__(
            mission_space=mission_space,
            width=size,
            height=size,
            max_steps=max_steps,
            **kwargs,
        )

    @staticmethod
    def _gen_mission(lockedroom_color: str, keyroom_color: str, door_color: str) -> str:
        """Return the mission sentence for the given colours.

        This is the single source of the mission wording: it is used both for
        the mission space and for the per-episode ``self.mission``.
        """
        return (
            f"get the {lockedroom_color} key from the {keyroom_color} room,"
            f" unlock the {door_color} door and go to the goal"
        )

    def _gen_grid(self, width, height):
        """Build a fresh grid, place goal, doors, key and agent, set the mission.

        The sequence of random draws is: locked room, goal position, one door
        colour per room, key room, key position, agent placement. Reordering
        these steps would change the layout generated for a given seed.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
        """
        # Create the grid and all static walls / room records
        self.grid = Grid(width, height)
        lWallIdx, rWallIdx = self._build_walls_and_rooms(width, height)

        # Choose one random room to be locked and put the goal inside it
        lockedRoom = self._rand_elem(self.rooms)
        lockedRoom.locked = True
        goalPos = lockedRoom.rand_pos(self)
        self.grid.set(*goalPos, Goal())

        # Assign the door colors (needs `locked` to be set already)
        self._assign_door_colors()

        # Select a random, non-locked room to contain the key
        keyRoom = self._choose_key_room(lockedRoom)
        keyPos = keyRoom.rand_pos(self)
        self.grid.set(*keyPos, Key(lockedRoom.color))

        # Randomize the player start position and orientation; the region
        # passed is the strip between the two hallway walls.
        self.agent_pos = self.place_agent(
            top=(lWallIdx, 0), size=(rWallIdx - lWallIdx, height)
        )

        # Generate the mission string. The key and the locked door share the
        # locked room's colour, hence it is passed twice.
        self.mission = self._gen_mission(
            lockedRoom.color, keyRoom.color, lockedRoom.color
        )

    def _build_walls_and_rooms(self, width, height):
        """Lay down every wall and populate ``self.rooms``; return the hallway wall columns."""
        # Generate the surrounding walls
        for i in range(width):
            self.grid.set(i, 0, Wall())
            self.grid.set(i, height - 1, Wall())
        for j in range(height):
            self.grid.set(0, j, Wall())
            self.grid.set(width - 1, j, Wall())

        # Hallway walls: two full-height columns around the grid's centre
        lWallIdx = width // 2 - 2
        rWallIdx = width // 2 + 2
        for j in range(height):
            self.grid.set(lWallIdx, j, Wall())
            self.grid.set(rWallIdx, j, Wall())

        self.rooms = []

        # Room dimensions do not depend on the row, so compute them once.
        roomW = lWallIdx + 1
        roomH = height // 3 + 1

        # Room splitting walls: three rows of rooms, one room on each side
        for n in range(3):
            j = n * (height // 3)
            for i in range(lWallIdx):
                self.grid.set(i, j, Wall())
            for i in range(rWallIdx, width):
                self.grid.set(i, j, Wall())

            # NOTE(review): the door row offset (j + 3) is hard-coded; it is
            # an interior row for the default size of 19 (roomH == 7) but is
            # not derived from roomH for other sizes.
            self.rooms.append(LockedRoom((0, j), (roomW, roomH), (lWallIdx, j + 3)))
            self.rooms.append(
                LockedRoom((rWallIdx, j), (roomW, roomH), (rWallIdx, j + 3))
            )

        return lWallIdx, rWallIdx

    def _assign_door_colors(self):
        """Give every room a distinct random colour and place its door on the grid."""
        # Sorted once; removing an element keeps the list sorted, so each draw
        # sees the same candidate list a per-iteration re-sort would produce.
        remaining = sorted(set(COLOR_NAMES))
        for room in self.rooms:
            color = self._rand_elem(remaining)
            remaining.remove(color)
            room.color = color
            if room.locked:
                self.grid.set(*room.doorPos, Door(color, is_locked=True))
            else:
                self.grid.set(*room.doorPos, Door(color))

    def _choose_key_room(self, lockedRoom):
        """Draw rooms at random until one other than ``lockedRoom`` comes up."""
        while True:
            candidate = self._rand_elem(self.rooms)
            if candidate is not lockedRoom:
                return candidate