You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

174 lines
5.7 KiB

3 months ago
  1. from __future__ import annotations
  2. from minigrid.core.constants import COLOR_NAMES
  3. from minigrid.core.grid import Grid
  4. from minigrid.core.mission import MissionSpace
  5. from minigrid.core.world_object import Door, Goal, Key, Wall
  6. from minigrid.minigrid_env import MiniGridEnv
  class LockedRoom:
      """Bookkeeping record for one room of the locked-room layout.

      Stores the room's top-left corner, its size, and the position of its
      door. The colour and locked flag start out unset and are filled in
      later by whoever builds the grid.
      """

      def __init__(self, top, size, doorPos):
          # Geometry is stored exactly as handed in by the caller.
          self.top, self.size, self.doorPos = top, size, doorPos
          # Not decided at construction time.
          self.color = None
          self.locked = False

      def rand_pos(self, env):
          """Ask *env* for a random position inside this room.

          The bounds handed to ``env._rand_pos`` are shrunk by one cell on
          each side relative to ``top``/``size``.
          """
          left, upper = self.top
          span_x, span_y = self.size
          x_low = left + 1
          x_high = left + span_x - 1
          y_low = upper + 1
          y_high = upper + span_y - 1
          return env._rand_pos(x_low, x_high, y_low, y_high)
  class LockedRoomEnv(MiniGridEnv):
      """
      ## Description

      The environment has six rooms, one of which is locked. The agent receives
      a textual mission string as input, telling it which room to go to in order
      to get the key that opens the locked room. It then has to go into the locked
      room in order to reach the final goal. This environment is extremely
      difficult to solve with vanilla reinforcement learning alone.

      ## Mission Space

      "get the {lockedroom_color} key from the {keyroom_color} room, unlock the {door_color} door and go to the goal"

      {lockedroom_color}, {keyroom_color}, and {door_color} can be "red", "green",
      "blue", "purple", "yellow" or "grey".

      ## Action Space

      | Num | Name         | Action                    |
      |-----|--------------|---------------------------|
      | 0   | left         | Turn left                 |
      | 1   | right        | Turn right                |
      | 2   | forward      | Move forward              |
      | 3   | pickup       | Pick up an object         |
      | 4   | drop         | Unused                    |
      | 5   | toggle       | Toggle/activate an object |
      | 6   | done         | Unused                    |

      ## Observation Encoding

      - Each tile is encoded as a 3 dimensional tuple:
          `(OBJECT_IDX, COLOR_IDX, STATE)`
      - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
          [minigrid/minigrid.py](minigrid/minigrid.py)
      - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

      ## Rewards

      A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

      ## Termination

      The episode ends if any one of the following conditions is met:

      1. The agent reaches the goal.
      2. Timeout (see `max_steps`).

      ## Registered Configurations

      - `MiniGrid-LockedRoom-v0`

      """

      def __init__(self, size=19, max_steps: int | None = None, **kwargs):
          """Create the environment.

          Args:
              size: Used as both the width and the height of the grid.
              max_steps: Step limit passed to the base class; when ``None``
                  it defaults to ``10 * size``.
              **kwargs: Forwarded unchanged to ``MiniGridEnv.__init__``.
          """
          self.size = size

          if max_steps is None:
              max_steps = 10 * size

          # Three colour placeholders, in the order taken by `_gen_mission`.
          mission_space = MissionSpace(
              mission_func=self._gen_mission,
              ordered_placeholders=[COLOR_NAMES] * 3,
          )
          super().__init__(
              mission_space=mission_space,
              width=size,
              height=size,
              max_steps=max_steps,
              **kwargs,
          )

      @staticmethod
      def _gen_mission(lockedroom_color: str, keyroom_color: str, door_color: str):
          """Return the mission sentence for the given three colours."""
          return (
              f"get the {lockedroom_color} key from the {keyroom_color} room,"
              f" unlock the {door_color} door and go to the goal"
          )

      def _gen_grid(self, width, height):
          """Build the grid: walls, six rooms with doors, goal, key and agent.

          Sets ``self.grid``, ``self.rooms``, ``self.agent_pos`` and
          ``self.mission``. The order of the random draws is kept fixed
          (locked room, goal position, door colours, key room, key position,
          agent placement).
          """
          # Create the grid
          self.grid = Grid(width, height)

          # Generate the surrounding walls
          for i in range(width):
              self.grid.set(i, 0, Wall())
              self.grid.set(i, height - 1, Wall())
          for j in range(height):
              self.grid.set(0, j, Wall())
              self.grid.set(width - 1, j, Wall())

          # Hallway walls: two full-height columns around the grid's centre
          left_wall_x = width // 2 - 2
          right_wall_x = width // 2 + 2
          for j in range(height):
              self.grid.set(left_wall_x, j, Wall())
              self.grid.set(right_wall_x, j, Wall())

          self.rooms = []

          # Room dimensions are the same for every room, so compute them once.
          room_w = left_wall_x + 1
          room_h = height // 3 + 1

          # Room splitting walls: three rows, one room on each side of the hallway
          for n in range(3):
              j = n * (height // 3)
              for i in range(left_wall_x):
                  self.grid.set(i, j, Wall())
              for i in range(right_wall_x, width):
                  self.grid.set(i, j, Wall())

              # Doors sit on the hallway walls, a fixed 3 cells below the
              # room's top row.
              # NOTE(review): the offset 3 is hard-coded; it is the middle of
              # the room wall for the default size=19 - confirm for other sizes.
              self.rooms.append(
                  LockedRoom((0, j), (room_w, room_h), (left_wall_x, j + 3))
              )
              self.rooms.append(
                  LockedRoom((right_wall_x, j), (room_w, room_h), (right_wall_x, j + 3))
              )

          # Choose one random room to be locked and put the goal inside it
          locked_room = self._rand_elem(self.rooms)
          locked_room.locked = True
          goal_pos = locked_room.rand_pos(self)
          self.grid.set(*goal_pos, Goal())

          # Assign the door colors: each room draws a distinct colour from the
          # remaining ones (sorted so the candidate order is stable).
          colors = set(COLOR_NAMES)
          for room in self.rooms:
              color = self._rand_elem(sorted(colors))
              colors.remove(color)
              room.color = color
              self.grid.set(*room.doorPos, Door(color, is_locked=room.locked))

          # Select a random room to contain the key: redraw until the pick is
          # not the locked room.
          key_room = locked_room
          while key_room is locked_room:
              key_room = self._rand_elem(self.rooms)

          # The key carries the locked room's colour.
          key_pos = key_room.rand_pos(self)
          self.grid.set(*key_pos, Key(locked_room.color))

          # Randomize the player start position and orientation (inside the
          # hallway region between the two hallway walls)
          self.agent_pos = self.place_agent(
              top=(left_wall_x, 0), size=(right_wall_x - left_wall_x, height)
          )

          # Generate the mission string from the same template that was
          # registered with the mission space, so the two cannot drift apart.
          self.mission = self._gen_mission(
              locked_room.color, key_room.color, locked_room.color
          )