You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

281 lines
9.0 KiB

2 months ago
  1. from __future__ import annotations
  2. from minigrid.core.constants import COLOR_NAMES
  3. from minigrid.core.grid import Grid
  4. from minigrid.core.mission import MissionSpace
  5. from minigrid.core.world_object import Door, Goal, Wall
  6. from minigrid.minigrid_env import MiniGridEnv
  7. class MultiRoom:
  8. def __init__(self, top, size, entryDoorPos, exitDoorPos):
  9. self.top = top
  10. self.size = size
  11. self.entryDoorPos = entryDoorPos
  12. self.exitDoorPos = exitDoorPos
  13. class MultiRoomEnv(MiniGridEnv):
  14. """
  15. ## Description
  16. This environment has a series of connected rooms with doors that must be
  17. opened in order to get to the next room. The final room has the green goal
  18. square the agent must get to. This environment is extremely difficult to
  19. solve using RL alone. However, by gradually increasing the number of rooms
  20. and building a curriculum, the environment can be solved.
  21. ## Mission Space
  22. "traverse the rooms to get to the goal"
  23. ## Action Space
  24. | Num | Name | Action |
  25. |-----|--------------|---------------------------|
  26. | 0 | left | Turn left |
  27. | 1 | right | Turn right |
  28. | 2 | forward | Move forward |
  29. | 3 | pickup | Unused |
  30. | 4 | drop | Unused |
  31. | 5 | toggle | Toggle/activate an object |
  32. | 6 | done | Unused |
  33. ## Observation Encoding
  34. - Each tile is encoded as a 3 dimensional tuple:
  35. `(OBJECT_IDX, COLOR_IDX, STATE)`
  36. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  37. [minigrid/minigrid.py](minigrid/minigrid.py)
  38. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  39. ## Rewards
  40. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  41. ## Termination
  42. The episode ends if any one of the following conditions is met:
  43. 1. The agent reaches the goal.
  44. 2. Timeout (see `max_steps`).
  45. ## Registered Configurations
  46. S: size of map SxS.
  47. N: number of rooms.
  48. - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
  49. - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
  50. - `MiniGrid-MultiRoom-N6-v0` (six rooms)
  51. """
  52. def __init__(
  53. self,
  54. minNumRooms,
  55. maxNumRooms,
  56. maxRoomSize=10,
  57. max_steps: int | None = None,
  58. **kwargs,
  59. ):
  60. assert minNumRooms > 0
  61. assert maxNumRooms >= minNumRooms
  62. assert maxRoomSize >= 4
  63. self.minNumRooms = minNumRooms
  64. self.maxNumRooms = maxNumRooms
  65. self.maxRoomSize = maxRoomSize
  66. self.rooms = []
  67. mission_space = MissionSpace(mission_func=self._gen_mission)
  68. self.size = 25
  69. if max_steps is None:
  70. max_steps = maxNumRooms * 20
  71. super().__init__(
  72. mission_space=mission_space,
  73. width=self.size,
  74. height=self.size,
  75. max_steps=max_steps,
  76. **kwargs,
  77. )
  78. @staticmethod
  79. def _gen_mission():
  80. return "traverse the rooms to get to the goal"
  81. def _gen_grid(self, width, height):
  82. roomList = []
  83. # Choose a random number of rooms to generate
  84. numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)
  85. while len(roomList) < numRooms:
  86. curRoomList = []
  87. entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))
  88. # Recursively place the rooms
  89. self._placeRoom(
  90. numRooms,
  91. roomList=curRoomList,
  92. minSz=4,
  93. maxSz=self.maxRoomSize,
  94. entryDoorWall=2,
  95. entryDoorPos=entryDoorPos,
  96. )
  97. if len(curRoomList) > len(roomList):
  98. roomList = curRoomList
  99. # Store the list of rooms in this environment
  100. assert len(roomList) > 0
  101. self.rooms = roomList
  102. # Create the grid
  103. self.grid = Grid(width, height)
  104. wall = Wall()
  105. prevDoorColor = None
  106. # For each room
  107. for idx, room in enumerate(roomList):
  108. topX, topY = room.top
  109. sizeX, sizeY = room.size
  110. # Draw the top and bottom walls
  111. for i in range(0, sizeX):
  112. self.grid.set(topX + i, topY, wall)
  113. self.grid.set(topX + i, topY + sizeY - 1, wall)
  114. # Draw the left and right walls
  115. for j in range(0, sizeY):
  116. self.grid.set(topX, topY + j, wall)
  117. self.grid.set(topX + sizeX - 1, topY + j, wall)
  118. # If this isn't the first room, place the entry door
  119. if idx > 0:
  120. # Pick a door color different from the previous one
  121. doorColors = set(COLOR_NAMES)
  122. if prevDoorColor:
  123. doorColors.remove(prevDoorColor)
  124. # Note: the use of sorting here guarantees determinism,
  125. # This is needed because Python's set is not deterministic
  126. doorColor = self._rand_elem(sorted(doorColors))
  127. entryDoor = Door(doorColor)
  128. self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)
  129. prevDoorColor = doorColor
  130. prevRoom = roomList[idx - 1]
  131. prevRoom.exitDoorPos = room.entryDoorPos
  132. # Randomize the starting agent position and direction
  133. self.place_agent(roomList[0].top, roomList[0].size)
  134. # Place the final goal in the last room
  135. self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
  136. self.mission = "traverse the rooms to get to the goal"
  137. def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):
  138. # Choose the room size randomly
  139. sizeX = self._rand_int(minSz, maxSz + 1)
  140. sizeY = self._rand_int(minSz, maxSz + 1)
  141. # The first room will be at the door position
  142. if len(roomList) == 0:
  143. topX, topY = entryDoorPos
  144. # Entry on the right
  145. elif entryDoorWall == 0:
  146. topX = entryDoorPos[0] - sizeX + 1
  147. y = entryDoorPos[1]
  148. topY = self._rand_int(y - sizeY + 2, y)
  149. # Entry wall on the south
  150. elif entryDoorWall == 1:
  151. x = entryDoorPos[0]
  152. topX = self._rand_int(x - sizeX + 2, x)
  153. topY = entryDoorPos[1] - sizeY + 1
  154. # Entry wall on the left
  155. elif entryDoorWall == 2:
  156. topX = entryDoorPos[0]
  157. y = entryDoorPos[1]
  158. topY = self._rand_int(y - sizeY + 2, y)
  159. # Entry wall on the top
  160. elif entryDoorWall == 3:
  161. x = entryDoorPos[0]
  162. topX = self._rand_int(x - sizeX + 2, x)
  163. topY = entryDoorPos[1]
  164. else:
  165. assert False, entryDoorWall
  166. # If the room is out of the grid, can't place a room here
  167. if topX < 0 or topY < 0:
  168. return False
  169. if topX + sizeX > self.width or topY + sizeY >= self.height:
  170. return False
  171. # If the room intersects with previous rooms, can't place it here
  172. for room in roomList[:-1]:
  173. nonOverlap = (
  174. topX + sizeX < room.top[0]
  175. or room.top[0] + room.size[0] <= topX
  176. or topY + sizeY < room.top[1]
  177. or room.top[1] + room.size[1] <= topY
  178. )
  179. if not nonOverlap:
  180. return False
  181. # Add this room to the list
  182. roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))
  183. # If this was the last room, stop
  184. if numLeft == 1:
  185. return True
  186. # Try placing the next room
  187. for i in range(0, 8):
  188. # Pick which wall to place the out door on
  189. wallSet = {0, 1, 2, 3}
  190. wallSet.remove(entryDoorWall)
  191. exitDoorWall = self._rand_elem(sorted(wallSet))
  192. nextEntryWall = (exitDoorWall + 2) % 4
  193. # Pick the exit door position
  194. # Exit on right wall
  195. if exitDoorWall == 0:
  196. exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))
  197. # Exit on south wall
  198. elif exitDoorWall == 1:
  199. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)
  200. # Exit on left wall
  201. elif exitDoorWall == 2:
  202. exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
  203. # Exit on north wall
  204. elif exitDoorWall == 3:
  205. exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
  206. else:
  207. assert False
  208. # Recursively create the other rooms
  209. success = self._placeRoom(
  210. numLeft - 1,
  211. roomList=roomList,
  212. minSz=minSz,
  213. maxSz=maxSz,
  214. entryDoorWall=nextEntryWall,
  215. entryDoorPos=exitDoorPos,
  216. )
  217. if success:
  218. break
  219. return True