You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

471 lines
13 KiB

2 months ago
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Unlock` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.core.constants import COLOR_NAMES
  7. from minigrid.core.world_object import Ball, Box, Key
  8. from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
  9. from minigrid.envs.babyai.core.verifier import ObjDesc, OpenInstr, PickupInstr
  10. class Unlock(RoomGridLevel):
  11. """
  12. ## Description
  13. Unlock a door.
  14. Competencies: Maze, Open, Unlock. No unblocking.
  15. ## Mission Space
  16. "open the {color} door"
  17. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  18. "yellow" or "grey".
  19. ## Action Space
  20. | Num | Name | Action |
  21. |-----|--------------|-------------------|
  22. | 0 | left | Turn left |
  23. | 1 | right | Turn right |
  24. | 2 | forward | Move forward |
  25. | 3 | pickup | Pick up an object |
  26. | 4 | drop | Unused |
  27. | 5 | toggle | Unused |
  28. | 6 | done | Unused |
  29. ## Observation Encoding
  30. - Each tile is encoded as a 3 dimensional tuple:
  31. `(OBJECT_IDX, COLOR_IDX, STATE)`
  32. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  33. [minigrid/minigrid.py](minigrid/minigrid.py)
  34. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  35. ## Rewards
  36. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  37. ## Termination
  38. The episode ends if any one of the following conditions is met:
  39. 1. The agent opens the correct door.
  40. 2. Timeout (see `max_steps`).
  41. ## Registered Configurations
  42. - `BabyAI-Unlock-v0`
  43. """
  44. def gen_mission(self):
  45. # Add a locked door to a random room
  46. id = self._rand_int(0, self.num_cols)
  47. jd = self._rand_int(0, self.num_rows)
  48. door, pos = self.add_door(id, jd, locked=True)
  49. locked_room = self.get_room(id, jd)
  50. # Add the key to a different room
  51. while True:
  52. ik = self._rand_int(0, self.num_cols)
  53. jk = self._rand_int(0, self.num_rows)
  54. if ik is id and jk is jd:
  55. continue
  56. self.add_object(ik, jk, "key", door.color)
  57. break
  58. # With 50% probability, ensure that the locked door is the only
  59. # door of that color
  60. if self._rand_bool():
  61. colors = list(filter(lambda c: c is not door.color, COLOR_NAMES))
  62. self.connect_all(door_colors=colors)
  63. else:
  64. self.connect_all()
  65. # Add distractors to all but the locked room.
  66. # We do this to speed up the reachability test,
  67. # which otherwise will reject all levels with
  68. # objects in the locked room.
  69. for i in range(self.num_cols):
  70. for j in range(self.num_rows):
  71. if i is not id or j is not jd:
  72. self.add_distractors(i, j, num_distractors=3, all_unique=False)
  73. # The agent must be placed after all the object to respect constraints
  74. while True:
  75. self.place_agent()
  76. start_room = self.room_from_pos(*self.agent_pos)
  77. # Ensure that we are not placing the agent in the locked room
  78. if start_room is locked_room:
  79. continue
  80. break
  81. self.check_objs_reachable()
  82. self.instrs = OpenInstr(ObjDesc(door.type, door.color))
  83. class UnlockLocal(RoomGridLevel):
  84. """
  85. ## Description
  86. Fetch a key and unlock a door
  87. (in the current room)
  88. ## Mission Space
  89. "open the door"
  90. ## Action Space
  91. | Num | Name | Action |
  92. |-----|--------------|-------------------|
  93. | 0 | left | Turn left |
  94. | 1 | right | Turn right |
  95. | 2 | forward | Move forward |
  96. | 3 | pickup | Pick up an object |
  97. | 4 | drop | Unused |
  98. | 5 | toggle | Unused |
  99. | 6 | done | Unused |
  100. ## Observation Encoding
  101. - Each tile is encoded as a 3 dimensional tuple:
  102. `(OBJECT_IDX, COLOR_IDX, STATE)`
  103. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  104. [minigrid/minigrid.py](minigrid/minigrid.py)
  105. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  106. ## Rewards
  107. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  108. ## Termination
  109. The episode ends if any one of the following conditions is met:
  110. 1. The agent opens the door.
  111. 2. Timeout (see `max_steps`).
  112. ## Registered Configurations
  113. - `BabyAI-UnlockLocal-v0`
  114. - `BabyAI-UnlockLocalDist-v0`
  115. """
  116. def __init__(self, distractors=False, **kwargs):
  117. self.distractors = distractors
  118. super().__init__(**kwargs)
  119. def gen_mission(self):
  120. door, _ = self.add_door(1, 1, locked=True)
  121. self.add_object(1, 1, "key", door.color)
  122. if self.distractors:
  123. self.add_distractors(1, 1, num_distractors=3)
  124. self.place_agent(1, 1)
  125. self.instrs = OpenInstr(ObjDesc(door.type))
  126. class KeyInBox(RoomGridLevel):
  127. """
  128. ## Description
  129. Unlock a door. Key is in a box (in the current room).
  130. ## Mission Space
  131. "open the door"
  132. ## Action Space
  133. | Num | Name | Action |
  134. |-----|--------------|-------------------|
  135. | 0 | left | Turn left |
  136. | 1 | right | Turn right |
  137. | 2 | forward | Move forward |
  138. | 3 | pickup | Pick up an object |
  139. | 4 | drop | Unused |
  140. | 5 | toggle | Unused |
  141. | 6 | done | Unused |
  142. ## Observation Encoding
  143. - Each tile is encoded as a 3 dimensional tuple:
  144. `(OBJECT_IDX, COLOR_IDX, STATE)`
  145. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  146. [minigrid/minigrid.py](minigrid/minigrid.py)
  147. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  148. ## Rewards
  149. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  150. ## Termination
  151. The episode ends if any one of the following conditions is met:
  152. 1. The agent opens the door.
  153. 2. Timeout (see `max_steps`).
  154. ## Registered Configurations
  155. - `BabyAI-KeyInBox-v0`
  156. ## Additional Notes
  157. The BabyAI bot is unable to solve this level.
  158. """
  159. def __init__(self, **kwargs):
  160. super().__init__(**kwargs)
  161. def gen_mission(self):
  162. door, _ = self.add_door(1, 1, locked=True)
  163. # Put the key in the box, then place the box in the room
  164. key = Key(door.color)
  165. box = Box(self._rand_color(), key)
  166. self.place_in_room(1, 1, box)
  167. self.place_agent(1, 1)
  168. self.instrs = OpenInstr(ObjDesc(door.type))
  169. class UnlockPickup(RoomGridLevel):
  170. """
  171. ## Description
  172. Unlock a door, then pick up a box in another room
  173. ## Mission Space
  174. "pick up the {color} box"
  175. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  176. "yellow" or "grey".
  177. ## Action Space
  178. | Num | Name | Action |
  179. |-----|--------------|-------------------|
  180. | 0 | left | Turn left |
  181. | 1 | right | Turn right |
  182. | 2 | forward | Move forward |
  183. | 3 | pickup | Pick up an object |
  184. | 4 | drop | Unused |
  185. | 5 | toggle | Unused |
  186. | 6 | done | Unused |
  187. ## Observation Encoding
  188. - Each tile is encoded as a 3 dimensional tuple:
  189. `(OBJECT_IDX, COLOR_IDX, STATE)`
  190. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  191. [minigrid/minigrid.py](minigrid/minigrid.py)
  192. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  193. ## Rewards
  194. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  195. ## Termination
  196. The episode ends if any one of the following conditions is met:
  197. 1. The agent picks up the correct box.
  198. 2. Timeout (see `max_steps`).
  199. ## Registered Configurations
  200. - `BabyAI-UnlockPickup-v0`
  201. - `BabyAI-UnlockPickupDist-v0`
  202. """
  203. def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
  204. self.distractors = distractors
  205. room_size = 6
  206. if max is None:
  207. max_steps = 8 * room_size**2
  208. super().__init__(
  209. num_rows=1, num_cols=2, room_size=6, max_steps=max_steps, **kwargs
  210. )
  211. def gen_mission(self):
  212. # Add a random object to the room on the right
  213. obj, _ = self.add_object(1, 0, kind="box")
  214. # Make sure the two rooms are directly connected by a locked door
  215. door, _ = self.add_door(0, 0, 0, locked=True)
  216. # Add a key to unlock the door
  217. self.add_object(0, 0, "key", door.color)
  218. if self.distractors:
  219. self.add_distractors(num_distractors=4)
  220. self.place_agent(0, 0)
  221. self.instrs = PickupInstr(ObjDesc(obj.type, obj.color))
  222. class BlockedUnlockPickup(RoomGridLevel):
  223. """
  224. ## Description
  225. Unlock a door blocked by a ball, then pick up a box
  226. in another room
  227. ## Mission Space
  228. "pick up the box"
  229. ## Action Space
  230. | Num | Name | Action |
  231. |-----|--------------|-------------------|
  232. | 0 | left | Turn left |
  233. | 1 | right | Turn right |
  234. | 2 | forward | Move forward |
  235. | 3 | pickup | Pick up an object |
  236. | 4 | drop | Unused |
  237. | 5 | toggle | Unused |
  238. | 6 | done | Unused |
  239. ## Observation Encoding
  240. - Each tile is encoded as a 3 dimensional tuple:
  241. `(OBJECT_IDX, COLOR_IDX, STATE)`
  242. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  243. [minigrid/minigrid.py](minigrid/minigrid.py)
  244. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  245. ## Rewards
  246. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  247. ## Termination
  248. The episode ends if any one of the following conditions is met:
  249. 1. The agent picks up the box.
  250. 2. Timeout (see `max_steps`).
  251. ## Registered Configurations
  252. - `BabyAI-BlockedUnlockPickup-v0`
  253. """
  254. def __init__(self, max_steps: int | None = None, **kwargs):
  255. room_size = 6
  256. if max_steps is None:
  257. max_steps = 16 * room_size**2
  258. super().__init__(
  259. num_rows=1, num_cols=2, room_size=room_size, max_steps=max_steps, **kwargs
  260. )
  261. def gen_mission(self):
  262. # Add a box to the room on the right
  263. obj, _ = self.add_object(1, 0, kind="box")
  264. # Make sure the two rooms are directly connected by a locked door
  265. door, pos = self.add_door(0, 0, 0, locked=True)
  266. # Block the door with a ball
  267. color = self._rand_color()
  268. self.grid.set(pos[0] - 1, pos[1], Ball(color))
  269. # Add a key to unlock the door
  270. self.add_object(0, 0, "key", door.color)
  271. self.place_agent(0, 0)
  272. self.instrs = PickupInstr(ObjDesc(obj.type))
  273. class UnlockToUnlock(RoomGridLevel):
  274. """
  275. ## Description
  276. Unlock a door A that requires to unlock a door B before
  277. ## Mission Space
  278. "pick up the ball"
  279. ## Action Space
  280. | Num | Name | Action |
  281. |-----|--------------|-------------------|
  282. | 0 | left | Turn left |
  283. | 1 | right | Turn right |
  284. | 2 | forward | Move forward |
  285. | 3 | pickup | Pick up an object |
  286. | 4 | drop | Unused |
  287. | 5 | toggle | Unused |
  288. | 6 | done | Unused |
  289. ## Observation Encoding
  290. - Each tile is encoded as a 3 dimensional tuple:
  291. `(OBJECT_IDX, COLOR_IDX, STATE)`
  292. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  293. [minigrid/minigrid.py](minigrid/minigrid.py)
  294. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  295. ## Rewards
  296. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  297. ## Termination
  298. The episode ends if any one of the following conditions is met:
  299. 1. The agent picks up the ball.
  300. 2. Timeout (see `max_steps`).
  301. ## Registered Configurations
  302. - `BabyAI-UnlockToUnlock-v0`
  303. """
  304. def __init__(self, max_steps: int | None = None, **kwargs):
  305. room_size = 6
  306. if max_steps is None:
  307. max_steps = 30 * room_size**2
  308. super().__init__(
  309. num_rows=1, num_cols=3, room_size=room_size, max_steps=max_steps, **kwargs
  310. )
  311. def gen_mission(self):
  312. colors = self._rand_subset(COLOR_NAMES, 2)
  313. # Add a door of color A connecting left and middle room
  314. self.add_door(0, 0, door_idx=0, color=colors[0], locked=True)
  315. # Add a key of color A in the room on the right
  316. self.add_object(2, 0, kind="key", color=colors[0])
  317. # Add a door of color B connecting middle and right room
  318. self.add_door(1, 0, door_idx=0, color=colors[1], locked=True)
  319. # Add a key of color B in the middle room
  320. self.add_object(1, 0, kind="key", color=colors[1])
  321. obj, _ = self.add_object(0, 0, kind="ball")
  322. self.place_agent(1, 0)
  323. self.instrs = PickupInstr(ObjDesc(obj.type))