You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

422 lines
12 KiB

2 months ago
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Open` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.core.constants import COLOR_NAMES
  7. from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
  8. from minigrid.envs.babyai.core.verifier import (
  9. LOC_NAMES,
  10. AfterInstr,
  11. BeforeInstr,
  12. ObjDesc,
  13. OpenInstr,
  14. )
  15. class Open(RoomGridLevel):
  16. """
  17. ## Description
  18. Open a door, which may be in another room
  19. ## Mission Space
  20. "open a {color} door"
  21. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  22. "yellow" or "grey".
  23. ## Action Space
  24. | Num | Name | Action |
  25. |-----|--------------|-------------------|
  26. | 0 | left | Turn left |
  27. | 1 | right | Turn right |
  28. | 2 | forward | Move forward |
  29. | 3 | pickup | Pick up an object |
  30. | 4 | drop | Unused |
  31. | 5 | toggle | Unused |
  32. | 6 | done | Unused |
  33. ## Observation Encoding
  34. - Each tile is encoded as a 3 dimensional tuple:
  35. `(OBJECT_IDX, COLOR_IDX, STATE)`
  36. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  37. [minigrid/minigrid.py](minigrid/minigrid.py)
  38. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  39. ## Rewards
  40. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  41. ## Termination
  42. The episode ends if any one of the following conditions is met:
  43. 1. The agent opens the door.
  44. 2. Timeout (see `max_steps`).
  45. ## Registered Configurations
  46. - `BabyAI-Open-v0`
  47. """
  48. def gen_mission(self):
  49. self.place_agent()
  50. self.connect_all()
  51. self.add_distractors(num_distractors=18, all_unique=False)
  52. self.check_objs_reachable()
  53. # Collect a list of all the doors in the environment
  54. doors = []
  55. for i in range(self.num_cols):
  56. for j in range(self.num_rows):
  57. room = self.get_room(i, j)
  58. for door in room.doors:
  59. if door:
  60. doors.append(door)
  61. door = self._rand_elem(doors)
  62. self.instrs = OpenInstr(ObjDesc(door.type, door.color))
  63. class OpenRedDoor(RoomGridLevel):
  64. """
  65. ## Description
  66. Go to the red door
  67. (always unlocked, in the current room)
  68. Note: this level is intentionally meant for debugging and is
  69. intentionally kept very simple.
  70. ## Mission Space
  71. "open the red door"
  72. ## Action Space
  73. | Num | Name | Action |
  74. |-----|--------------|-------------------|
  75. | 0 | left | Turn left |
  76. | 1 | right | Turn right |
  77. | 2 | forward | Move forward |
  78. | 3 | pickup | Pick up an object |
  79. | 4 | drop | Unused |
  80. | 5 | toggle | Unused |
  81. | 6 | done | Unused |
  82. ## Observation Encoding
  83. - Each tile is encoded as a 3 dimensional tuple:
  84. `(OBJECT_IDX, COLOR_IDX, STATE)`
  85. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  86. [minigrid/minigrid.py](minigrid/minigrid.py)
  87. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  88. ## Rewards
  89. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  90. ## Termination
  91. The episode ends if any one of the following conditions is met:
  92. 1. The agent opens the door.
  93. 2. Timeout (see `max_steps`).
  94. ## Registered Configurations
  95. - `BabyAI-OpenRedDoor-v0`
  96. """
  97. def __init__(self, **kwargs):
  98. super().__init__(num_rows=1, num_cols=2, room_size=5, **kwargs)
  99. def gen_mission(self):
  100. obj, _ = self.add_door(0, 0, 0, "red", locked=False)
  101. self.place_agent(0, 0)
  102. self.instrs = OpenInstr(ObjDesc("door", "red"))
  103. class OpenDoor(RoomGridLevel):
  104. """
  105. ## Description
  106. Go to the door
  107. The door to open is given by its color or by its location.
  108. (always unlocked, in the current room)
  109. ## Mission Space
  110. "open the {color} door"
  111. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  112. "yellow" or "grey".
  113. ## Action Space
  114. | Num | Name | Action |
  115. |-----|--------------|-------------------|
  116. | 0 | left | Turn left |
  117. | 1 | right | Turn right |
  118. | 2 | forward | Move forward |
  119. | 3 | pickup | Pick up an object |
  120. | 4 | drop | Unused |
  121. | 5 | toggle | Unused |
  122. | 6 | done | Unused |
  123. ## Observation Encoding
  124. - Each tile is encoded as a 3 dimensional tuple:
  125. `(OBJECT_IDX, COLOR_IDX, STATE)`
  126. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  127. [minigrid/minigrid.py](minigrid/minigrid.py)
  128. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  129. ## Rewards
  130. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  131. ## Termination
  132. The episode ends if any one of the following conditions is met:
  133. 1. The agent opens the door.
  134. 2. Timeout (see `max_steps`).
  135. ## Registered Configurations
  136. - `BabyAI-OpenDoor-v0`
  137. - `BabyAI-OpenDoorDebug-v0`
  138. - `BabyAI-OpenDoorColor-v0`
  139. - `BabyAI-OpenDoorLoc-v0`
  140. """
  141. def __init__(self, debug=False, select_by=None, **kwargs):
  142. self.select_by = select_by
  143. self.debug = debug
  144. super().__init__(**kwargs)
  145. def gen_mission(self):
  146. door_colors = self._rand_subset(COLOR_NAMES, 4)
  147. objs = []
  148. for i, color in enumerate(door_colors):
  149. obj, _ = self.add_door(1, 1, door_idx=i, color=color, locked=False)
  150. objs.append(obj)
  151. select_by = self.select_by
  152. if select_by is None:
  153. select_by = self._rand_elem(["color", "loc"])
  154. if select_by == "color":
  155. object = ObjDesc(objs[0].type, color=objs[0].color)
  156. elif select_by == "loc":
  157. object = ObjDesc(objs[0].type, loc=self._rand_elem(LOC_NAMES))
  158. else:
  159. raise NotImplementedError("Not implemented.")
  160. self.place_agent(1, 1)
  161. self.instrs = OpenInstr(object, strict=self.debug)
  162. class OpenTwoDoors(RoomGridLevel):
  163. """
  164. ## Description
  165. Open door X, then open door Y
  166. The two doors are facing opposite directions, so that the agent
  167. Can't see whether the door behind him is open.
  168. This task requires memory (recurrent policy) to be solved effectively.
  169. ## Mission Space
  170. "open the {color} door, the open the {color} door"
  171. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  172. "yellow" or "grey".
  173. ## Action Space
  174. | Num | Name | Action |
  175. |-----|--------------|-------------------|
  176. | 0 | left | Turn left |
  177. | 1 | right | Turn right |
  178. | 2 | forward | Move forward |
  179. | 3 | pickup | Pick up an object |
  180. | 4 | drop | Unused |
  181. | 5 | toggle | Unused |
  182. | 6 | done | Unused |
  183. ## Observation Encoding
  184. - Each tile is encoded as a 3 dimensional tuple:
  185. `(OBJECT_IDX, COLOR_IDX, STATE)`
  186. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  187. [minigrid/minigrid.py](minigrid/minigrid.py)
  188. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  189. ## Rewards
  190. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  191. ## Termination
  192. The episode ends if any one of the following conditions is met:
  193. 1. The agent opens the door.
  194. 2. Timeout (see `max_steps`).
  195. ## Registered Configurations
  196. - `BabyAI-OpenTwoDoors-v0`
  197. - `BabyAI-OpenRedBlueDoors-v0`
  198. - `BabyAI-OpenRedBlueDoorsDebug-v0`
  199. """
  200. def __init__(
  201. self,
  202. first_color=None,
  203. second_color=None,
  204. strict=False,
  205. max_steps: int | None = None,
  206. **kwargs,
  207. ):
  208. self.first_color = first_color
  209. self.second_color = second_color
  210. self.strict = strict
  211. room_size = 6
  212. if max_steps is None:
  213. max_steps = 20 * room_size**2
  214. super().__init__(room_size=room_size, max_steps=max_steps, **kwargs)
  215. def gen_mission(self):
  216. colors = self._rand_subset(COLOR_NAMES, 2)
  217. first_color = self.first_color
  218. if first_color is None:
  219. first_color = colors[0]
  220. second_color = self.second_color
  221. if second_color is None:
  222. second_color = colors[1]
  223. door1, _ = self.add_door(1, 1, 2, color=first_color, locked=False)
  224. door2, _ = self.add_door(1, 1, 0, color=second_color, locked=False)
  225. self.place_agent(1, 1)
  226. self.instrs = BeforeInstr(
  227. OpenInstr(ObjDesc(door1.type, door1.color), strict=self.strict),
  228. OpenInstr(ObjDesc(door2.type, door2.color)),
  229. )
  230. class OpenDoorsOrder(RoomGridLevel):
  231. """
  232. ## Description
  233. Open one or two doors in the order specified.
  234. ## Mission Space
  235. "open the {color} door, the open the {color} door"
  236. or
  237. "open the {color} door after you open the {color} door"
  238. or
  239. "open the {color} door"
  240. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  241. "yellow" or "grey".
  242. ## Action Space
  243. | Num | Name | Action |
  244. |-----|--------------|-------------------|
  245. | 0 | left | Turn left |
  246. | 1 | right | Turn right |
  247. | 2 | forward | Move forward |
  248. | 3 | pickup | Pick up an object |
  249. | 4 | drop | Unused |
  250. | 5 | toggle | Unused |
  251. | 6 | done | Unused |
  252. ## Observation Encoding
  253. - Each tile is encoded as a 3 dimensional tuple:
  254. `(OBJECT_IDX, COLOR_IDX, STATE)`
  255. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  256. [minigrid/minigrid.py](minigrid/minigrid.py)
  257. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  258. ## Rewards
  259. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  260. ## Termination
  261. The episode ends if any one of the following conditions is met:
  262. 1. The agent opens the door.
  263. 2. Timeout (see `max_steps`).
  264. ## Registered Configurations
  265. - `BabyAI-OpenDoorsOrderN2-v0`
  266. - `BabyAI-OpenDoorsOrderN4-v0`
  267. - `BabyAI-OpenDoorsOrderN2Debug-v0`
  268. - `BabyAI-OpenDoorsOrderN4Debug-v0`
  269. """
  270. def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):
  271. assert num_doors >= 2
  272. self.num_doors = num_doors
  273. self.debug = debug
  274. room_size = 6
  275. if max_steps is None:
  276. max_steps = 20 * room_size**2
  277. super().__init__(room_size=room_size, max_steps=max_steps, **kwargs)
  278. def gen_mission(self):
  279. colors = self._rand_subset(COLOR_NAMES, self.num_doors)
  280. doors = []
  281. for i in range(self.num_doors):
  282. door, _ = self.add_door(1, 1, color=colors[i], locked=False)
  283. doors.append(door)
  284. self.place_agent(1, 1)
  285. door1, door2 = self._rand_subset(doors, 2)
  286. desc1 = ObjDesc(door1.type, door1.color)
  287. desc2 = ObjDesc(door2.type, door2.color)
  288. mode = self._rand_int(0, 3)
  289. if mode == 0:
  290. self.instrs = OpenInstr(desc1, strict=self.debug)
  291. elif mode == 1:
  292. self.instrs = BeforeInstr(
  293. OpenInstr(desc1, strict=self.debug), OpenInstr(desc2, strict=self.debug)
  294. )
  295. elif mode == 2:
  296. self.instrs = AfterInstr(
  297. OpenInstr(desc1, strict=self.debug), OpenInstr(desc2, strict=self.debug)
  298. )
  299. else:
  300. assert False