You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

426 lines
12 KiB

4 months ago
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with different instructions than those in other files.
  4. """
  5. from __future__ import annotations
  6. from minigrid.envs.babyai.core.roomgrid_level import RoomGridLevel
  7. from minigrid.envs.babyai.core.verifier import (
  8. BeforeInstr,
  9. GoToInstr,
  10. ObjDesc,
  11. OpenInstr,
  12. PickupInstr,
  13. PutNextInstr,
  14. )
  15. class ActionObjDoor(RoomGridLevel):
  16. """
  17. ## Description
  18. [pick up an object] or
  19. [go to an object or door] or
  20. [open a door]
  21. (in the current room)
  22. ## Mission Space
  23. "pick up the {color} {type}"
  24. or
  25. "go to the {color} {type}"
  26. or
  27. "open a {color} door"
  28. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  29. "yellow" or "grey".
  30. {type} is the type of the object. Can be "ball", "box", "door" or "key".
  31. ## Action Space
  32. | Num | Name | Action |
  33. |-----|--------------|-------------------|
  34. | 0 | left | Turn left |
  35. | 1 | right | Turn right |
  36. | 2 | forward | Move forward |
  37. | 3 | pickup | Pick up an object |
  38. | 4 | drop | Unused |
  39. | 5 | toggle | Unused |
  40. | 6 | done | Unused |
  41. ## Observation Encoding
  42. - Each tile is encoded as a 3 dimensional tuple:
  43. `(OBJECT_IDX, COLOR_IDX, STATE)`
  44. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  45. [minigrid/minigrid.py](minigrid/minigrid.py)
  46. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  47. ## Rewards
  48. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  49. ## Termination
  50. The episode ends if any one of the following conditions is met:
  51. 1. The agent finishes the instruction.
  52. 2. Timeout (see `max_steps`).
  53. ## Registered Configurations
  54. - `BabyAI-ActionObjDoor-v0`
  55. """
  56. def __init__(self, **kwargs):
  57. super().__init__(room_size=7, **kwargs)
  58. def gen_mission(self):
  59. objs = self.add_distractors(1, 1, num_distractors=5)
  60. for _ in range(4):
  61. door, _ = self.add_door(1, 1, locked=False)
  62. objs.append(door)
  63. self.place_agent(1, 1)
  64. obj = self._rand_elem(objs)
  65. desc = ObjDesc(obj.type, obj.color)
  66. if obj.type == "door":
  67. if self._rand_bool():
  68. self.instrs = GoToInstr(desc)
  69. else:
  70. self.instrs = OpenInstr(desc)
  71. else:
  72. if self._rand_bool():
  73. self.instrs = GoToInstr(desc)
  74. else:
  75. self.instrs = PickupInstr(desc)
  76. class FindObjS5(RoomGridLevel):
  77. """
  78. ## Description
  79. Pick up an object (in a random room)
  80. Rooms have a size of 5
  81. This level requires potentially exhaustive exploration
  82. ## Mission Space
  83. "pick up the {type}"
  84. {type} is the type of the object. Can be "ball", "box" or "key".
  85. ## Action Space
  86. | Num | Name | Action |
  87. |-----|--------------|-------------------|
  88. | 0 | left | Turn left |
  89. | 1 | right | Turn right |
  90. | 2 | forward | Move forward |
  91. | 3 | pickup | Pick up an object |
  92. | 4 | drop | Unused |
  93. | 5 | toggle | Unused |
  94. | 6 | done | Unused |
  95. ## Observation Encoding
  96. - Each tile is encoded as a 3 dimensional tuple:
  97. `(OBJECT_IDX, COLOR_IDX, STATE)`
  98. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  99. [minigrid/minigrid.py](minigrid/minigrid.py)
  100. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  101. ## Rewards
  102. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  103. ## Termination
  104. The episode ends if any one of the following conditions is met:
  105. 1. The agent picks up the object.
  106. 2. Timeout (see `max_steps`).
  107. ## Registered Configurations
  108. - `BabyAI-FindObjS5-v0`
  109. - `BabyAI-FindObjS6-v0`
  110. - `BabyAI-FindObjS7-v0`
  111. """
  112. def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
  113. if max_steps is None:
  114. max_steps = 20 * room_size**2
  115. super().__init__(room_size=room_size, max_steps=max_steps, **kwargs)
  116. def gen_mission(self):
  117. # Add a random object to a random room
  118. i = self._rand_int(0, self.num_rows)
  119. j = self._rand_int(0, self.num_cols)
  120. obj, _ = self.add_object(i, j)
  121. self.place_agent(1, 1)
  122. self.connect_all()
  123. self.instrs = PickupInstr(ObjDesc(obj.type))
  124. class KeyCorridor(RoomGridLevel):
  125. """
  126. ## Description
  127. A ball is behind a locked door, the key is placed in a
  128. random room.
  129. ## Mission Space
  130. "pick up the ball"
  131. ## Action Space
  132. | Num | Name | Action |
  133. |-----|--------------|-------------------|
  134. | 0 | left | Turn left |
  135. | 1 | right | Turn right |
  136. | 2 | forward | Move forward |
  137. | 3 | pickup | Pick up an object |
  138. | 4 | drop | Unused |
  139. | 5 | toggle | Unused |
  140. | 6 | done | Unused |
  141. ## Observation Encoding
  142. - Each tile is encoded as a 3 dimensional tuple:
  143. `(OBJECT_IDX, COLOR_IDX, STATE)`
  144. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  145. [minigrid/minigrid.py](minigrid/minigrid.py)
  146. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  147. ## Rewards
  148. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  149. ## Termination
  150. The episode ends if any one of the following conditions is met:
  151. 1. The agent picks up the ball.
  152. 2. Timeout (see `max_steps`).
  153. ## Registered Configurations
  154. - `BabyAI-KeyCorridor-v0`
  155. - `BabyAI-KeyCorridorS3R1-v0`
  156. - `BabyAI-KeyCorridorS3R2-v0`
  157. - `BabyAI-KeyCorridorS3R3-v0`
  158. - `BabyAI-KeyCorridorS4R3-v0`
  159. - `BabyAI-KeyCorridorS5R3-v0`
  160. - `BabyAI-KeyCorridorS6R3-v0`
  161. """
  162. def __init__(
  163. self,
  164. num_rows=3,
  165. obj_type="ball",
  166. room_size=6,
  167. max_steps: int | None = None,
  168. **kwargs,
  169. ):
  170. self.obj_type = obj_type
  171. if max_steps is None:
  172. max_steps = 30 * room_size**2
  173. super().__init__(
  174. room_size=room_size, num_rows=num_rows, max_steps=max_steps, **kwargs
  175. )
  176. def gen_mission(self):
  177. # Connect the middle column rooms into a hallway
  178. for j in range(1, self.num_rows):
  179. self.remove_wall(1, j, 3)
  180. # Add a locked door on the bottom right
  181. # Add an object behind the locked door
  182. room_idx = self._rand_int(0, self.num_rows)
  183. door, _ = self.add_door(2, room_idx, 2, locked=True)
  184. obj, _ = self.add_object(2, room_idx, kind=self.obj_type)
  185. # Add a key in a random room on the left side
  186. self.add_object(0, self._rand_int(0, self.num_rows), "key", door.color)
  187. # Place the agent in the middle
  188. self.place_agent(1, self.num_rows // 2)
  189. # Make sure all rooms are accessible
  190. self.connect_all()
  191. self.instrs = PickupInstr(ObjDesc(obj.type))
  192. class OneRoomS8(RoomGridLevel):
  193. """
  194. ## Description
  195. Pick up the ball. Rooms have a size of 8.
  196. ## Mission Space
  197. "pick up the ball"
  198. ## Action Space
  199. | Num | Name | Action |
  200. |-----|--------------|-------------------|
  201. | 0 | left | Turn left |
  202. | 1 | right | Turn right |
  203. | 2 | forward | Move forward |
  204. | 3 | pickup | Pick up an object |
  205. | 4 | drop | Unused |
  206. | 5 | toggle | Unused |
  207. | 6 | done | Unused |
  208. ## Observation Encoding
  209. - Each tile is encoded as a 3 dimensional tuple:
  210. `(OBJECT_IDX, COLOR_IDX, STATE)`
  211. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  212. [minigrid/minigrid.py](minigrid/minigrid.py)
  213. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  214. ## Rewards
  215. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  216. ## Termination
  217. The episode ends if any one of the following conditions is met:
  218. 1. The agent picks up the ball.
  219. 2. Timeout (see `max_steps`).
  220. ## Registered Configurations
  221. - `BabyAI-OneRoomS8-v0`
  222. - `BabyAI-OneRoomS12-v0`
  223. - `BabyAI-OneRoomS16-v0`
  224. - `BabyAI-OneRoomS20-v0`
  225. """
  226. def __init__(self, room_size=8, **kwargs):
  227. super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)
  228. def gen_mission(self):
  229. obj, _ = self.add_object(0, 0, kind="ball")
  230. self.place_agent()
  231. self.instrs = PickupInstr(ObjDesc(obj.type))
  232. class MoveTwoAcross(RoomGridLevel):
  233. """
  234. ## Description
  235. Task of the form: move the A next to the B and the C next to the D.
  236. This task is structured to have a very large number of possible
  237. instructions.
  238. ## Mission Space
  239. "put the {color} {type} next to the {color} {type}, then put the {color} {type} next to the {color} {type}"
  240. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  241. "yellow" or "grey".
  242. {type} is the type of the object. Can be "ball", "box" or "key".
  243. ## Action Space
  244. | Num | Name | Action |
  245. |-----|--------------|-------------------|
  246. | 0 | left | Turn left |
  247. | 1 | right | Turn right |
  248. | 2 | forward | Move forward |
  249. | 3 | pickup | Pick up an object |
  250. | 4 | drop | Unused |
  251. | 5 | toggle | Unused |
  252. | 6 | done | Unused |
  253. ## Observation Encoding
  254. - Each tile is encoded as a 3 dimensional tuple:
  255. `(OBJECT_IDX, COLOR_IDX, STATE)`
  256. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  257. [minigrid/minigrid.py](minigrid/minigrid.py)
  258. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  259. ## Rewards
  260. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  261. ## Termination
  262. The episode ends if any one of the following conditions is met:
  263. 1. The agent finishes the instruction.
  264. 2. Timeout (see `max_steps`).
  265. ## Registered Configurations
  266. - `BabyAI-MoveTwoAcrossS5N2-v0`
  267. - `BabyAI-MoveTwoAcrossS8N9-v0`
  268. """
  269. def __init__(
  270. self, room_size, objs_per_room, max_steps: int | None = None, **kwargs
  271. ):
  272. assert objs_per_room <= 9
  273. self.objs_per_room = objs_per_room
  274. if max_steps is None:
  275. max_steps = 16 * room_size**2
  276. super().__init__(
  277. num_rows=1, num_cols=2, room_size=room_size, max_steps=max_steps, **kwargs
  278. )
  279. def gen_mission(self):
  280. self.place_agent(0, 0)
  281. # Add objects to both the left and right rooms
  282. # so that we know that we have two non-adjacent set of objects
  283. objs_l = self.add_distractors(0, 0, self.objs_per_room)
  284. objs_r = self.add_distractors(1, 0, self.objs_per_room)
  285. # Remove the wall between the two rooms
  286. self.remove_wall(0, 0, 0)
  287. # Select objects from both subsets
  288. objs_l = self._rand_subset(objs_l, 2)
  289. objs_r = self._rand_subset(objs_r, 2)
  290. a = objs_l[0]
  291. b = objs_r[0]
  292. c = objs_r[1]
  293. d = objs_l[1]
  294. self.instrs = BeforeInstr(
  295. PutNextInstr(ObjDesc(a.type, a.color), ObjDesc(b.type, b.color)),
  296. PutNextInstr(ObjDesc(c.type, c.color), ObjDesc(d.type, d.color)),
  297. )