You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

361 lines
10 KiB

2 months ago
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Pick up` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.envs.babyai.core.levelgen import LevelGen
  7. from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
  8. from minigrid.envs.babyai.core.verifier import ObjDesc, PickupInstr
  9. class Pickup(RoomGridLevel):
  10. """
  11. ## Description
  12. Pick up an object, the object may be in another room.
  13. ## Mission Space
  14. "pick up a {color} {type}"
  15. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  16. "yellow" or "grey".
  17. {type} is the type of the object. Can be "ball", "box" or "key".
  18. ## Action Space
  19. | Num | Name | Action |
  20. |-----|--------------|-------------------|
  21. | 0 | left | Turn left |
  22. | 1 | right | Turn right |
  23. | 2 | forward | Move forward |
  24. | 3 | pickup | Pick up an object |
  25. | 4 | drop | Unused |
  26. | 5 | toggle | Unused |
  27. | 6 | done | Unused |
  28. ## Observation Encoding
  29. - Each tile is encoded as a 3 dimensional tuple:
  30. `(OBJECT_IDX, COLOR_IDX, STATE)`
  31. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  32. [minigrid/minigrid.py](minigrid/minigrid.py)
  33. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  34. ## Rewards
  35. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  36. ## Termination
  37. The episode ends if any one of the following conditions is met:
  38. 1. The agent picks up the object.
  39. 2. Timeout (see `max_steps`).
  40. ## Registered Configurations
  41. - `BabyAI-Pickup-v0`
  42. """
  43. def gen_mission(self):
  44. self.place_agent()
  45. self.connect_all()
  46. objs = self.add_distractors(num_distractors=18, all_unique=False)
  47. self.check_objs_reachable()
  48. obj = self._rand_elem(objs)
  49. self.instrs = PickupInstr(ObjDesc(obj.type, obj.color))
  50. class UnblockPickup(RoomGridLevel):
  51. """
  52. ## Description
  53. Pick up an object, the object may be in another room. The path may
  54. be blocked by one or more obstructors.
  55. ## Mission Space
  56. "pick up a/the {color} {type}"
  57. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  58. "yellow" or "grey".
  59. {type} is the type of the object. Can be "ball", "box" or "key".
  60. ## Action Space
  61. | Num | Name | Action |
  62. |-----|--------------|-------------------|
  63. | 0 | left | Turn left |
  64. | 1 | right | Turn right |
  65. | 2 | forward | Move forward |
  66. | 3 | pickup | Pick up an object |
  67. | 4 | drop | Unused |
  68. | 5 | toggle | Unused |
  69. | 6 | done | Unused |
  70. ## Observation Encoding
  71. - Each tile is encoded as a 3 dimensional tuple:
  72. `(OBJECT_IDX, COLOR_IDX, STATE)`
  73. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  74. [minigrid/minigrid.py](minigrid/minigrid.py)
  75. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  76. ## Rewards
  77. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  78. ## Termination
  79. The episode ends if any one of the following conditions is met:
  80. 1. The agent picks up the object.
  81. 2. Timeout (see `max_steps`).
  82. ## Registered Configurations
  83. - `BabyAI-UnblockPickup-v0`
  84. """
  85. def gen_mission(self):
  86. self.place_agent()
  87. self.connect_all()
  88. objs = self.add_distractors(num_distractors=20, all_unique=False)
  89. # Ensure that at least one object is not reachable without unblocking
  90. # Note: the selected object will still be reachable most of the time
  91. if self.check_objs_reachable(raise_exc=False):
  92. raise RejectSampling("all objects reachable")
  93. obj = self._rand_elem(objs)
  94. self.instrs = PickupInstr(ObjDesc(obj.type, obj.color))
  95. class PickupLoc(LevelGen):
  96. """
  97. ## Description
  98. Pick up an object which may be described using its location. This is a
  99. single room environment.
  100. Competencies: PickUp, Loc. No unblocking.
  101. ## Mission Space
  102. "pick up the {color} {type}"
  103. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  104. "yellow" or "grey".
  105. {type} is the type of the object. Can be "ball", "box" or "key".
  106. ## Action Space
  107. | Num | Name | Action |
  108. |-----|--------------|-------------------|
  109. | 0 | left | Turn left |
  110. | 1 | right | Turn right |
  111. | 2 | forward | Move forward |
  112. | 3 | pickup | Pick up an object |
  113. | 4 | drop | Unused |
  114. | 5 | toggle | Unused |
  115. | 6 | done | Unused |
  116. ## Observation Encoding
  117. - Each tile is encoded as a 3 dimensional tuple:
  118. `(OBJECT_IDX, COLOR_IDX, STATE)`
  119. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  120. [minigrid/minigrid.py](minigrid/minigrid.py)
  121. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  122. ## Rewards
  123. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  124. ## Termination
  125. The episode ends if any one of the following conditions is met:
  126. 1. The agent picks up the object.
  127. 2. Timeout (see `max_steps`).
  128. ## Registered Configurations
  129. - `BabyAI-PickupLoc-v0`
  130. """
  131. def __init__(self, **kwargs):
  132. # We add many distractors to increase the probability
  133. # of ambiguous locations within the same room
  134. super().__init__(
  135. action_kinds=["pickup"],
  136. instr_kinds=["action"],
  137. num_rows=1,
  138. num_cols=1,
  139. num_dists=8,
  140. locked_room_prob=0,
  141. locations=True,
  142. unblocking=False,
  143. **kwargs,
  144. )
  145. class PickupDist(RoomGridLevel):
  146. """
  147. ## Description
  148. Pick up an object
  149. The object to pick up is given by its type only, or
  150. by its color, or by its type and color.
  151. (in the current room, with distractors)
  152. ## Mission Space
  153. "pick up a/the {color}/{type}/{color}{type}"
  154. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  155. "yellow" or "grey".
  156. {type} is the type of the object. Can be "ball", "box" or "key".
  157. ## Action Space
  158. | Num | Name | Action |
  159. |-----|--------------|-------------------|
  160. | 0 | left | Turn left |
  161. | 1 | right | Turn right |
  162. | 2 | forward | Move forward |
  163. | 3 | pickup | Pick up an object |
  164. | 4 | drop | Unused |
  165. | 5 | toggle | Unused |
  166. | 6 | done | Unused |
  167. ## Observation Encoding
  168. - Each tile is encoded as a 3 dimensional tuple:
  169. `(OBJECT_IDX, COLOR_IDX, STATE)`
  170. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  171. [minigrid/minigrid.py](minigrid/minigrid.py)
  172. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  173. ## Rewards
  174. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  175. ## Termination
  176. The episode ends if any one of the following conditions is met:
  177. 1. The agent picks up the object.
  178. 2. Timeout (see `max_steps`).
  179. ## Registered Configurations
  180. - `BabyAI-PickupDist-v0`
  181. - `BabyAI-PickupDistDebug-v0`
  182. """
  183. def __init__(self, debug=False, **kwargs):
  184. self.debug = debug
  185. super().__init__(num_rows=1, num_cols=1, room_size=7, **kwargs)
  186. def gen_mission(self):
  187. # Add 5 random objects in the room
  188. objs = self.add_distractors(num_distractors=5)
  189. self.place_agent(0, 0)
  190. obj = self._rand_elem(objs)
  191. type = obj.type
  192. color = obj.color
  193. select_by = self._rand_elem(["type", "color", "both"])
  194. if select_by == "color":
  195. type = None
  196. elif select_by == "type":
  197. color = None
  198. self.instrs = PickupInstr(ObjDesc(type, color), strict=self.debug)
  199. class PickupAbove(RoomGridLevel):
  200. """
  201. ## Description
  202. Pick up an object (in the room above)
  203. This task requires to use the compass to be solved effectively.
  204. ## Mission Space
  205. "go to the {color} {type}"
  206. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  207. "yellow" or "grey".
  208. {type} is the type of the object. Can be "ball", "box" or "key".
  209. ## Action Space
  210. | Num | Name | Action |
  211. |-----|--------------|-------------------|
  212. | 0 | left | Turn left |
  213. | 1 | right | Turn right |
  214. | 2 | forward | Move forward |
  215. | 3 | pickup | Pick up an object |
  216. | 4 | drop | Unused |
  217. | 5 | toggle | Unused |
  218. | 6 | done | Unused |
  219. ## Observation Encoding
  220. - Each tile is encoded as a 3 dimensional tuple:
  221. `(OBJECT_IDX, COLOR_IDX, STATE)`
  222. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  223. [minigrid/minigrid.py](minigrid/minigrid.py)
  224. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  225. ## Rewards
  226. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  227. ## Termination
  228. The episode ends if any one of the following conditions is met:
  229. 1. The agent picks up the object.
  230. 2. Timeout (see `max_steps`).
  231. ## Registered Configurations
  232. - `BabyAI-PickupAbove-v0`
  233. """
  234. def __init__(self, max_steps: int | None = None, **kwargs):
  235. room_size = 6
  236. if max_steps is None:
  237. max_steps = 8 * room_size**2
  238. super().__init__(room_size=room_size, max_steps=max_steps, **kwargs)
  239. def gen_mission(self):
  240. # Add a random object to the top-middle room
  241. obj, pos = self.add_object(1, 0)
  242. # Make sure the two rooms are directly connected
  243. self.add_door(1, 1, 3, locked=False)
  244. self.place_agent(1, 1)
  245. self.connect_all()
  246. self.instrs = PickupInstr(ObjDesc(obj.type, obj.color))