You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

814 lines
23 KiB

2 months ago
  1. """
  2. Copied and adapted from https://github.com/mila-iqia/babyai.
  3. Levels described in the Baby AI ICLR 2019 submission, with the `Go to` instruction.
  4. """
  5. from __future__ import annotations
  6. from minigrid.envs.babyai.core.levelgen import LevelGen
  7. from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
  8. from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
  9. class GoToRedBallGrey(RoomGridLevel):
  10. """
  11. ## Description
  12. Go to the red ball, single room, with distractors.
  13. The distractors are all grey to reduce perceptual complexity.
  14. This level has distractors but doesn't make use of language.
  15. ## Mission Space
  16. "go to the red ball"
  17. ## Action Space
  18. | Num | Name | Action |
  19. |-----|--------------|-------------------|
  20. | 0 | left | Turn left |
  21. | 1 | right | Turn right |
  22. | 2 | forward | Move forward |
  23. | 3 | pickup | Pick up an object |
  24. | 4 | drop | Unused |
  25. | 5 | toggle | Unused |
  26. | 6 | done | Unused |
  27. ## Observation Encoding
  28. - Each tile is encoded as a 3 dimensional tuple:
  29. `(OBJECT_IDX, COLOR_IDX, STATE)`
  30. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  31. [minigrid/minigrid.py](minigrid/minigrid.py)
  32. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  33. ## Rewards
  34. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  35. ## Termination
  36. The episode ends if any one of the following conditions is met:
  37. 1. The agent goes to the red ball.
  38. 2. Timeout (see `max_steps`).
  39. ## Registered Configurations
  40. - `BabyAI-GoToRedBallGrey-v0`
  41. """
  42. def __init__(self, room_size=8, num_dists=7, **kwargs):
  43. self.num_dists = num_dists
  44. super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
  45. def gen_mission(self):
  46. self.place_agent()
  47. obj, _ = self.add_object(0, 0, "ball", "red")
  48. dists = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
  49. for dist in dists:
  50. dist.color = "grey"
  51. # Make sure no unblocking is required
  52. self.check_objs_reachable()
  53. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  54. class GoToRedBall(RoomGridLevel):
  55. """
  56. ## Description
  57. Go to the red ball, single room, with distractors.
  58. This level has distractors but doesn't make use of language.
  59. ## Mission Space
  60. "go to the red ball"
  61. ## Action Space
  62. | Num | Name | Action |
  63. |-----|--------------|-------------------|
  64. | 0 | left | Turn left |
  65. | 1 | right | Turn right |
  66. | 2 | forward | Move forward |
  67. | 3 | pickup | Pick up an object |
  68. | 4 | drop | Unused |
  69. | 5 | toggle | Unused |
  70. | 6 | done | Unused |
  71. ## Observation Encoding
  72. - Each tile is encoded as a 3 dimensional tuple:
  73. `(OBJECT_IDX, COLOR_IDX, STATE)`
  74. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  75. [minigrid/minigrid.py](minigrid/minigrid.py)
  76. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  77. ## Rewards
  78. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  79. ## Termination
  80. The episode ends if any one of the following conditions is met:
  81. 1. The agent goes to the red ball.
  82. 2. Timeout (see `max_steps`).
  83. ## Registered Configurations
  84. - `BabyAI-GoToRedBall-v0`
  85. """
  86. def __init__(self, room_size=8, num_dists=7, **kwargs):
  87. self.num_dists = num_dists
  88. super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
  89. def gen_mission(self):
  90. self.place_agent()
  91. obj, _ = self.add_object(0, 0, "ball", "red")
  92. self.add_distractors(num_distractors=self.num_dists, all_unique=False)
  93. # Make sure no unblocking is required
  94. self.check_objs_reachable()
  95. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  96. class GoToRedBallNoDists(GoToRedBall):
  97. """
  98. ## Description
  99. Go to the red ball. No distractors present.
  100. ## Mission Space
  101. "go to the red ball"
  102. ## Action Space
  103. | Num | Name | Action |
  104. |-----|--------------|-------------------|
  105. | 0 | left | Turn left |
  106. | 1 | right | Turn right |
  107. | 2 | forward | Move forward |
  108. | 3 | pickup | Pick up an object |
  109. | 4 | drop | Unused |
  110. | 5 | toggle | Unused |
  111. | 6 | done | Unused |
  112. ## Observation Encoding
  113. - Each tile is encoded as a 3 dimensional tuple:
  114. `(OBJECT_IDX, COLOR_IDX, STATE)`
  115. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  116. [minigrid/minigrid.py](minigrid/minigrid.py)
  117. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  118. ## Rewards
  119. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  120. ## Termination
  121. The episode ends if any one of the following conditions is met:
  122. 1. The agent goes to the red ball.
  123. 2. Timeout (see `max_steps`).
  124. ## Registered Configurations
  125. - `BabyAI-GoToRedBallNoDists-v0`
  126. """
  127. def __init__(self, **kwargs):
  128. super().__init__(room_size=8, num_dists=0, **kwargs)
  129. class GoToObj(RoomGridLevel):
  130. """
  131. ## Description
  132. Go to an object, inside a single room with no doors, no distractors. The
  133. naming convention `GoToObjS{X}` represents a room of size `X`.
  134. ## Mission Space
  135. "go to the {color} {type}"
  136. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  137. "yellow" or "grey".
  138. {type} is the type of the object. Can be "ball", "box" or "key".
  139. ## Action Space
  140. | Num | Name | Action |
  141. |-----|--------------|-------------------|
  142. | 0 | left | Turn left |
  143. | 1 | right | Turn right |
  144. | 2 | forward | Move forward |
  145. | 3 | pickup | Pick up an object |
  146. | 4 | drop | Unused |
  147. | 5 | toggle | Unused |
  148. | 6 | done | Unused |
  149. ## Observation Encoding
  150. - Each tile is encoded as a 3 dimensional tuple:
  151. `(OBJECT_IDX, COLOR_IDX, STATE)`
  152. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  153. [minigrid/minigrid.py](minigrid/minigrid.py)
  154. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  155. ## Rewards
  156. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  157. ## Termination
  158. The episode ends if any one of the following conditions is met:
  159. 1. The agent goes to the object.
  160. 2. Timeout (see `max_steps`).
  161. ## Registered Configurations
  162. - `BabyAI-GoToObj-v0`
  163. - `BabyAI-GoToObjS4-v0`
  164. - `BabyAI-GoToObjS6-v1`
  165. Notice: `BabyAI-GoToObjS6-v0` is no longer for use due to a bug in the registry parameters.
  166. """
  167. def __init__(self, room_size=8, **kwargs):
  168. super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
  169. def gen_mission(self):
  170. self.place_agent()
  171. objs = self.add_distractors(num_distractors=1)
  172. obj = objs[0]
  173. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  174. class GoToLocal(RoomGridLevel):
  175. """
  176. ## Description
  177. Go to an object, inside a single room with no doors, no distractors. The
  178. naming convention `GoToLocalS{X}N{Y}` represents a room of size `X` with
  179. distractor number `Y`.
  180. ## Mission Space
  181. "go to the {color} {type}"
  182. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  183. "yellow" or "grey".
  184. {type} is the type of the object. Can be "ball", "box" or "key".
  185. ## Action Space
  186. | Num | Name | Action |
  187. |-----|--------------|-------------------|
  188. | 0 | left | Turn left |
  189. | 1 | right | Turn right |
  190. | 2 | forward | Move forward |
  191. | 3 | pickup | Pick up an object |
  192. | 4 | drop | Unused |
  193. | 5 | toggle | Unused |
  194. | 6 | done | Unused |
  195. ## Observation Encoding
  196. - Each tile is encoded as a 3 dimensional tuple:
  197. `(OBJECT_IDX, COLOR_IDX, STATE)`
  198. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  199. [minigrid/minigrid.py](minigrid/minigrid.py)
  200. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  201. ## Rewards
  202. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  203. ## Termination
  204. The episode ends if any one of the following conditions is met:
  205. 1. The agent goes to the object.
  206. 2. Timeout (see `max_steps`).
  207. ## Registered Configurations
  208. - `BabyAI-GoToLocal-v0`
  209. - `BabyAI-GoToLocalS5N2-v0`
  210. - `BabyAI-GoToLocalS6N2-v0`
  211. - `BabyAI-GoToLocalS6N3-v0`
  212. - `BabyAI-GoToLocalS6N4-v0`
  213. - `BabyAI-GoToLocalS7N4-v0`
  214. - `BabyAI-GoToLocalS7N5-v0`
  215. - `BabyAI-GoToLocalS8N2-v0`
  216. - `BabyAI-GoToLocalS8N3-v0`
  217. - `BabyAI-GoToLocalS8N4-v0`
  218. - `BabyAI-GoToLocalS8N5-v0`
  219. - `BabyAI-GoToLocalS8N6-v0`
  220. - `BabyAI-GoToLocalS8N7-v0`
  221. """
  222. def __init__(self, room_size=8, num_dists=8, **kwargs):
  223. self.num_dists = num_dists
  224. super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
  225. def gen_mission(self):
  226. self.place_agent()
  227. objs = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
  228. self.check_objs_reachable()
  229. obj = self._rand_elem(objs)
  230. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  231. class GoTo(RoomGridLevel):
  232. """
  233. ## Description
  234. Go to an object, the object may be in another room. Many distractors.
  235. ## Mission Space
  236. "go to a/the {color} {type}"
  237. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  238. "yellow" or "grey".
  239. {type} is the type of the object. Can be "ball", "box" or "key".
  240. ## Action Space
  241. | Num | Name | Action |
  242. |-----|--------------|-------------------|
  243. | 0 | left | Turn left |
  244. | 1 | right | Turn right |
  245. | 2 | forward | Move forward |
  246. | 3 | pickup | Pick up an object |
  247. | 4 | drop | Unused |
  248. | 5 | toggle | Unused |
  249. | 6 | done | Unused |
  250. ## Observation Encoding
  251. - Each tile is encoded as a 3 dimensional tuple:
  252. `(OBJECT_IDX, COLOR_IDX, STATE)`
  253. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  254. [minigrid/minigrid.py](minigrid/minigrid.py)
  255. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  256. ## Rewards
  257. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  258. ## Termination
  259. The episode ends if any one of the following conditions is met:
  260. 1. The agent goes to the object.
  261. 2. Timeout (see `max_steps`).
  262. ## Registered Configurations
  263. - `BabyAI-GoTo-v0`
  264. - `BabyAI-GoToOpen-v0`
  265. - `BabyAI-GoToObjMaze-v0`
  266. - `BabyAI-GoToObjMazeOpen-v0`
  267. - `BabyAI-GoToObjMazeS4R2-v0`
  268. - `BabyAI-GoToObjMazeS4-v0`
  269. - `BabyAI-GoToObjMazeS5-v0`
  270. - `BabyAI-GoToObjMazeS6-v0`
  271. - `BabyAI-GoToObjMazeS7-v0`
  272. """
  273. def __init__(
  274. self,
  275. room_size=8,
  276. num_rows=3,
  277. num_cols=3,
  278. num_dists=18,
  279. doors_open=False,
  280. **kwargs,
  281. ):
  282. self.num_dists = num_dists
  283. self.doors_open = doors_open
  284. super().__init__(
  285. num_rows=num_rows, num_cols=num_cols, room_size=room_size, **kwargs
  286. )
  287. def gen_mission(self):
  288. self.place_agent()
  289. self.connect_all()
  290. objs = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
  291. self.check_objs_reachable()
  292. obj = self._rand_elem(objs)
  293. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  294. # If requested, open all the doors
  295. if self.doors_open:
  296. self.open_all_doors()
  297. class GoToImpUnlock(RoomGridLevel):
  298. """
  299. ## Description
  300. Go to an object, which may be in a locked room.
  301. Competencies: Maze, GoTo, ImpUnlock
  302. No unblocking.
  303. ## Mission Space
  304. "go to a/the {color} {type}"
  305. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  306. "yellow" or "grey".
  307. {type} is the type of the object. Can be "ball", "box" or "key".
  308. ## Action Space
  309. | Num | Name | Action |
  310. |-----|--------------|-------------------|
  311. | 0 | left | Turn left |
  312. | 1 | right | Turn right |
  313. | 2 | forward | Move forward |
  314. | 3 | pickup | Pick up an object |
  315. | 4 | drop | Unused |
  316. | 5 | toggle | Unused |
  317. | 6 | done | Unused |
  318. ## Observation Encoding
  319. - Each tile is encoded as a 3 dimensional tuple:
  320. `(OBJECT_IDX, COLOR_IDX, STATE)`
  321. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  322. [minigrid/minigrid.py](minigrid/minigrid.py)
  323. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  324. ## Rewards
  325. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  326. ## Termination
  327. The episode ends if any one of the following conditions is met:
  328. 1. The agent goes to the object.
  329. 2. Timeout (see `max_steps`).
  330. ## Registered Configurations
  331. - `BabyAI-GoToImpUnlock-v0`
  332. """
  333. def gen_mission(self):
  334. # Add a locked door to a random room
  335. id = self._rand_int(0, self.num_cols)
  336. jd = self._rand_int(0, self.num_rows)
  337. door, pos = self.add_door(id, jd, locked=True)
  338. locked_room = self.get_room(id, jd)
  339. # Add the key to a different room
  340. while True:
  341. ik = self._rand_int(0, self.num_cols)
  342. jk = self._rand_int(0, self.num_rows)
  343. if ik is id and jk is jd:
  344. continue
  345. self.add_object(ik, jk, "key", door.color)
  346. break
  347. self.connect_all()
  348. # Add distractors to all but the locked room.
  349. # We do this to speed up the reachability test,
  350. # which otherwise will reject all levels with
  351. # objects in the locked room.
  352. for i in range(self.num_cols):
  353. for j in range(self.num_rows):
  354. if i is not id or j is not jd:
  355. self.add_distractors(i, j, num_distractors=2, all_unique=False)
  356. # The agent must be placed after all the object to respect constraints
  357. while True:
  358. self.place_agent()
  359. start_room = self.room_from_pos(*self.agent_pos)
  360. # Ensure that we are not placing the agent in the locked room
  361. if start_room is locked_room:
  362. continue
  363. break
  364. self.check_objs_reachable()
  365. # Add a single object to the locked room
  366. # The instruction requires going to an object matching that description
  367. (obj,) = self.add_distractors(id, jd, num_distractors=1, all_unique=False)
  368. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  369. class GoToSeq(LevelGen):
  370. """
  371. ## Description
  372. Sequencing of go-to-object commands.
  373. Competencies: Maze, GoTo, Seq
  374. No locked room.
  375. No locations.
  376. No unblocking.
  377. ## Mission Space
  378. "go to a/the {color} {type}" +
  379. "and go to a/the {color} {type}" +
  380. ", then go to a/the {color} {type}" +
  381. "and go to a/the {color} {type}"
  382. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  383. "yellow" or "grey".
  384. {type} is the type of the object. Can be "ball", "box" or "key".
  385. ## Action Space
  386. | Num | Name | Action |
  387. |-----|--------------|-------------------|
  388. | 0 | left | Turn left |
  389. | 1 | right | Turn right |
  390. | 2 | forward | Move forward |
  391. | 3 | pickup | Pick up an object |
  392. | 4 | drop | Unused |
  393. | 5 | toggle | Unused |
  394. | 6 | done | Unused |
  395. ## Observation Encoding
  396. - Each tile is encoded as a 3 dimensional tuple:
  397. `(OBJECT_IDX, COLOR_IDX, STATE)`
  398. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  399. [minigrid/minigrid.py](minigrid/minigrid.py)
  400. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  401. ## Rewards
  402. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  403. ## Termination
  404. The episode ends if any one of the following conditions is met:
  405. 1. The agent goes to the object.
  406. 2. Timeout (see `max_steps`).
  407. ## Registered Configurations
  408. - `BabyAI-GoToSeq-v0`
  409. - `BabyAI-GoToSeqS5R2-v0`
  410. """
  411. def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
  412. super().__init__(
  413. room_size=room_size,
  414. num_rows=num_rows,
  415. num_cols=num_cols,
  416. num_dists=num_dists,
  417. action_kinds=["goto"],
  418. locked_room_prob=0,
  419. locations=False,
  420. unblocking=False,
  421. **kwargs,
  422. )
  423. class GoToRedBlueBall(RoomGridLevel):
  424. """
  425. ## Description
  426. Go to the red ball or to the blue ball.
  427. There is exactly one red or blue ball, and some distractors.
  428. The distractors are guaranteed not to be red or blue balls.
  429. Language is not required to solve this level.
  430. ## Mission Space
  431. "go to the {color} ball"
  432. {color} is the color of the box. Can be "red" or "blue".
  433. ## Action Space
  434. | Num | Name | Action |
  435. |-----|--------------|-------------------|
  436. | 0 | left | Turn left |
  437. | 1 | right | Turn right |
  438. | 2 | forward | Move forward |
  439. | 3 | pickup | Pick up an object |
  440. | 4 | drop | Unused |
  441. | 5 | toggle | Unused |
  442. | 6 | done | Unused |
  443. ## Observation Encoding
  444. - Each tile is encoded as a 3 dimensional tuple:
  445. `(OBJECT_IDX, COLOR_IDX, STATE)`
  446. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  447. [minigrid/minigrid.py](minigrid/minigrid.py)
  448. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  449. ## Rewards
  450. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  451. ## Termination
  452. The episode ends if any one of the following conditions is met:
  453. 1. The agent goes to the ball.
  454. 2. Timeout (see `max_steps`).
  455. ## Registered Configurations
  456. - `BabyAI-GoToRedBlueBall-v0`
  457. """
  458. def __init__(self, room_size=8, num_dists=7, **kwargs):
  459. self.num_dists = num_dists
  460. super().__init__(num_rows=1, num_cols=1, room_size=room_size, **kwargs)
  461. def gen_mission(self):
  462. self.place_agent()
  463. dists = self.add_distractors(num_distractors=self.num_dists, all_unique=False)
  464. # Ensure there is only one red or blue ball
  465. for dist in dists:
  466. if dist.type == "ball" and (dist.color == "blue" or dist.color == "red"):
  467. raise RejectSampling("can only have one blue or red ball")
  468. color = self._rand_elem(["red", "blue"])
  469. obj, _ = self.add_object(0, 0, "ball", color)
  470. # Make sure no unblocking is required
  471. self.check_objs_reachable()
  472. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
  473. class GoToDoor(RoomGridLevel):
  474. """
  475. ## Description
  476. Go to a door
  477. (of a given color, in the current room)
  478. No distractors, no language variation
  479. ## Mission Space
  480. "go to the {color} door"
  481. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  482. "yellow" or "grey".
  483. ## Action Space
  484. | Num | Name | Action |
  485. |-----|--------------|-------------------|
  486. | 0 | left | Turn left |
  487. | 1 | right | Turn right |
  488. | 2 | forward | Move forward |
  489. | 3 | pickup | Pick up an object |
  490. | 4 | drop | Unused |
  491. | 5 | toggle | Unused |
  492. | 6 | done | Unused |
  493. ## Observation Encoding
  494. - Each tile is encoded as a 3 dimensional tuple:
  495. `(OBJECT_IDX, COLOR_IDX, STATE)`
  496. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  497. [minigrid/minigrid.py](minigrid/minigrid.py)
  498. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  499. ## Rewards
  500. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  501. ## Termination
  502. The episode ends if any one of the following conditions is met:
  503. 1. The agent goes to the door.
  504. 2. Timeout (see `max_steps`).
  505. ## Registered Configurations
  506. - `BabyAI-GoToDoor-v0`
  507. """
  508. def __init__(self, **kwargs):
  509. super().__init__(room_size=7, **kwargs)
  510. def gen_mission(self):
  511. objs = []
  512. for _ in range(4):
  513. door, _ = self.add_door(1, 1)
  514. objs.append(door)
  515. self.place_agent(1, 1)
  516. obj = self._rand_elem(objs)
  517. self.instrs = GoToInstr(ObjDesc("door", obj.color))
  518. class GoToObjDoor(RoomGridLevel):
  519. """
  520. ## Description
  521. Go to an object or door
  522. (of a given type and color, in the current room)
  523. ## Mission Space
  524. "go to the {color} {type}"
  525. {color} is the color of the box. Can be "red", "green", "blue", "purple",
  526. "yellow" or "grey".
  527. {type} is the type of the object. Can be "ball", "box", "key" or "door".
  528. ## Action Space
  529. | Num | Name | Action |
  530. |-----|--------------|-------------------|
  531. | 0 | left | Turn left |
  532. | 1 | right | Turn right |
  533. | 2 | forward | Move forward |
  534. | 3 | pickup | Pick up an object |
  535. | 4 | drop | Unused |
  536. | 5 | toggle | Unused |
  537. | 6 | done | Unused |
  538. ## Observation Encoding
  539. - Each tile is encoded as a 3 dimensional tuple:
  540. `(OBJECT_IDX, COLOR_IDX, STATE)`
  541. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  542. [minigrid/minigrid.py](minigrid/minigrid.py)
  543. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  544. ## Rewards
  545. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  546. ## Termination
  547. The episode ends if any one of the following conditions is met:
  548. 1. The agent goes to the object or door.
  549. 2. Timeout (see `max_steps`).
  550. ## Registered Configurations
  551. - `BabyAI-GoToObjDoor-v0`
  552. """
  553. def __init__(self, **kwargs):
  554. super().__init__(room_size=8, **kwargs)
  555. def gen_mission(self):
  556. self.place_agent(1, 1)
  557. objs = self.add_distractors(1, 1, num_distractors=8, all_unique=False)
  558. for _ in range(4):
  559. door, _ = self.add_door(1, 1)
  560. objs.append(door)
  561. self.check_objs_reachable()
  562. obj = self._rand_elem(objs)
  563. self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))