You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

128 lines
3.8 KiB

2 months ago
  1. from __future__ import annotations
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import Goal
  5. from minigrid.minigrid_env import MiniGridEnv
  6. class FourRoomsEnv(MiniGridEnv):
  7. """
  8. ## Description
  9. Classic four room reinforcement learning environment. The agent must
  10. navigate in a maze composed of four rooms interconnected by 4 gaps in the
  11. walls. To obtain a reward, the agent must reach the green goal square. Both
  12. the agent and the goal square are randomly placed in any of the four rooms.
  13. ## Mission Space
  14. "reach the goal"
  15. ## Action Space
  16. | Num | Name | Action |
  17. |-----|--------------|--------------|
  18. | 0 | left | Turn left |
  19. | 1 | right | Turn right |
  20. | 2 | forward | Move forward |
  21. | 3 | pickup | Unused |
  22. | 4 | drop | Unused |
  23. | 5 | toggle | Unused |
  24. | 6 | done | Unused |
  25. ## Observation Encoding
  26. - Each tile is encoded as a 3 dimensional tuple:
  27. `(OBJECT_IDX, COLOR_IDX, STATE)`
  28. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  29. [minigrid/minigrid.py](minigrid/minigrid.py)
  30. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  31. ## Rewards
  32. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  33. ## Termination
  34. The episode ends if any one of the following conditions is met:
  35. 1. The agent reaches the goal.
  36. 2. Timeout (see `max_steps`).
  37. ## Registered Configurations
  38. - `MiniGrid-FourRooms-v0`
  39. """
  40. def __init__(self, agent_pos=None, goal_pos=None, max_steps=100, **kwargs):
  41. self._agent_default_pos = agent_pos
  42. self._goal_default_pos = goal_pos
  43. self.size = 19
  44. mission_space = MissionSpace(mission_func=self._gen_mission)
  45. super().__init__(
  46. mission_space=mission_space,
  47. width=self.size,
  48. height=self.size,
  49. max_steps=max_steps,
  50. **kwargs,
  51. )
  52. @staticmethod
  53. def _gen_mission():
  54. return "reach the goal"
  55. def _gen_grid(self, width, height):
  56. # Create the grid
  57. self.grid = Grid(width, height)
  58. # Generate the surrounding walls
  59. self.grid.horz_wall(0, 0)
  60. self.grid.horz_wall(0, height - 1)
  61. self.grid.vert_wall(0, 0)
  62. self.grid.vert_wall(width - 1, 0)
  63. room_w = width // 2
  64. room_h = height // 2
  65. # For each row of rooms
  66. for j in range(0, 2):
  67. # For each column
  68. for i in range(0, 2):
  69. xL = i * room_w
  70. yT = j * room_h
  71. xR = xL + room_w
  72. yB = yT + room_h
  73. # Bottom wall and door
  74. if i + 1 < 2:
  75. self.grid.vert_wall(xR, yT, room_h)
  76. pos = (xR, self._rand_int(yT + 1, yB))
  77. self.grid.set(*pos, None)
  78. # Bottom wall and door
  79. if j + 1 < 2:
  80. self.grid.horz_wall(xL, yB, room_w)
  81. pos = (self._rand_int(xL + 1, xR), yB)
  82. self.grid.set(*pos, None)
  83. # Randomize the player start position and orientation
  84. if self._agent_default_pos is not None:
  85. self.agent_pos = self._agent_default_pos
  86. self.grid.set(*self._agent_default_pos, None)
  87. # assuming random start direction
  88. self.agent_dir = self._rand_int(0, 4)
  89. else:
  90. self.place_agent()
  91. if self._goal_default_pos is not None:
  92. goal = Goal()
  93. self.put_obj(goal, *self._goal_default_pos)
  94. goal.init_pos, goal.cur_pos = self._goal_default_pos
  95. else:
  96. self.place_obj(Goal())