You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
3.4 KiB

2 months ago
  1. from __future__ import annotations
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import Goal
  5. from minigrid.minigrid_env import MiniGridEnv
  6. class EmptyEnv(MiniGridEnv):
  7. """
  8. ## Description
  9. This environment is an empty room, and the goal of the agent is to reach the
  10. green goal square, which provides a sparse reward. A small penalty is
  11. subtracted for the number of steps to reach the goal. This environment is
  12. useful, with small rooms, to validate that your RL algorithm works
  13. correctly, and with large rooms to experiment with sparse rewards and
  14. exploration. The random variants of the environment have the agent starting
  15. at a random position for each episode, while the regular variants have the
  16. agent always starting in the corner opposite to the goal.
  17. ## Mission Space
  18. "get to the green goal square"
  19. ## Action Space
  20. | Num | Name | Action |
  21. |-----|--------------|--------------|
  22. | 0 | left | Turn left |
  23. | 1 | right | Turn right |
  24. | 2 | forward | Move forward |
  25. | 3 | pickup | Unused |
  26. | 4 | drop | Unused |
  27. | 5 | toggle | Unused |
  28. | 6 | done | Unused |
  29. ## Observation Encoding
  30. - Each tile is encoded as a 3 dimensional tuple:
  31. `(OBJECT_IDX, COLOR_IDX, STATE)`
  32. - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
  33. [minigrid/minigrid.py](minigrid/minigrid.py)
  34. - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
  35. ## Rewards
  36. A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
  37. ## Termination
  38. The episode ends if any one of the following conditions is met:
  39. 1. The agent reaches the goal.
  40. 2. Timeout (see `max_steps`).
  41. ## Registered Configurations
  42. - `MiniGrid-Empty-5x5-v0`
  43. - `MiniGrid-Empty-Random-5x5-v0`
  44. - `MiniGrid-Empty-6x6-v0`
  45. - `MiniGrid-Empty-Random-6x6-v0`
  46. - `MiniGrid-Empty-8x8-v0`
  47. - `MiniGrid-Empty-16x16-v0`
  48. """
  49. def __init__(
  50. self,
  51. size=8,
  52. agent_start_pos=(1, 1),
  53. agent_start_dir=0,
  54. max_steps: int | None = None,
  55. **kwargs,
  56. ):
  57. self.agent_start_pos = agent_start_pos
  58. self.agent_start_dir = agent_start_dir
  59. mission_space = MissionSpace(mission_func=self._gen_mission)
  60. if max_steps is None:
  61. max_steps = 4 * size**2
  62. super().__init__(
  63. mission_space=mission_space,
  64. grid_size=size,
  65. # Set this to True for maximum speed
  66. see_through_walls=True,
  67. max_steps=max_steps,
  68. **kwargs,
  69. )
  70. @staticmethod
  71. def _gen_mission():
  72. return "get to the green goal square"
  73. def _gen_grid(self, width, height):
  74. # Create an empty grid
  75. self.grid = Grid(width, height)
  76. # Generate the surrounding walls
  77. self.grid.wall_rect(0, 0, width, height)
  78. # Place a goal square in the bottom-right corner
  79. self.put_obj(Goal(), width - 2, height - 2)
  80. # Place the agent
  81. if self.agent_start_pos is not None:
  82. self.agent_pos = self.agent_start_pos
  83. self.agent_dir = self.agent_start_dir
  84. else:
  85. self.place_agent()
  86. self.mission = "get to the green goal square"