You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

274 lines
9.8 KiB

2 months ago
  1. from __future__ import annotations
  2. from minigrid.core.constants import COLOR_NAMES
  3. from minigrid.core.grid import Grid
  4. from minigrid.core.mission import MissionSpace
  5. from minigrid.core.world_object import (
  6. Ball,
  7. Box,
  8. Key,
  9. Slippery,
  10. SlipperyEast,
  11. SlipperySouth,
  12. SlipperyNorth,
  13. SlipperyWest,
  14. Lava,
  15. Goal,
  16. Point
  17. )
  18. from minigrid.minigrid_env import MiniGridEnv, is_slippery
  19. import numpy as np
  class LavaSlipperyEnv(MiniGridEnv):
      """Base class for walled grid worlds with lava and slippery tiles.

      This class builds the empty, walled grid and offers helpers for placing
      lava, slippery tiles, the agent and the goal. The subclasses in this
      module extend ``_gen_grid`` to add their own layout.
      """

      def __init__(self,
                   randomize_start=True, size=12,
                   width=None,
                   height=None,
                   probability_intended=8/9,
                   probability_turn_intended=8/9,
                   obstacle_type=Lava,
                   goal_reward=1,
                   failure_penalty=-1,
                   per_step_penalty=0,
                   dense_rewards=False,
                   **kwargs):
          """Configure the environment.

          Args:
              randomize_start: If truthy, ``place_agent`` samples a random
                  start cell and direction instead of using its arguments.
              size: Side length of a square grid; used only when ``width``
                  and ``height`` are not both given.
              width: Grid width; takes effect only together with ``height``.
              height: Grid height; takes effect only together with ``width``.
              probability_intended: Stored on the instance and reported by
                  ``printGrid``; subclasses pass it to slippery tiles.
              probability_turn_intended: Stored on the instance and reported
                  by ``printGrid``; subclasses pass it to slippery tiles.
              obstacle_type: Selects the mission text (``Lava`` vs. anything
                  else).
              goal_reward: Stored on the instance; not read in this class.
              failure_penalty: Stored on the instance; not read in this class.
              per_step_penalty: Subtracted from the reward in every ``step``.
              dense_rewards: Stored on the instance; subclasses check it to
                  decide whether to call ``run_bfs``.
              **kwargs: Forwarded to ``MiniGridEnv.__init__``.

          Raises:
              ValueError: If neither ``width``/``height`` nor ``size`` is set.
          """
          self.obstacle_type = obstacle_type
          self.size = size
          self.probability_intended = probability_intended
          self.probability_turn_intended = probability_turn_intended

          if width is not None and height is not None:
              self.width = width
              self.height = height
          elif size is not None:
              self.width = size
              self.height = size
          else:
              raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")

          if obstacle_type == Lava:
              mission_space = MissionSpace(
                  mission_func=lambda: "avoid the lava and get to the green goal square"
              )
          else:
              mission_space = MissionSpace(
                  mission_func=lambda: "find the opening and get to the green goal square"
              )

          super().__init__(
              mission_space=mission_space,
              width=self.width,
              height=self.height,
              max_steps=200,
              # Set this to True for maximum speed
              see_through_walls=False,
              **kwargs
          )

          self.randomize_start = randomize_start
          self.goal_reward = goal_reward
          self.failure_penalty = failure_penalty
          self.dense_rewards = dense_rewards
          self.per_step_penalty = per_step_penalty

      def _place_slippery_lava(self, x, y):
          """Put lava at ``(x, y)`` and one slippery tile on each of its four sides.

          The ``_create_slippery_*`` factories are not defined in this class;
          they are presumably provided by the base class.
          """
          self.put_obj(Lava(), x, y)
          self.put_obj(self._create_slippery_north(), x, y - 1)
          self.put_obj(self._create_slippery_south(), x, y + 1)
          self.put_obj(self._create_slippery_east(), x + 1, y)
          self.put_obj(self._create_slippery_west(), x - 1, y)

      def create_slippery_lava_line(self, y, x_start, x_end, no_slippery_left=False, no_slippery_right=False):
          """Put lava on row ``y`` from ``x_start`` to ``x_end`` (inclusive).

          Unless disabled by ``no_slippery_left`` / ``no_slippery_right``, a
          slippery tile is placed at ``x_start - 1`` and at ``x_end + 1``.
          """
          if not no_slippery_left:
              self.put_obj(self._create_slippery_west(), x_start - 1, y)
          if not no_slippery_right:
              self.put_obj(self._create_slippery_east(), x_end + 1, y)
          for x in range(x_start, x_end + 1):
              self.put_obj(Lava(), x, y)

      def _gen_grid(self, width, height):
          """Create the empty grid, surround it with walls and set the mission."""
          assert width >= 5 and height >= 5

          # Create an empty grid
          self.grid = Grid(width, height)

          # Generate the surrounding walls
          self.grid.horz_wall(0, 0)
          self.grid.horz_wall(0, height - 1)
          self.grid.vert_wall(0, 0)
          self.grid.vert_wall(width - 1, 0)

          self.mission = (
              "avoid the lava and get to the green goal square"
              if self.obstacle_type == Lava
              else "find the opening and get to the green goal square"
          )

      def disable_random_start(self):
          """Make subsequent ``place_agent`` calls use a fixed start."""
          self.randomize_start = False

      def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):
          """Set ``self.agent_pos`` and ``self.agent_dir``.

          With ``randomize_start`` enabled, a cell is drawn by rejection
          sampling: it must be empty, or overlappable and neither lava nor
          goal (and not slippery unless ``spawn_on_slippery`` is set). The
          direction is drawn uniformly from 0..3. ``agent_pos`` and
          ``agent_dir`` are ignored in that case.

          Otherwise the given ``agent_pos`` / ``agent_dir`` are used, falling
          back to position ``(1, 1)`` when ``agent_pos`` is ``None`` and to
          position ``(1, 1)`` facing direction 0 when ``agent_dir`` is ``None``.

          Raises:
              RecursionError: If no valid random cell is found in 10,000 tries.
          """
          if self.randomize_start:
              max_tries = 10_000
              # NOTE(review): sampling uses the global NumPy RNG, so random
              # starts are presumably not controlled by the env seed - confirm
              # whether that is intended.
              for _ in range(max_tries):
                  x = np.random.randint(0, self.width)
                  y = np.random.randint(0, self.height)
                  cell = self.grid.get(x, y)
                  if cell is None or (
                      cell.can_overlap()
                      and not isinstance(cell, Lava)
                      and not isinstance(cell, Goal)
                      and (spawn_on_slippery or not is_slippery(cell))
                  ):
                      self.agent_pos = np.array((x, y))
                      self.agent_dir = np.random.randint(0, 4)
                      return
              raise RecursionError("rejection sampling failed in place_obj")

          if agent_dir is None:
              # Kept from the original behaviour: no direction means the
              # default start, regardless of agent_pos.
              self.agent_pos = np.array((1, 1))
              self.agent_dir = 0
          elif agent_pos is None:
              # Previously this case stored None as the agent position,
              # because only agent_dir (default 0) was checked.
              self.agent_pos = np.array((1, 1))
              self.agent_dir = agent_dir
          else:
              self.agent_pos = agent_pos
              self.agent_dir = agent_dir

      def place_goal(self, goal_pos):
          """Remember ``goal_pos`` and put a ``Goal`` object on that cell."""
          self.goal_pos = goal_pos
          self.put_obj(Goal(), *self.goal_pos)

      def run_bfs(self):
          """Store the result of ``run_BFS_reward`` scaled by 0.1 in ``self.bfs_reward``.

          ``run_BFS_reward`` is not defined in this class; it is presumably
          provided by the base class.
          """
          self.bfs_reward = [rew * 0.1 for rew in self.run_BFS_reward()]

      def printGrid(self, init=False):
          """Return the base class grid string followed by the two probabilities."""
          grid = super().printGrid(init)
          properties_str = (
              f"ProbTurnIntended:{self.probability_turn_intended}\n"
              f"ProbForwardIntended:{self.probability_intended}\n"
          )
          return grid + properties_str

      def step(self, action):
          """Run the base class step and subtract ``per_step_penalty`` from the reward."""
          obs, reward, terminated, truncated, info = super().step(action)
          return obs, reward - self.per_step_penalty, terminated, truncated, info
  class LavaSlipperyEnv1(LavaSlipperyEnv):
      """Layout with a 2x2 lava block in the middle and a two-cell lava patch
      on row 1 and on row ``height - 2``, each flanked by slippery tiles.
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

      def _gen_grid(self, width, height):
          """Build the walled grid, then add lava, slippery tiles, agent and goal."""
          super()._gen_grid(width, height)

          cx = width // 2
          cy = height // 2
          last_row = height - 2

          north = self._create_slippery_north
          south = self._create_slippery_south
          east = self._create_slippery_east
          west = self._create_slippery_west

          # (factory, x, y) triples, applied strictly in this order so that
          # any overlapping cells on small grids end up as before.
          placements = [
              # central 2x2 lava block
              (Lava, cx - 1, cy - 1),
              (Lava, cx, cy - 1),
              (Lava, cx - 1, cy),
              (Lava, cx, cy),
              # slippery tiles left and right of the central block
              (east, cx - 2, cy - 1),
              (east, cx - 2, cy),
              (west, cx + 1, cy - 1),
              (west, cx + 1, cy),
              # lava patch on row 1 with its slippery border
              (Lava, cx - 1, 1),
              (Lava, cx, 1),
              (north, cx - 1, 2),
              (north, cx, 2),
              (east, cx - 2, 1),
              (west, cx + 1, 1),
              # lava patch on row height - 2 with its slippery border
              (Lava, cx - 1, last_row),
              (Lava, cx, last_row),
              (south, cx - 1, height - 3),
              (south, cx, height - 3),
              (east, cx - 2, last_row),
              (west, cx + 1, last_row),
          ]
          for make_obj, x, y in placements:
              self.put_obj(make_obj(), x, y)

          self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0)
          self.place_goal(np.array((width - 2, height - 2)))

          if self.dense_rewards:
              self.run_bfs()
  class LavaSlipperyCliff(LavaSlipperyEnv):
      """Layout with a lava band on rows 1-4 and ``SlipperyNorth`` rows below it."""

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

      def _gen_grid(self, width, height):
          """Build the walled grid, then add the lava band, slippery rows, agent and goal."""
          super()._gen_grid(width, height)

          band_x = 3
          band_length = width - 6

          for row in range(1, 5):
              self.grid.horz_wall(band_x, row, band_length, Lava)

          for row in range(5, height - 3):
              # A fresh tile object is built for every row, configured with
              # this environment's probabilities.
              slippery_row_tile = SlipperyNorth(
                  probability_intended=self.probability_intended,
                  probability_turn_intended=self.probability_turn_intended,
              )
              self.grid.horz_wall(band_x, row, band_length, slippery_row_tile)

          start = np.array((1, 1))
          goal = np.array((width - 2, 1))
          self.place_agent(agent_pos=start, agent_dir=0)
          self.place_goal(goal)

          if self.dense_rewards:
              self.run_bfs()
  class LavaSlipperyHill(LavaSlipperyEnv):
      """Layout whose whole interior is ``SlipperyNorth``, with a lava band on rows 1-4."""

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

      def _gen_grid(self, width, height):
          """Build the walled grid, cover the interior with slippery tiles, then add lava, agent and goal."""
          super()._gen_grid(width, height)

          interior_length = width - 2
          lava_length = width - 6

          # NOTE(review): the SlipperyNorth class itself is passed here, while
          # sibling layouts pass instances configured with
          # self.probability_intended / self.probability_turn_intended -
          # confirm which form horz_wall expects and whether the defaults are
          # intended for this layout.
          for row in range(1, height - 1):
              self.grid.horz_wall(1, row, interior_length, SlipperyNorth)

          # Lava is written after the slippery rows, so it replaces them.
          for row in range(1, 5):
              self.grid.horz_wall(3, row, lava_length, Lava)

          start = np.array((1, 1))
          goal = np.array((width - 2, 1))
          self.place_agent(agent_pos=start, agent_dir=0, spawn_on_slippery=True)
          self.place_goal(goal)

          if self.dense_rewards:
              self.run_bfs()
  class LavaSlipperyMaze(LavaSlipperyEnv):
      """Layout made of two-row horizontal bands of lava, some with a
      ``SlipperySouth`` gap, at hard-coded coordinates.
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

      def _gen_grid(self, width, height):
          """Build the walled grid, then add the bands, the agent and the goal."""
          super()._gen_grid(width, height)

          # One tile object, reused for every slippery band.
          gap_tile = SlipperySouth(
              probability_intended=self.probability_intended,
              probability_turn_intended=self.probability_turn_intended,
          )

          # (x, first_row, length, obj); each band covers first_row and the
          # row after it. Coordinates are fixed, so the grid is presumably
          # expected to be large enough to hold them.
          bands = (
              (1, 3, 5, Lava),
              (6, 3, 3, gap_tile),
              (9, 3, 7, Lava),
              (4, 7, 4, Lava),
              (13, 7, 6, Lava),
              (1, 11, 6, Lava),
              (7, 11, 3, gap_tile),
              (10, 11, 7, Lava),
              (1, 15, 4, Lava),
              (10, 15, 9, Lava),
          )
          for x, first_row, length, obj in bands:
              for row in (first_row, first_row + 1):
                  self.grid.horz_wall(x, row, length, obj)

          self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0)
          self.place_goal(np.array((width - 2, height - 2)))

      def step(self, action):
          """Run the parent step; with dense rewards, subtract a small amount
          proportional to ``self.height - self.agent_pos[1]``.
          """
          obs, reward, terminated, truncated, info = super().step(action)
          if not self.dense_rewards:
              return obs, reward, terminated, truncated, info

          rows_remaining = self.height - self.agent_pos[1]
          reward -= 0.0001 * rows_remaining
          return obs, reward, terminated, truncated, info