The source code and dockerfile for the GSW2024 AI Lab.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

744 lines
36 KiB

4 months ago
  1. from __future__ import annotations
  2. from minigrid.core.grid import Grid
  3. from minigrid.core.mission import MissionSpace
  4. from minigrid.core.world_object import (
  5. SlipperyEast,
  6. SlipperySouth,
  7. SlipperyNorth,
  8. SlipperyWest,
  9. SlipperyNorthEast,
  10. Lava,
  11. Goal
  12. )
  13. from minigrid.envs.adversaries_base import AdversaryEnv
  14. from minigrid.core.tasks import FollowAgent, DoRandom, GoTo
  15. from minigrid.minigrid_env import MiniGridEnv, is_slippery
  16. import numpy as np
  17. from itertools import product
  18. class WindyCityEnv(MiniGridEnv):
  19. def __init__(self,
  20. randomize_start=True, size=10,
  21. width=24,
  22. height=22,
  23. probability_intended=8/9,
  24. probability_turn_intended=8/9,
  25. obstacle_type=Lava,
  26. goal_reward=1,
  27. failure_penalty=-1,
  28. per_step_penalty=0,
  29. dense_rewards=False,
  30. **kwargs):
  31. self.obstacle_type = obstacle_type
  32. self.size = size
  33. self.probability_intended = probability_intended
  34. self.probability_turn_intended = probability_turn_intended
  35. if width is not None and height is not None:
  36. self.width = width
  37. self.height = height
  38. elif size is not None:
  39. self.width = size
  40. self.height = size
  41. else:
  42. raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")
  43. mission_space = MissionSpace(mission_func=self._gen_mission)
  44. super().__init__(
  45. width=self.width,
  46. height=self.height,
  47. max_steps=200,
  48. # Set this to True for maximum speed
  49. see_through_walls=False,
  50. mission_space = mission_space,
  51. **kwargs
  52. )
  53. self.randomize_start = randomize_start
  54. self.goal_reward = goal_reward
  55. self.failure_penalty = failure_penalty
  56. self.dense_rewards = dense_rewards
  57. self.per_step_penalty = per_step_penalty
  58. self.trajectory = list()
  59. @staticmethod
  60. def _gen_mission():
  61. return "Finish your task while avoiding the adversaries"
  62. def disable_random_start(self):
  63. self.randomize_start = False
  64. def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):
  65. max_tries = 10_000
  66. num_tries = 0
  67. if self.randomize_start == True:
  68. while True:
  69. num_tries += 1
  70. if num_tries > max_tries:
  71. raise RecursionError("rejection sampling failed in place_obj")
  72. x = np.random.randint(0, self.width)
  73. y = np.random.randint(0, self.height)
  74. cell = self.grid.get(*(x,y))
  75. if ( cell is None or
  76. (cell.can_overlap() and
  77. not isinstance(cell, Lava) and
  78. not isinstance(cell, Goal) and
  79. (spawn_on_slippery or not is_slippery(cell)) and
  80. not (x in [7, 8, 9, 10] and y in [9, 10]))
  81. ):
  82. self.agent_pos = np.array((x, y))
  83. self.agent_dir = np.random.randint(0, 4)
  84. break
  85. elif agent_dir is None:
  86. self.agent_pos = np.array((1, 1))
  87. self.agent_dir = 0
  88. else:
  89. self.agent_pos = agent_pos
  90. self.agent_dir = agent_dir
  91. self.trajectory.append((self.agent_pos, self.agent_dir))
  92. def place_goal(self, goal_pos):
  93. self.goal_pos = goal_pos
  94. self.put_obj(Goal(), *self.goal_pos)
  95. def printGrid(self, init=False):
  96. grid = super().printGrid(init)
  97. properties_str = ""
  98. properties_str += F"ProbTurnIntended:{self.probability_turn_intended}\n"
  99. properties_str += F"ProbForwardIntended:{self.probability_intended}\n"
  100. return grid + properties_str
  101. def step(self, action):
  102. obs, reward, terminated, truncated, info = super().step(action)
  103. self.trajectory.append((action, self.agent_pos, self.agent_dir))
  104. if truncated and info["ran_into_lava"]:
  105. print(self.trajectory)
  106. print("truncated: ", info)
  107. self.trajectory = list()
  108. if truncated and info["reached_goal"]:
  109. print("truncated: ", info)
  110. self.trajectory = list()
  111. elif terminated and info["ran_into_lava"]:
  112. print(self.trajectory)
  113. print("terminated: ", info)
  114. self.trajectory = list()
  115. elif terminated:
  116. print("terminated: ", info)
  117. self.trajectory = list()
  118. elif truncated:
  119. print("truncated: ", info)
  120. self.trajectory = list()
  121. return obs, reward - self.per_step_penalty, terminated, truncated, info
  122. def reset(self, **kwargs) -> tuple[ObsType, dict[str, Any]]:
  123. return super().reset(**kwargs)
  124. def _place_building(self, col, row, width, height, obj_type=Lava):
  125. for i in range(col, width + col):
  126. self.grid.vert_wall(i, row, height, obj_type=obj_type)
  127. def _gen_grid(self, width, height):
  128. super()._gen_grid(width, height)
  129. self.grid = Grid(width, height)
  130. # Generate the surrounding walls
  131. self.grid.horz_wall(0, 0)
  132. self.grid.horz_wall(0, height - 1)
  133. self.grid.vert_wall(0, 0)
  134. self.grid.vert_wall(width - 1, 0)
  135. for i in range(1, height - 1):
  136. self.grid.horz_wall(1, i, width-2, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  137. self._place_building(13, 1, 4, 2)
  138. self.grid.vert_wall(12, 1, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  139. self.grid.horz_wall(13, 3, 4, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  140. self.grid.vert_wall(17, 1, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  141. self._place_building(7, 3, 3, 4)
  142. self.grid.vert_wall(6, 3, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  143. self.grid.vert_wall(10, 3, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  144. self.grid.horz_wall(7, 2, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  145. self.grid.horz_wall(7, 7, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  146. self._place_building(15, 7, 6, 4)
  147. self.grid.vert_wall(14, 7, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  148. self.grid.vert_wall(14, 9, 2, obj_type=Lava)
  149. self.grid.vert_wall(20, 7, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  150. self.grid.vert_wall(13, 9, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  151. self.grid.horz_wall(15, 6, 5, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  152. self.grid.horz_wall(14, 11, 6, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  153. self._place_building(5, 11, 5, 6)
  154. self.grid.vert_wall(4, 11, 6, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  155. self.grid.vert_wall(10, 11, 6, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  156. self.grid.horz_wall(5, 17, 5, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  157. self.grid.horz_wall(5, 10, 5, obj_type=SlipperyWest("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  158. self.grid.horz_wall(6, 9, 4, obj_type=SlipperyWest("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  159. self.grid.vert_wall(9, 7, 4, obj_type=SlipperySouth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  160. self._place_building(21, 13, 2, 5)
  161. self.grid.vert_wall(20, 13, 5, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  162. self.grid.horz_wall(21, 12, 2, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  163. self.grid.horz_wall(21, 18, 2, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  164. self.place_agent(agent_pos=np.array((1, height -2)), agent_dir=0, spawn_on_slippery=True)
  165. self.place_goal(np.array((width - 2, 1)))
  166. if self.dense_rewards: self.run_bfs()
  167. class WindyCityAdvEnv(AdversaryEnv):
  168. def __init__(self,
  169. randomize_start=True, size=10,
  170. width=15,
  171. height=15,
  172. probability_intended=8/9,
  173. probability_turn_intended=8/9,
  174. obstacle_type=Lava,
  175. goal_reward=1,
  176. failure_penalty=-1,
  177. per_step_penalty=0,
  178. dense_rewards=False,
  179. **kwargs):
  180. self.obstacle_type = obstacle_type
  181. self.size = size
  182. self.probability_intended = probability_intended
  183. self.probability_turn_intended = probability_turn_intended
  184. if width is not None and height is not None:
  185. self.width = width
  186. self.height = height
  187. elif size is not None:
  188. self.width = size
  189. self.height = size
  190. else:
  191. raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")
  192. super().__init__(
  193. width=self.width,
  194. height=self.height,
  195. max_steps=200,
  196. # Set this to True for maximum speed
  197. see_through_walls=False,
  198. **kwargs
  199. )
  200. self.randomize_start = randomize_start
  201. self.goal_reward = goal_reward
  202. self.failure_penalty = failure_penalty
  203. self.dense_rewards = dense_rewards
  204. self.per_step_penalty = per_step_penalty
  205. self.trajectory = list()
  206. def disable_random_start(self):
  207. self.randomize_start = False
  208. def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):
  209. max_tries = 10_000
  210. num_tries = 0
  211. if self.randomize_start == True:
  212. while True:
  213. num_tries += 1
  214. if num_tries > max_tries:
  215. raise RecursionError("rejection sampling failed in place_obj")
  216. x = np.random.randint(0, self.width)
  217. y = np.random.randint(0, self.height)
  218. cell = self.grid.get(*(x,y))
  219. if ( cell is None or
  220. (cell.can_overlap() and
  221. not isinstance(cell, Lava) and
  222. not isinstance(cell, Goal) and
  223. (spawn_on_slippery or not is_slippery(cell)) and
  224. not (x in [7, 8, 9, 10] and y in [9, 10]))
  225. ):
  226. self.agent_pos = np.array((x, y))
  227. self.agent_dir = np.random.randint(0, 4)
  228. break
  229. elif agent_dir is None:
  230. self.agent_pos = np.array((1, 1))
  231. self.agent_dir = 0
  232. else:
  233. self.agent_pos = agent_pos
  234. self.agent_dir = agent_dir
  235. self.trajectory.append((self.agent_pos, self.agent_dir))
  236. def place_goal(self, goal_pos):
  237. self.goal_pos = goal_pos
  238. self.put_obj(Goal(), *self.goal_pos)
  239. def printGrid(self, init=False):
  240. grid = super().printGrid(init)
  241. properties_str = ""
  242. properties_str += F"ProbTurnIntended:{self.probability_turn_intended}\n"
  243. properties_str += F"ProbForwardIntended:{self.probability_intended}\n"
  244. return grid + properties_str
  245. def step(self, action):
  246. obs, reward, terminated, truncated, info = super().step(action)
  247. self.trajectory.append((action, self.agent_pos, self.agent_dir, str(self.adversaries["blue"])))
  248. if truncated and info["ran_into_lava"]:
  249. print(self.trajectory)
  250. print("truncated: ", info)
  251. self.trajectory = list()
  252. if truncated and info["reached_goal"]:
  253. print("truncated: ", info)
  254. self.trajectory = list()
  255. elif terminated and info["ran_into_lava"]:
  256. print(self.trajectory)
  257. print("terminated: ", info)
  258. self.trajectory = list()
  259. elif terminated:
  260. print("terminated: ", info)
  261. self.trajectory = list()
  262. elif truncated:
  263. print("truncated: ", info)
  264. self.trajectory = list()
  265. return obs, reward - self.per_step_penalty, terminated, truncated, info
  266. def reset(self, **kwargs) -> tuple[ObsType, dict[str, Any]]:
  267. return super().reset(**kwargs)
  268. def _place_building(self, col, row, width, height, obj_type=Lava):
  269. for i in range(col, width + col):
  270. self.grid.vert_wall(i, row, height, obj_type=obj_type)
  271. def _gen_grid(self, width, height):
  272. super()._gen_grid(width, height)
  273. self.grid = Grid(width, height)
  274. # Generate the surrounding walls
  275. self.grid.horz_wall(0, 0)
  276. self.grid.horz_wall(0, height - 1)
  277. self.grid.vert_wall(0, 0)
  278. self.grid.vert_wall(width - 1, 0)
  279. for i in range(1, height - 1):
  280. self.grid.horz_wall(1, i, width-2, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  281. self._place_building(7, 1, 4, 1)
  282. self.grid.vert_wall(6, 1, 1, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  283. self.grid.horz_wall(7, 2, 4, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  284. self.grid.vert_wall(11, 1, 1, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  285. self._place_building(4, 5, 2, 1)
  286. self.grid.vert_wall(3, 5, 1, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  287. self.grid.vert_wall(6, 5, 1, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  288. self.grid.horz_wall(4, 4, 2, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  289. self.grid.horz_wall(4, 6, 2, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  290. self._place_building(12, 7, 2, 3)
  291. self.grid.vert_wall(11, 7, 3, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  292. self.grid.horz_wall(11, 6, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  293. self.grid.horz_wall(11, 10, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  294. self._place_building(4, 10, 2, 2)
  295. self.grid.vert_wall(3, 10, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  296. self.grid.vert_wall(6, 10, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  297. self.grid.horz_wall(4, 12, 2, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  298. self.grid.horz_wall(4, 9, 2, obj_type=SlipperyWest("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  299. self.grid.vert_wall(5, 7, 3, obj_type=SlipperySouth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  300. #should spawn randomly
  301. x = np.random.choice([1,2,3,6,7,8,9])
  302. y = np.random.choice([6,7,8])
  303. self.add_adversary(x, y, "blue", direction=1, tasks=[FollowAgent("red", duration=2), DoRandom(duration=2)], repeating=True)
  304. self.place_agent(agent_pos=np.array((1, height -2)), agent_dir=0, spawn_on_slippery=True)
  305. self.place_goal(np.array((width - 2, 1)))
  306. if self.dense_rewards: self.run_bfs()
  307. class WindyCity2Env(MiniGridEnv):
  308. def __init__(self,
  309. randomize_start=True, size=10,
  310. width=27,
  311. height=22,
  312. probability_intended=8/9,
  313. probability_turn_intended=8/9,
  314. obstacle_type=Lava,
  315. goal_reward=1,
  316. failure_penalty=-1,
  317. per_step_penalty=0,
  318. dense_rewards=False,
  319. two_player_winning_region_start=False,
  320. **kwargs):
  321. self.obstacle_type = obstacle_type
  322. self.size = size
  323. self.probability_intended = probability_intended
  324. self.probability_turn_intended = probability_turn_intended
  325. if width is not None and height is not None:
  326. self.width = width
  327. self.height = height
  328. elif size is not None:
  329. self.width = size
  330. self.height = size
  331. else:
  332. raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")
  333. mission_space = MissionSpace(mission_func=self._gen_mission)
  334. super().__init__(
  335. width=self.width,
  336. height=self.height,
  337. max_steps=200,
  338. # Set this to True for maximum speed
  339. see_through_walls=False,
  340. mission_space = mission_space,
  341. **kwargs
  342. )
  343. self.randomize_start = randomize_start
  344. self.two_player_winning_region_start = two_player_winning_region_start
  345. self.goal_reward = goal_reward
  346. self.failure_penalty = failure_penalty
  347. self.dense_rewards = dense_rewards
  348. self.per_step_penalty = per_step_penalty
  349. self.trajectory = list()
  350. @staticmethod
  351. def _gen_mission():
  352. return "Finish your task while avoiding the adversaries"
  353. def disable_random_start(self):
  354. self.randomize_start = False
  355. def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):
  356. max_tries = 10_000
  357. num_tries = 0
  358. if self.two_player_winning_region_start == True:
  359. winning_region = list()
  360. winning_region += product([1,2,3,4], [y for y in range(1, self.height-1)])
  361. winning_region += product([x for x in range(1,12)], [1])
  362. winning_region += product([x for x in range(1,self.width-10)], [self.height-2])
  363. winning_region += product([x for x in range(self.width-6, self.width-1)], [1,2,3,4])
  364. winning_region += product([x for x in range(self.width-11, self.width-1)], [5])
  365. x, y= winning_region[np.random.choice(len(winning_region), 1)[0]]
  366. self.agent_pos = np.array((x,y))
  367. self.agent_dir = np.random.randint(0, 4)
  368. self.trajectory.append((self.agent_pos, self.agent_dir))
  369. return
  370. if self.randomize_start == True:
  371. while True:
  372. num_tries += 1
  373. if num_tries > max_tries:
  374. raise RecursionError("rejection sampling failed in place_obj")
  375. x = np.random.randint(0, self.width)
  376. y = np.random.randint(0, self.height)
  377. cell = self.grid.get(*(x,y))
  378. if ( cell is None or
  379. (cell.can_overlap() and
  380. not isinstance(cell, Lava) and
  381. not isinstance(cell, Goal) and
  382. (spawn_on_slippery or not is_slippery(cell)) and
  383. not (x in [7, 8, 9, 10] and y in [9, 10]))
  384. ):
  385. self.agent_pos = np.array((x, y))
  386. self.agent_dir = np.random.randint(0, 4)
  387. break
  388. elif agent_dir is None:
  389. self.agent_pos = np.array((1, 1))
  390. self.agent_dir = 0
  391. else:
  392. self.agent_pos = agent_pos
  393. self.agent_dir = agent_dir
  394. self.trajectory.append((self.agent_pos, self.agent_dir))
  395. def place_goal(self, goal_pos):
  396. self.goal_pos = goal_pos
  397. self.put_obj(Goal(), *self.goal_pos)
  398. def printGrid(self, init=False):
  399. grid = super().printGrid(init)
  400. properties_str = ""
  401. properties_str += F"ProbTurnIntended:{self.probability_turn_intended}\n"
  402. properties_str += F"ProbForwardIntended:{self.probability_intended}\n"
  403. return grid + properties_str
  404. def step(self, action):
  405. obs, reward, terminated, truncated, info = super().step(action)
  406. self.trajectory.append((action, self.agent_pos, self.agent_dir))
  407. if truncated and info["ran_into_lava"]:
  408. print(self.trajectory)
  409. print("truncated: ", info)
  410. self.trajectory = list()
  411. if truncated and info["reached_goal"]:
  412. print("truncated: ", info)
  413. self.trajectory = list()
  414. elif terminated and info["ran_into_lava"]:
  415. print(self.trajectory)
  416. print("terminated: ", info)
  417. self.trajectory = list()
  418. elif terminated:
  419. print("terminated: ", info)
  420. self.trajectory = list()
  421. elif truncated:
  422. print("truncated: ", info)
  423. self.trajectory = list()
  424. return obs, reward - self.per_step_penalty, terminated, truncated, info
  425. def reset(self, **kwargs) -> tuple[ObsType, dict[str, Any]]:
  426. return super().reset(**kwargs)
  427. def _place_building(self, col, row, width, height, obj_type=Lava):
  428. for i in range(col, width + col):
  429. self.grid.vert_wall(i, row, height, obj_type=obj_type)
  430. def _gen_grid(self, width, height):
  431. super()._gen_grid(width, height)
  432. self.grid = Grid(width, height)
  433. # Generate the surrounding walls
  434. self.grid.horz_wall(0, 0)
  435. self.grid.horz_wall(0, height - 1)
  436. self.grid.vert_wall(0, 0)
  437. self.grid.vert_wall(width - 1, 0)
  438. for i in range(1, height - 1):
  439. self.grid.horz_wall(1, i, width-2, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  440. self.grid.horz_wall(1, 17, 15, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  441. self.grid.horz_wall(1, 18, 16, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  442. self.grid.horz_wall(1, 19, 17, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  443. self.grid.horz_wall(1, 20, 18, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  444. self.grid.horz_wall(1, 7, 9, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  445. self.grid.horz_wall(1, 8, 8, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  446. self.grid.horz_wall(1, 9, 8, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  447. self.grid.horz_wall(1, 10, 7, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  448. self._place_building(16, 1, 4, 2)
  449. self.grid.vert_wall(15, 1, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  450. self.grid.horz_wall(16, 3, 4, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  451. self.grid.vert_wall(20, 1, 2, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  452. self._place_building(10, 3, 3, 4)
  453. #self.grid.vert_wall(9, 3, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  454. self.grid.vert_wall(13, 3, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  455. self.grid.horz_wall(10, 2, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  456. self.grid.horz_wall(10, 7, 3, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  457. self._place_building(16, 7, 8, 5)
  458. self.grid.vert_wall(15, 7, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  459. #self.grid.vert_wall(17, 9, 3, obj_type=Lava)
  460. self.grid.vert_wall(24, 7, 5, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  461. self.grid.vert_wall(15, 9, 3, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  462. self.grid.horz_wall(16, 6, 7, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  463. self.grid.horz_wall(16, 12, 7, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  464. self.grid.vert_wall(22, 12, 1, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  465. self.grid.vert_wall(23, 13, 1, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  466. self._place_building(8, 11, 5, 6)
  467. #self.grid.vert_wall(7, 11, 6, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  468. self.grid.vert_wall(13, 11, 6, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  469. self.grid.horz_wall(8, 17, 5, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  470. self.grid.horz_wall(9, 10, 4, obj_type=SlipperyWest("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  471. self.grid.horz_wall(10, 9, 3, obj_type=SlipperyWest("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  472. self.grid.vert_wall(12, 7, 4, obj_type=SlipperySouth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  473. self._place_building(22, 14, 4, 4)
  474. self.grid.vert_wall(21, 14, 4, obj_type=SlipperyNorth("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  475. self.grid.horz_wall(22, 13, 4, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  476. self.grid.horz_wall(22, 18, 4, obj_type=SlipperyEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  477. #self.grid.vert_wall(22, 13, 1, obj_type=SlipperyNorthEast("white", probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))
  478. self.place_agent(agent_pos=np.array((18, height - 4)), agent_dir=3, spawn_on_slippery=True)
  479. self.place_goal(np.array((width - 2, 10)))
  480. if self.dense_rewards: self.run_bfs()
  481. class WindyCitySmallAdv(AdversaryEnv):
  482. def __init__(self,
  483. randomize_start=True, size=10,
  484. width=11,
  485. height=9,
  486. probability_intended=1,
  487. probability_turn_intended=1,
  488. obstacle_type=Lava,
  489. goal_reward=1,
  490. failure_penalty=-1,
  491. per_step_penalty=0,
  492. dense_rewards=False,
  493. two_player_winning_region_start=False,
  494. **kwargs):
  495. self.obstacle_type = obstacle_type
  496. self.size = size
  497. self.probability_intended = probability_intended
  498. self.probability_turn_intended = probability_turn_intended
  499. if width is not None and height is not None:
  500. self.width = width
  501. self.height = height
  502. elif size is not None:
  503. self.width = size
  504. self.height = size
  505. else:
  506. raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")
  507. mission_space = MissionSpace(mission_func=self._gen_mission)
  508. super().__init__(
  509. width=self.width,
  510. height=self.height,
  511. max_steps=50,
  512. # Set this to True for maximum speed
  513. see_through_walls=False,
  514. #mission_space = mission_space,
  515. **kwargs
  516. )
  517. self.randomize_start = randomize_start
  518. self.two_player_winning_region_start = two_player_winning_region_start
  519. self.goal_reward = goal_reward
  520. self.failure_penalty = failure_penalty
  521. self.dense_rewards = dense_rewards
  522. self.per_step_penalty = per_step_penalty
  523. self.trajectory = list()
  524. @staticmethod
  525. def _gen_mission():
  526. return "Finish your task while avoiding the adversaries"
  527. def disable_random_start(self):
  528. self.randomize_start = False
  529. def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):
  530. max_tries = 10_000
  531. num_tries = 0
  532. if self.randomize_start == True:
  533. while True:
  534. num_tries += 1
  535. if num_tries > max_tries:
  536. raise RecursionError("rejection sampling failed in place_obj")
  537. x = np.random.randint(0, self.width)
  538. y = np.random.randint(5, self.height)
  539. cell = self.grid.get(*(x,y))
  540. if ( cell is None or
  541. (cell.can_overlap() and
  542. not isinstance(cell, Lava) and
  543. not isinstance(cell, Goal) and
  544. (spawn_on_slippery or not is_slippery(cell)) and
  545. not (x in [7, 8, 9, 10] and y in [9, 10]))
  546. ):
  547. self.agent_pos = np.array((x, y))
  548. self.agent_dir = np.random.randint(0, 4)
  549. break
  550. elif agent_dir is None:
  551. self.agent_pos = np.array((1, 1))
  552. self.agent_dir = 0
  553. else:
  554. self.agent_pos = agent_pos
  555. self.agent_dir = agent_dir
  556. self.trajectory.append((self.agent_pos, self.agent_dir))
  557. def place_goal(self, goal_pos):
  558. self.goal_pos = goal_pos
  559. self.put_obj(Goal(), *self.goal_pos)
  560. def printGrid(self, init=False):
  561. grid = super().printGrid(init)
  562. properties_str = ""
  563. properties_str += F"ProbTurnIntended:{self.probability_turn_intended}\n"
  564. properties_str += F"ProbForwardIntended:{self.probability_intended}\n"
  565. return grid + properties_str
  566. def step(self, action):
  567. obs, reward, terminated, truncated, info = super().step(action)
  568. self.trajectory.append((action, self.agent_pos, self.agent_dir))
  569. if truncated and info["ran_into_lava"]:
  570. print(self.trajectory)
  571. print("truncated: ", info)
  572. self.trajectory = list()
  573. if truncated and info["reached_goal"]:
  574. print("truncated: ", info)
  575. self.trajectory = list()
  576. elif terminated and info["ran_into_lava"]:
  577. print(self.trajectory)
  578. print("terminated: ", info)
  579. self.trajectory = list()
  580. elif terminated and info["collision"]:
  581. print(self.trajectory)
  582. print("terminated: ", info)
  583. self.trajectory = list()
  584. elif terminated:
  585. print("terminated: ", info)
  586. self.trajectory = list()
  587. elif truncated:
  588. print("truncated: ", info)
  589. self.trajectory = list()
  590. return obs, reward - self.per_step_penalty, terminated, truncated, info
  591. def reset(self, **kwargs) -> tuple[ObsType, dict[str, Any]]:
  592. return super().reset(**kwargs)
  593. def _place_building(self, col, row, width, height, obj_type=Lava):
  594. for i in range(col, width + col):
  595. self.grid.vert_wall(i, row, height, obj_type=obj_type)
  596. def _gen_grid(self, width, height):
  597. super()._gen_grid(width, height)
  598. self.grid = Grid(width, height)
  599. # Generate the surrounding walls
  600. self.grid.horz_wall(0, 0)
  601. self.grid.horz_wall(0, height - 1)
  602. self.grid.vert_wall(0, 0)
  603. self.grid.vert_wall(width - 1, 0)
  604. self._place_building(3, 3, 5, 2)
  605. blue_adv = self.add_adversary(2, 4, "blue", direction=3, tasks=
  606. [GoTo((2,2)), GoTo((8,2)), GoTo((8,4)), GoTo((8,2)), GoTo((2,2)), GoTo((2,4))], repeating=True)
  607. self.place_agent(agent_pos=np.array((5, 5)), agent_dir=3, spawn_on_slippery=True)
  608. self.place_goal(np.array((width//2, 1)))
  609. if self.dense_rewards: self.run_bfs()