You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

278 lines
9.0 KiB

4 months ago
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import re
  4. from tqdm import tqdm
  5. import gymnasium as gym
  6. import numpy as np
  7. import pygame
  8. from gymnasium import Env
  9. from minigrid.core.actions import Actions
  10. from minigrid.core.state import to_state
  11. from minigrid.minigrid_env import MiniGridEnv
  12. from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
  13. def actionsToMiniGrid(actions):
  14. mask = [0] * 7
  15. for action in actions:
  16. if "turn_left" in action:
  17. mask[0] = 1
  18. elif "turn_right" in action:
  19. mask[1] = 1
  20. elif "move" in action:
  21. mask[2] = 1
  22. elif "pickup" in action:
  23. mask[3] = 1
  24. elif "drop" in action:
  25. mask[4] = 1
  26. elif "toggle" in action:
  27. mask[5] = 1
  28. elif "done" in action:
  29. mask[6] = 1
  30. return mask
  31. class Shield:
  32. def __init__(self, shieldfile="current.shield"):
  33. self.shieldfile = shieldfile
  34. self.parse()
  35. def parse(self):
  36. self.shield = dict()
  37. self.shield_raw = dict()
  38. with open(self.shieldfile, "r") as shield:
  39. shield = shield.readlines()
  40. for line in tqdm(shield[3:-2]):
  41. state_valuation = line[line.find("[")+1:line.find("]")]
  42. actions = line[line.find("]")+1:]
  43. ints = dict(re.findall(r'([a-zA-Z][_a-zA-Z0-9]+)=(-?[a-zA-Z0-9]+)', state_valuation))
  44. booleans = re.findall(r'(\!?)([a-zA-Z][_a-zA-Z0-9]+)[\s\t]+', state_valuation)
  45. booleans = {b[1]: False if b[0] == "!" else True for b in booleans}
  46. actions = re.findall(r'{([a-zA-Z][_a-zA-Z0-9]+)}', actions)
  47. if int(ints.get("clock", 0)) != 0:
  48. continue
  49. if int(ints.get("previousActionAgent", 3)) != 3:
  50. continue
  51. self.shield[to_state(ints, booleans)] = actionsToMiniGrid(actions)
  52. self.shield_raw[to_state(ints, booleans)] = line[line.find("]")+1:]
  53. def get_action_mask(self, state):
  54. print(state)
  55. try:
  56. return self.shield[state]
  57. except:
  58. print("Unsafe State")
  59. return [0.0] * 7
  60. def get_action_mask_raw(self, state):
  61. try:
  62. return self.shield_raw[state]
  63. except:
  64. print("Not listed")
  65. class ManualControl:
  66. def __init__(
  67. self,
  68. env: Env,
  69. seed=None,
  70. random_agent=False,
  71. shieldfile=None,
  72. enforce=False
  73. ) -> None:
  74. self.env = env
  75. self.seed = seed
  76. self.closed = False
  77. self.random_agent = random_agent
  78. if shieldfile is not None:
  79. self.shield = Shield(shieldfile)
  80. self.enforce= enforce
  81. self.cumulative_reward = 0
  82. def start(self):
  83. """Start the window display with blocking event loop"""
  84. self.reset(self.seed)
  85. while not self.closed:
  86. if self.random_agent:
  87. index = np.random.choice(7, 1)[0]
  88. action = [Actions.left, Actions.right, Actions.forward, Actions.pickup, Actions.drop, Actions.toggle, Actions.done][index]
  89. print(Actions(action), end=" ")
  90. if hasattr(self, "shield") and self.enforce:
  91. mask = self.shield.get_action_mask(self.env.get_symbolic_state())
  92. if mask[Actions(action)] == 1.0:
  93. self.step(action)
  94. else:
  95. print("blocked: ", Actions(action), end=" ")
  96. else:
  97. self.step(action)
  98. print(" ")
  99. else:
  100. for event in pygame.event.get():
  101. if event.type == pygame.QUIT:
  102. self.env.close()
  103. break
  104. if event.type == pygame.KEYDOWN:
  105. event.key = pygame.key.name(int(event.key))
  106. self.key_handler(event)
  107. def step(self, action: Actions):
  108. _, reward, terminated, truncated, info = self.env.step(action)
  109. self.cumulative_reward += reward
  110. print(f"step={self.env.step_count}, reward={reward:.4f}, cumulative_reward={self.cumulative_reward:.4f}")
  111. print(info)
  112. if hasattr(self, "shield") and self.enforce:
  113. symbolic_state = self.env.get_symbolic_state()
  114. mask = self.shield.get_action_mask(symbolic_state)
  115. print(mask)
  116. print(self.shield.get_action_mask_raw(symbolic_state))
  117. if terminated:
  118. print("terminated!")
  119. input("")
  120. self.reset(self.seed)
  121. elif truncated:
  122. print("truncated!")
  123. self.reset(self.seed)
  124. else:
  125. self.env.render()
  126. def reset(self, seed=None):
  127. self.env.reset(seed=seed)
  128. if hasattr(self, "shield") and self.enforce:
  129. symbolic_state = self.env.get_symbolic_state()
  130. mask = self.shield.get_action_mask(symbolic_state)
  131. print(mask)
  132. print(self.shield.get_action_mask_raw(symbolic_state))
  133. self.cumulative_reward = 0
  134. self.env.render()
  135. def key_handler(self, event):
  136. key: str = event.key
  137. print("pressed", key)
  138. if key == "escape":
  139. self.env.close()
  140. return
  141. if key == "backspace":
  142. self.reset()
  143. return
  144. if key == "f12":
  145. self.take_screenshot()
  146. return
  147. key_to_action = {
  148. "left": Actions.left,
  149. "right": Actions.right,
  150. "up": Actions.forward,
  151. "space": Actions.toggle,
  152. "pageup": Actions.pickup,
  153. "pagedown": Actions.drop,
  154. "tab": Actions.pickup,
  155. "left shift": Actions.drop,
  156. "enter": Actions.done,
  157. }
  158. if key in key_to_action.keys():
  159. action = key_to_action[key]
  160. symbolic_state = self.env.get_symbolic_state()
  161. if hasattr(self, "shield") and self.enforce:
  162. mask = self.shield.get_action_mask(symbolic_state)
  163. print(mask)
  164. print(self.shield.get_action_mask_raw(symbolic_state))
  165. if mask[Actions(action)] == 1.0:
  166. self.step(action)
  167. else:
  168. print(key)
  169. elif hasattr(self, "shield") and not self.enforce:
  170. mask = self.shield.get_action_mask(symbolic_state)
  171. print(mask)
  172. print(self.shield.get_action_mask_raw(symbolic_state))
  173. else:
  174. self.step(action)
  175. else:
  176. print(key)
  177. def take_screenshot(self):
  178. import datetime
  179. filename = f"{datetime.datetime.now().isoformat()}.png"
  180. print(f"Saving a screenshot to '{filename}'")
  181. window = self.env.window
  182. screenshot = pygame.Surface(window.get_size())
  183. screenshot.blit(window, (0,0))
  184. pygame.image.save(screenshot, filename)
  185. if __name__ == "__main__":
  186. import argparse
  187. parser = argparse.ArgumentParser()
  188. parser.add_argument(
  189. "--env-id",
  190. type=str,
  191. help="gym environment to load",
  192. choices=gym.envs.registry.keys(),
  193. default="MiniGrid-MultiRoom-N6-v0",
  194. )
  195. parser.add_argument(
  196. "--seed",
  197. type=int,
  198. help="random seed to generate the environment with",
  199. default=None,
  200. )
  201. parser.add_argument(
  202. "--tile-size", type=int, help="size at which to render tiles", default=32
  203. )
  204. parser.add_argument(
  205. "--agent-view",
  206. action="store_true",
  207. help="draw the agent sees (partially observable view)",
  208. )
  209. parser.add_argument(
  210. "--agent-view-size",
  211. type=int,
  212. default=7,
  213. help="set the number of grid spaces visible in agent-view ",
  214. )
  215. parser.add_argument(
  216. "--screen-size",
  217. type=int,
  218. default="640",
  219. help="set the resolution for pygame rendering (width and height)",
  220. )
  221. parser.add_argument(
  222. "--random-agent",
  223. action="store_true",
  224. help="make the agent move around randomly"
  225. )
  226. parser.add_argument(
  227. "--shield-file",
  228. type=str,
  229. help="shield file to parse and load",
  230. )
  231. parser.add_argument(
  232. "--no-enforcement",
  233. action="store_true",
  234. help="do not enforce, but inform the user abouth shield violations"
  235. )
  236. args = parser.parse_args()
  237. env: MiniGridEnv = gym.make(
  238. args.env_id,
  239. tile_size=args.tile_size,
  240. render_mode="human",
  241. agent_pov=args.agent_view,
  242. agent_view_size=args.agent_view_size,
  243. screen_size=args.screen_size,
  244. )
  245. if args.agent_view:
  246. print("Using agent view")
  247. env = RGBImgPartialObsWrapper(env, args.tile_size)
  248. env = ImgObsWrapper(env)
  249. #env.disable_random_start()
  250. print(env.printGrid(init=True))
  251. manual_control = ManualControl(env, seed=args.seed, random_agent=args.random_agent, shieldfile=args.shield_file, enforce=args.no_enforcement == False)
  252. manual_control.start()