
committing some more helper stuff

main
sp committed 1 month ago
parent commit 985aedcaa9
  1. all_positions.pickle (BIN)
  2. manual_control.py (+156)
  3. test.py (-8)
  4. train.py (+14)

all_positions.pickle (BIN)

manual_control.py (+156)

@@ -0,0 +1,156 @@
import sys
import operator
from os import listdir, system
from random import randrange
from ale_py import ALEInterface, SDL_SUPPORT, Action
from colors import *
from PIL import Image
from matplotlib import pyplot as plt
import cv2
import pickle
import queue
from dataclasses import dataclass, field
from enum import Enum
from copy import deepcopy
import numpy as np
import readchar
from sample_factory.algo.utils.tensor_dict import TensorDict
from query_sample_factory_checkpoint import SampleFactoryNNQueryWrapper
import time

tempest_binary = "/home/spranger/projects/tempest-devel/ranking_release/bin/storm"
rom_file = "/home/spranger/research/Skiing/env/lib/python3.8/site-packages/AutoROM/roms/skiing.bin"


def input_to_action(char):
    # Map a keypress to an ALE action or a control-command string.
    if char == "0":
        return Action.NOOP
    if char == "1":
        return Action.RIGHT
    if char == "2":
        return Action.LEFT
    if char == "3":
        return "reset"
    if char == "4":
        return "set_x"
    if char == "5":
        return "set_vel"
    if char in ["w", "a", "s", "d"]:
        return char


ale = ALEInterface()

if SDL_SUPPORT:
    ale.setBool("sound", True)
    ale.setBool("display_screen", True)

# Load the ROM file
ale.loadROM(rom_file)

# Mapping from vertical track position to a full recorded RAM snapshot.
with open('all_positions_v2.pickle', 'rb') as handle:
    ramDICT = pickle.load(handle)

y_ram_setting = 60
x = 70

oldram = deepcopy(ale.getRAM())

velocity_set = False
for episode in range(10):
    total_reward = 0
    j = 0
    while not ale.game_over():
        if not velocity_set: ale.setRAM(14, 0)  # freeze velocity until explicitly set
        j += 1
        a = input_to_action(repr(readchar.readchar())[1])
        #a = Action.NOOP
        if a == "w":
            # Move up the track: restore the RAM snapshot for the new y position.
            y_ram_setting -= 1
            if y_ram_setting <= 61:
                y_ram_setting = 61
            for i, r in enumerate(ramDICT[y_ram_setting]):
                ale.setRAM(i, r)
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "s":
            # Move down the track.
            y_ram_setting += 1
            if y_ram_setting >= 1950:
                y_ram_setting = 1945
            for i, r in enumerate(ramDICT[y_ram_setting]):
                ale.setRAM(i, r)
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "a":
            # Move the skier left; RAM byte 25 holds the player's x position.
            x -= 1
            if x <= 0:
                x = 0
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "d":
            # Move the skier right.
            x += 1
            if x >= 144:
                x = 144
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "reset":
            # Jump to an arbitrary recorded RAM position.
            ram_pos = input("Ram Position:")
            for i, r in enumerate(ramDICT[int(ram_pos)]):
                ale.setRAM(i, r)
            ale.act(Action.NOOP)
        elif a == "set_x":
            x = int(input("X:"))
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "set_vel":
            vel = input("Velocity:")
            ale.setRAM(14, int(vel))
            ale.act(Action.NOOP)
            velocity_set = True
        else:
            # Apply an action and get the resulting reward.
            reward = ale.act(a)
        ram = ale.getRAM()
        #if j % 2 == 0:
        #    y_pixel = int(j*1/2) + 55
        #    ramDICT[y_pixel] = ram
        #    print(f"saving to {y_pixel:04}")
        #    if y_pixel == 126 or y_pixel == 235:
        #        input("")
        int_old_ram = list(map(int, oldram))
        int_ram = list(map(int, ram))
        difference = list()
        for o, r in zip(int_old_ram, int_ram):
            difference.append(r - o)
        oldram = deepcopy(ram)
        #print(f"player_x: {ram[25]},\tclock_m: {ram[104]},\tclock_s: {ram[105]},\tclock_ms: {ram[106]},\tscore: {ram[107]}")
        print(f"player_x: {ram[25]},\tplayer_y: {y_ram_setting}")
        #print(f"y_0: {ram[86]}, y_1: {ram[87]}, y_2: {ram[88]}, y_3: {ram[89]}, y_4: {ram[90]}, y_5: {ram[91]}, y_6: {ram[92]}, y_7: {ram[93]}, y_8: {ram[94]}")
        #for i, r in enumerate(ram):
        #    print('{:03}:{:02x} '.format(i, r), end="")
        #    if i % 16 == 15: print("")
        #print("")
        #for i, r in enumerate(difference):
        #    string = '{:02}:{:03} '.format(i % 100, r)
        #    if r != 0:
        #        print(color(string, fg='red'), end="")
        #    else:
        #        print(string, end="")
        #    if i % 16 == 15: print("")
    print("Episode %d ended with score: %d" % (episode, total_reward))
    input("")
    with open('all_positions_v2.pickle', 'wb') as handle:
        pickle.dump(ramDICT, handle, protocol=pickle.HIGHEST_PROTOCOL)
    ale.reset_game()
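Note: the w/s/reset branches above all rely on the same RAM-restore trick: look up a previously recorded 128-byte RAM snapshot for the target vertical position, write it back byte by byte, pin the skier's x coordinate (RAM byte 25), and step once so the emulator applies the new state. A minimal standalone sketch of that teleport, assuming ale_py with setRAM support, a local skiing.bin, and a recorded snapshot for y=100 (teleport and ram_by_y are illustrative names, not part of the commit):

    import pickle
    from ale_py import ALEInterface, Action

    ale = ALEInterface()
    ale.loadROM("skiing.bin")  # hypothetical local path; the commit hard-codes an AutoROM path

    with open("all_positions_v2.pickle", "rb") as handle:
        ram_by_y = pickle.load(handle)  # maps y position -> full 128-byte RAM snapshot

    def teleport(ale, ram_snapshot, player_x):
        # Restore every RAM byte from the snapshot, then overwrite the
        # skier's x coordinate (byte 25) and step once so the state takes effect.
        for address, value in enumerate(ram_snapshot):
            ale.setRAM(address, value)
        ale.setRAM(25, player_x)
        ale.act(Action.NOOP)

    teleport(ale, ram_by_y[100], 70)  # assumes a snapshot for y=100 was recorded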

test.py (-8)

@@ -6,9 +6,6 @@ import numpy as np
from matplotlib import pyplot as plt
import readchar
import queue
ski_position_queue = queue.Queue()
env = gym.make("ALE/Skiing-v5", render_mode="human")
@@ -70,9 +67,4 @@ for _ in range(1000000):
observation, reward, terminated, truncated, info = env.step(0)
observation, reward, terminated, truncated, info = env.step(0)
#plt.imshow(observation)
#plt.show()
#im = Image.fromarray(observation)
#im.save("init.png")
env.close()
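For reference, test.py drives the Gymnasium-style five-tuple step API shown in the hunk above; a minimal self-contained loop under the same assumptions (a gym version with ale-py's ALE/Skiing-v5 registered) would look like:

    import gym

    env = gym.make("ALE/Skiing-v5", render_mode="human")
    observation, info = env.reset()
    for _ in range(1000):
        # Action 0 is NOOP; substitute a policy's action to actually steer.
        observation, reward, terminated, truncated, info = env.step(0)
        if terminated or truncated:
            observation, info = env.reset()
    env.close()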

train.py (+14)

@@ -0,0 +1,14 @@
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure, Image
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from gym_minigrid.wrappers import RGBImgObsWrapper, ImgObsWrapper, MiniWrapper
import os
from subprocess import call
import time
import argparse
import gym

env = gym.make("ALE/Skiing-v5", render_mode="human")

observation, info = env.reset()
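As committed, train.py stops after resetting the environment; none of the imported stable-baselines3 machinery is wired up yet. A sketch of the presumably intended next step, with placeholder hyperparameters and an assumed SB3/gym version match (not the committed code):

    from stable_baselines3 import PPO
    import gym

    env = gym.make("ALE/Skiing-v5")
    model = PPO("CnnPolicy", env, verbose=1)  # CnnPolicy for pixel observations
    model.learn(total_timesteps=100_000)      # placeholder training budget
    model.save("ppo_skiing")                  # hypothetical checkpoint name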