# Skiing/rom_evaluate.py


								import sys

								from random import randrange

								from ale_py import ALEInterface, SDL_SUPPORT, Action

								from colors import *

								from PIL import Image

								from matplotlib import pyplot as plt

								import cv2

								import pickle

								import queue


								from copy import deepcopy


								import numpy as np


								import readchar


								from sample_factory.algo.utils.tensor_dict import TensorDict

								from query_sample_factory_checkpoint import SampleFactoryNNQueryWrapper


								import time


								def input_to_action(char):
								    """Translate one typed character into an emulator action or a tool command.

								    Mapping:
								        "0", "1", "2"       -> Action.NOOP, Action.RIGHT, Action.LEFT
								        "3", "4", "5"       -> the strings "reset", "set_x", "set_vel"
								        "w", "a", "s", "d"  -> the character itself

								    Anything else yields ``None``.
								    """
								    # Tool commands are plain strings; check them before the joystick digits.
								    for digit, command in (("3", "reset"), ("4", "set_x"), ("5", "set_vel")):
								        if char == digit:
								            return command

								    # Movement keys pass through unchanged.
								    if char in ("w", "a", "s", "d"):
								        return char

								    # Digits that stand for joystick actions.
								    if char == "0":
								        return Action.NOOP
								    elif char == "1":
								        return Action.RIGHT
								    elif char == "2":
								        return Action.LEFT

								    return None


								# For each ski position index (1-8): the action to repeat and how many times
								# to repeat it.  run_single_test replays this sequence before handing control
								# to the network.
								ski_position_counter = {
								    1: (Action.LEFT, 40),
								    2: (Action.LEFT, 35),
								    3: (Action.LEFT, 30),
								    4: (Action.LEFT, 10),
								    5: (Action.NOOP, 1),
								    6: (Action.RIGHT, 10),
								    7: (Action.RIGHT, 30),
								    8: (Action.RIGHT, 40),
								}


								def run_single_test(ale, nn_wrapper, x, y, ski_position, duration=200, step_delay=0.005):
								    """Start the skier from a recorded position and let the network steer it.

								    The RAM snapshot stored in the module-level ``ramDICT`` under ``y`` is
								    written into the emulator, the action sequence configured for
								    ``ski_position`` is replayed while RAM address 14 is held at 0 and
								    address 25 at ``x``, and address 14 is then set to 180.  After that the
								    network is queried once per step with the four most recent 84x84
								    grayscale frames.

								    Args:
								        ale: emulator interface; must provide ``setRAM``, ``act`` and
								            ``getScreenGrayscale``.
								        nn_wrapper: object whose ``query`` takes a ``TensorDict`` with an
								            ``"obs"`` entry; ``str()`` of its result is passed to
								            ``input_to_action``.
								        x: value written to RAM address 25 (printed elsewhere in this file
								            as ``player_x``).
								        y: key of the RAM snapshot to restore from ``ramDICT``.
								        ski_position: key into ``ski_position_counter``.
								        duration: number of emulator steps to run after the setup phase.
								        step_delay: seconds to sleep after every step; 0 disables the pause.

								    Raises:
								        KeyError: if ``ski_position`` is not a key of
								            ``ski_position_counter`` (a missing ``y`` fails the ``ramDICT``
								            lookup in the same place).
								    """
								    # Local import: the file's import block does not provide deque.
								    from collections import deque

								    frames_per_query = 4

								    print(f"Running Test from x: {x:04}, y: {y:04}, ski_position: {ski_position}")

								    # Resolve both lookups before touching the emulator so a bad argument
								    # fails without leaving a half-written RAM state behind.
								    snapshot = ramDICT[y]
								    setup_action, setup_steps = ski_position_counter[ski_position]

								    for address, value in enumerate(snapshot):
								        ale.setRAM(address, value)

								    # Replay the configured action; addresses 14 and 25 are rewritten after
								    # every step so the emulator cannot change them during the replay.
								    for _ in range(setup_steps):
								        ale.act(setup_action)
								        ale.setRAM(14, 0)
								        ale.setRAM(25, x)
								    ale.setRAM(14, 180)

								    # Only the newest frames are ever read, so keep a bounded window instead
								    # of accumulating one frame per step for the whole run.
								    recent_frames = deque(maxlen=frames_per_query)
								    for _ in range(duration):
								        frame = cv2.resize(ale.getScreenGrayscale(), (84, 84), interpolation=cv2.INTER_AREA)
								        recent_frames.append(frame)
								        if len(recent_frames) == frames_per_query:
								            stack_tensor = TensorDict({"obs": np.array(list(recent_frames))})
								            action = nn_wrapper.query(stack_tensor)
								            ale.act(input_to_action(str(action)))
								        else:
								            # Not enough frames for a full stack yet.
								            ale.act(Action.NOOP)
								        if step_delay:
								            time.sleep(step_delay)


								# Emulator instance used by the scripted tests and the interactive loop.
								ale = ALEInterface()

								# Sound and the on-screen display are only switched on when SDL_SUPPORT is set.
								if SDL_SUPPORT:
								    for sdl_option in ("sound", "display_screen"):
								        ale.setBool(sdl_option, True)

								# Load the ROM file
								rom_file = "/home/spranger/research/Skiing/env/lib/python3.8/site-packages/AutoROM/roms/skiing.bin"
								ale.loadROM(rom_file)

								# Table of recorded RAM snapshots: ramDICT[y] is the sequence of byte values
								# that gets written to RAM addresses 0, 1, 2, ... to restore position y.
								# NOTE: pickle.load can execute arbitrary code; only load a file you recorded
								# yourself.
								with open('all_positions_v2.pickle', 'rb') as handle:
								    ramDICT = pickle.load(handle)

								# Starting coordinates for the interactive loop.
								y_ram_setting, x = 60, 70

								nn_wrapper = SampleFactoryNNQueryWrapper()

								# Scripted evaluation runs: positional (x, y, ski_position) plus optional
								# keyword overrides for run_single_test.
								scripted_runs = (
								    ((30, 61, 5), {"duration": 1000}),
								    ((114, 170, 7), {}),
								    ((124, 170, 5), {}),
								    ((134, 170, 2), {}),
								    ((120, 185, 1), {}),
								    ((134, 170, 8), {}),
								    ((85, 195, 8), {}),
								)
								for run_args, run_options in scripted_runs:
								    run_single_test(ale, nn_wrapper, *run_args, **run_options)

								# Interactive RAM explorer: the emulator advances one step per key press.
								#
								#   0 / 1 / 2   NOOP / RIGHT / LEFT joystick action
								#   w / s       restore the previous / next recorded y snapshot
								#   a / d       decrease / increase x by one (RAM address 25)
								#   3           prompt for a snapshot index and restore it
								#   4           prompt for an x value
								#   5           prompt for a value for RAM address 14; from then on that
								#               address is no longer zeroed before every key press
								#   Ctrl+C      quit
								#
								# NOTE: an earlier, disabled version of this loop recorded the snapshot table
								# by storing ale.getRAM() under y_pixel = step // 2 + 55 on every second step.

								# Set to True to print a per-address RAM delta table after every step, with
								# changed cells in red.  NOTE(review): relies on `color` being provided by the
								# `colors` star import - confirm before enabling.
								SHOW_RAM_DIFF = False


								def _restore_snapshot(y_position):
								    """Write the RAM snapshot recorded for ``y_position`` into the emulator."""
								    for address, value in enumerate(ramDICT[y_position]):
								        ale.setRAM(address, value)


								velocity_set = False

								# Baseline for the first RAM delta; without it the first comparison below
								# would read `oldram` before it had ever been assigned.
								oldram = [int(byte) for byte in ale.getRAM()]

								for episode in range(10):
								    total_reward = 0
								    while not ale.game_over():
								        if not velocity_set:
								            ale.setRAM(14, 0)

								        pressed = readchar.readchar()
								        # NOTE(review): readchar.readchar() is expected to return Ctrl+C as
								        # "\x03" instead of raising - confirm for the installed version.
								        # Unmapped keys are skipped below, so without this check there would
								        # be no way to leave the loop from the keyboard.
								        if pressed == "\x03":
								            raise KeyboardInterrupt

								        key = repr(pressed)[1]
								        a = input_to_action(key)
								        if a is None:
								            # Unmapped key: ale.act(None) is not a valid call, so skip it.
								            print(f"Unmapped key {key!r}; use 0-5 or w/a/s/d")
								            continue

								        if a == "w":
								            y_ram_setting = max(y_ram_setting - 1, 61)
								            _restore_snapshot(y_ram_setting)
								            ale.setRAM(25, x)
								            ale.act(Action.NOOP)
								        elif a == "s":
								            y_ram_setting += 1
								            # NOTE(review): reaching 1950 jumps back to 1945 rather than
								            # stopping at the limit - kept as in the original, confirm intent.
								            if y_ram_setting >= 1950:
								                y_ram_setting = 1945
								            _restore_snapshot(y_ram_setting)
								            ale.setRAM(25, x)
								            ale.act(Action.NOOP)
								        elif a == "a":
								            x = max(x - 1, 0)
								            ale.setRAM(25, x)
								            ale.act(Action.NOOP)
								        elif a == "d":
								            x = min(x + 1, 144)
								            ale.setRAM(25, x)
								            ale.act(Action.NOOP)
								        elif a == "reset":
								            ram_pos = input("Ram Position:")
								            # NOTE(review): y_ram_setting is not updated here, so the y value
								            # printed below and the w/s keys keep using the old position.
								            try:
								                _restore_snapshot(int(ram_pos))
								            except (ValueError, KeyError):
								                print(f"No RAM snapshot for position {ram_pos!r}")
								                continue
								            ale.act(Action.NOOP)
								        elif a == "set_x":
								            try:
								                x = int(input("X:"))
								            except ValueError:
								                print("X must be an integer")
								                continue
								            ale.setRAM(25, x)
								            ale.act(Action.NOOP)
								        elif a == "set_vel":
								            try:
								                vel = int(input("Velocity:"))
								            except ValueError:
								                print("Velocity must be an integer")
								                continue
								            ale.setRAM(14, vel)
								            ale.act(Action.NOOP)
								            velocity_set = True
								        else:
								            # Apply an action and add the resulting reward to the episode total.
								            reward = ale.act(a)
								            total_reward += reward

								        ram = ale.getRAM()

								        # Per-address change since the previous step, computed on plain ints
								        # (presumably so that a decreasing byte shows up as a negative delta).
								        int_ram = [int(byte) for byte in ram]
								        difference = [new - old for old, new in zip(oldram, int_ram)]
								        oldram = int_ram

								        print(f"player_x: {ram[25]},\tplayer_y: {y_ram_setting}")

								        if SHOW_RAM_DIFF:
								            for i, delta in enumerate(difference):
								                cell = '{:02}:{:03} '.format(i % 100, delta)
								                print(color(cell, fg='red') if delta != 0 else cell, end="")
								                if i % 16 == 15:
								                    print("")

								    print("Episode %d ended with score: %d" % (episode, total_reward))
								    input("")

								    # Write the snapshot table back to disk before starting the next episode.
								    with open('all_positions_v2.pickle', 'wb') as handle:
								        pickle.dump(ramDICT, handle, protocol=pickle.HIGHEST_PROTOCOL)
								    ale.reset_game()