Skiing/manual_control.py


								import sys

								import operator

								from os import listdir, system

								from random import randrange

								from ale_py import ALEInterface, SDL_SUPPORT, Action

								from colors import *

								from PIL import Image

								from matplotlib import pyplot as plt

								import cv2

								import pickle

								import queue

								from dataclasses import dataclass, field


								from enum import Enum


								from copy import deepcopy


								import numpy as np


								import readchar


								from sample_factory.algo.utils.tensor_dict import TensorDict

								from query_sample_factory_checkpoint import SampleFactoryNNQueryWrapper


								import time


								tempest_binary = "/home/spranger/projects/tempest-devel/ranking_release/bin/storm"

								rom_file = "/home/spranger/research/Skiing/env/lib/python3.8/site-packages/AutoROM/roms/skiing.bin"


								def input_to_action(char):

								    if char == "0":

								        return Action.NOOP

								    if char == "1":

								        return Action.RIGHT

								    if char == "2":

								        return Action.LEFT

								    if char == "3":

								        return "reset"

								    if char == "4":

								        return "set_x"

								    if char == "5":

								        return "set_vel"

								    if char in ["w", "a", "s", "d"]:

								        return char


								ale = ALEInterface()


								if SDL_SUPPORT:

								    ale.setBool("sound", True)

								    ale.setBool("display_screen", True)


								# Load the ROM file

								ale.loadROM(rom_file)


								with open('all_positions_v2.pickle', 'rb') as handle:

								    ramDICT = pickle.load(handle)

								y_ram_setting = 60

								x = 70


								oldram = deepcopy(ale.getRAM())

								velocity_set = False

								for episode in range(10):

								    total_reward = 0

								    j = 0

								    while not ale.game_over():

								        if not velocity_set: ale.setRAM(14,0)

								        j += 1

								        a = input_to_action(repr(readchar.readchar())[1])

								        #a = Action.NOOP


								        if a == "w":

								            y_ram_setting -= 1

								            if y_ram_setting <= 61:

								                y_ram_setting = 61

								            for i, r in enumerate(ramDICT[y_ram_setting]):

								                ale.setRAM(i,r)

								            ale.setRAM(25,x)

								            ale.act(Action.NOOP)

								        elif a == "s":

								            y_ram_setting += 1

								            if y_ram_setting >= 1950:

								                y_ram_setting = 1945

								            for i, r in enumerate(ramDICT[y_ram_setting]):

								                ale.setRAM(i,r)

								            ale.setRAM(25,x)

								            ale.act(Action.NOOP)

								        elif a == "a":

								            x -= 1

								            if x <= 0:

								                x = 0

								            ale.setRAM(25,x)

								            ale.act(Action.NOOP)

								        elif a == "d":

								            x += 1

								            if x >= 144:

								                x = 144

								            ale.setRAM(25,x)

								            ale.act(Action.NOOP)


								        elif a == "reset":

								            ram_pos = input("Ram Position:")

								            for i, r in enumerate(ramDICT[int(ram_pos)]):

								                ale.setRAM(i,r)

								            ale.act(Action.NOOP)

								        # Apply an action and get the resulting reward

								        elif a == "set_x":

								            x = int(input("X:"))

								            ale.setRAM(25, x)

								            ale.act(Action.NOOP)

								        elif a == "set_vel":

								            vel = input("Velocity:")

								            ale.setRAM(14, int(vel))

								            ale.act(Action.NOOP)

								            velocity_set = True

								        else:

								            reward = ale.act(a)

								        ram = ale.getRAM()

								        #if j % 2 == 0:

								        #    y_pixel = int(j*1/2) + 55

								        #    ramDICT[y_pixel] = ram

								        #    print(f"saving to {y_pixel:04}")

								        #    if y_pixel == 126 or y_pixel == 235:

								        #        input("")


								        int_old_ram = list(map(int, oldram))

								        int_ram = list(map(int, ram))

								        difference = list()

								        for o, r in zip(int_old_ram, int_ram):

								            difference.append(r-o)


								        oldram = deepcopy(ram)

								        #print(f"player_x: {ram[25]},\tclock_m: {ram[104]},\tclock_s: {ram[105]},\tclock_ms: {ram[106]},\tscore: {ram[107]}")

								        print(f"player_x: {ram[25]},\tplayer_y: {y_ram_setting}")

								        #print(f"y_0: {ram[86]}, y_1: {ram[87]}, y_2: {ram[88]}, y_3: {ram[89]}, y_4: {ram[90]}, y_5: {ram[91]}, y_6: {ram[92]}, y_7: {ram[93]}, y_8: {ram[94]}")


								        #for i, r in enumerate(ram):

								        #    print('{:03}:{:02x} '.format(i,r), end="")

								        #    if i % 16 == 15: print("")

								        #print("")

								        #for i, r in enumerate(difference):

								        #    string = '{:02}:{:03} '.format(i%100,r)

								        #    if r != 0:

								        #        print(color(string, fg='red'), end="")

								        #    else:

								        #        print(string, end="")

								        #    if i % 16 == 15: print("")

								    print("Episode %d ended with score: %d" % (episode, total_reward))

								    input("")


								    with open('all_positions_v2.pickle', 'wb') as handle:

								        pickle.dump(ramDICT, handle, protocol=pickle.HIGHEST_PROTOCOL)

								    ale.reset_game()