# Skiing/manual_control.py

import sys
import operator
from os import listdir, system
from random import randrange
from ale_py import ALEInterface, SDL_SUPPORT, Action
from colors import *
from PIL import Image
from matplotlib import pyplot as plt
import cv2
import pickle
import queue
from dataclasses import dataclass, field

from enum import Enum

from copy import deepcopy

import numpy as np

import readchar

from sample_factory.algo.utils.tensor_dict import TensorDict
from query_sample_factory_checkpoint import SampleFactoryNNQueryWrapper

import time

# Hard-coded, machine-specific absolute paths; adjust them before running on another machine.
# NOTE(review): tempest_binary is not referenced anywhere in the visible part of this script.
tempest_binary = "/home/spranger/projects/tempest-devel/ranking_release/bin/storm"
# ROM image handed to ale.loadROM() below.
rom_file = "/home/spranger/research/Skiing/env/lib/python3.8/site-packages/AutoROM/roms/skiing.bin"


def input_to_action(char):
    """Translate a single pressed key into a command for the main loop.

    Returns:
        * ``"w"``, ``"a"``, ``"s"`` or ``"d"`` unchanged for those keys,
        * ``"reset"``, ``"set_x"`` or ``"set_vel"`` for ``"3"``, ``"4"``, ``"5"``,
        * ``Action.NOOP``, ``Action.RIGHT`` or ``Action.LEFT`` for ``"0"``,
          ``"1"``, ``"2"``,
        * ``None`` for every other key.
    """
    # Keys that are handed back verbatim.
    if char in ("w", "a", "s", "d"):
        return char

    # Digit keys that select one of the named commands.
    for key, command in (("3", "reset"), ("4", "set_x"), ("5", "set_vel")):
        if char == key:
            return command

    # Digit keys that map directly onto emulator actions.
    if char == "0":
        return Action.NOOP
    elif char == "1":
        return Action.RIGHT
    elif char == "2":
        return Action.LEFT

    # Anything else is not bound to a command.
    return None


# Emulator handle used by everything below.
ale = ALEInterface()


# Sound and the on-screen display are only switched on when ale_py reports SDL support.
if SDL_SUPPORT:
    ale.setBool("sound", True)
    ale.setBool("display_screen", True)

# Load the ROM file
ale.loadROM(rom_file)

# ramDICT is indexed by y_ram_setting in the main loop; each entry is iterated
# as a sequence of values that are written to RAM cells 0, 1, 2, ...
# NOTE: pickle.load can execute arbitrary code; only open a pickle file you trust.
with open('all_positions_v2.pickle', 'rb') as handle:
    ramDICT = pickle.load(handle)
# Current key into ramDICT; changed with the "w"/"s" keys in the main loop.
y_ram_setting = 60
# Value written to RAM cell 25 (printed as player_x in the main loop); changed with "a"/"d" or "set_x".
x = 70

# RAM contents from the previous loop iteration, used to compute a per-cell difference.
oldram = deepcopy(ale.getRAM())
# While False, the main loop writes 0 to RAM cell 14 on every iteration;
# becomes True once a value has been entered via "set_vel".
velocity_set = False
# Interactive main loop: each key press either moves the player by rewriting
# emulator RAM ("w"/"a"/"s"/"d", "reset", "set_x", "set_vel") or is forwarded
# to the emulator as a regular action.
for episode in range(10):
    total_reward = 0
    j = 0  # number of handled key presses; only used by the disabled recording snippet below
    while not ale.game_over():
        # Until a value has been entered via "set_vel", RAM cell 14 is zeroed every iteration.
        if not velocity_set:
            ale.setRAM(14, 0)

        # repr(...)[1] is the first character after the opening quote of the
        # key's repr, i.e. the key itself for plain printable characters.
        a = input_to_action(repr(readchar.readchar())[1])
        if a is None:
            # Unmapped key: ignore it instead of handing None to ale.act().
            continue
        j += 1
        #a = Action.NOOP

        if a == "w" or a == "s":
            # Step through the stored RAM snapshots, one ramDICT key at a time.
            if a == "w":
                y_ram_setting = max(y_ram_setting - 1, 61)
            else:
                y_ram_setting += 1
                if y_ram_setting >= 1950:
                    y_ram_setting = 1945
            for i, r in enumerate(ramDICT[y_ram_setting]):
                ale.setRAM(i, r)
            # The snapshot overwrote cell 25, so re-apply the current x.
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "a" or a == "d":
            # Shift x by one, clamped to [0, 144].
            x = max(x - 1, 0) if a == "a" else min(x + 1, 144)
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "reset":
            # Restore the snapshot stored under a user-supplied ramDICT key.
            ram_pos = input("Ram Position:")
            for i, r in enumerate(ramDICT[int(ram_pos)]):
                ale.setRAM(i, r)
            ale.act(Action.NOOP)
        elif a == "set_x":
            x = int(input("X:"))
            ale.setRAM(25, x)
            ale.act(Action.NOOP)
        elif a == "set_vel":
            vel = input("Velocity:")
            ale.setRAM(14, int(vel))
            ale.act(Action.NOOP)
            # Stop zeroing RAM cell 14 at the top of the loop from now on.
            velocity_set = True
        else:
            # Apply the action and add the resulting reward to the episode
            # score. The NOOP steps taken by the RAM-editing commands above
            # are deliberately not counted.
            reward = ale.act(a)
            total_reward += reward
        ram = ale.getRAM()
        #if j % 2 == 0:
        #    y_pixel = int(j*1/2) + 55
        #    ramDICT[y_pixel] = ram
        #    print(f"saving to {y_pixel:04}")
        #    if y_pixel == 126 or y_pixel == 235:
        #        input("")

        # Per-cell change since the previous iteration; converted to int
        # before subtracting. Only consumed by the disabled dump further down.
        difference = [int(r) - int(o) for o, r in zip(oldram, ram)]

        oldram = deepcopy(ram)
        #print(f"player_x: {ram[25]},\tclock_m: {ram[104]},\tclock_s: {ram[105]},\tclock_ms: {ram[106]},\tscore: {ram[107]}")
        print(f"player_x: {ram[25]},\tplayer_y: {y_ram_setting}")
        #print(f"y_0: {ram[86]}, y_1: {ram[87]}, y_2: {ram[88]}, y_3: {ram[89]}, y_4: {ram[90]}, y_5: {ram[91]}, y_6: {ram[92]}, y_7: {ram[93]}, y_8: {ram[94]}")

        #for i, r in enumerate(ram):
        #    print('{:03}:{:02x} '.format(i,r), end="")
        #    if i % 16 == 15: print("")
        #print("")
        #for i, r in enumerate(difference):
        #    string = '{:02}:{:03} '.format(i%100,r)
        #    if r != 0:
        #        print(color(string, fg='red'), end="")
        #    else:
        #        print(string, end="")
        #    if i % 16 == 15: print("")
    print("Episode %d ended with score: %d" % (episode, total_reward))
    input("")

    # Write ramDICT back to disk after every episode, then start a new game.
    with open('all_positions_v2.pickle', 'wb') as handle:
        pickle.dump(ramDICT, handle, protocol=pickle.HIGHEST_PROTOCOL)
    ale.reset_game()