# Standard library
import operator
import pickle
import queue
import re
import sys
import time
from copy import deepcopy
from dataclasses import dataclass, field
from enum import Enum
from os import listdir, system
from random import randrange

# Third party
import cv2
import numpy as np
from ale_py import ALEInterface, SDL_SUPPORT, Action
from colors import *
from matplotlib import pyplot as plt

# Project local
from query_sample_factory_checkpoint import SampleFactoryNNQueryWrapper
|
# Path to the tempest/storm model-checker binary invoked by computeStateRanking().
tempest_binary = "/home/spranger/projects/tempest-devel/ranking_release/bin/storm"
# PRISM MDP model file handed to the model checker.
mdp_file = "simplified.prism"
# Atari Skiing ROM loaded into the ALE emulator below.
rom_file = "/home/spranger/research/Skiing/env/lib/python3.8/site-packages/AutoROM/roms/skiing.bin"
class Verdict(Enum):
    """Outcome of a single simulated test run in the emulator."""
    INCONCLUSIVE = 1  # run ended without a clear good/bad signal
    GOOD = 2
    BAD = 3           # e.g. the skier stalled (speed RAM stayed at 0)
|
def convert(tuples):
    """Build a plain dict from an iterable of (key, value) pairs."""
    mapping = dict(tuples)
    return mapping
|
@dataclass(frozen=True)
class State:
    """Immutable (hashable) skier position in the abstract model."""
    # Horizontal position (written to player-x RAM cell 25 elsewhere).
    x: int
    # Vertical position (row index into the ramDICT snapshots).
    y: int
    # Ski orientation in the model's 1..14 encoding (see model_to_actual).
    ski_position: int
|
|
|
def default_value():
    """Default ``choices`` mapping for a StateValue with nothing recorded."""
    return dict(action=None, choiceValue=None)
|
@dataclass(frozen=True)
class StateValue:
    """Ranking result for one State, parsed from the model-checker output."""
    # Importance / ranking value of the state.
    ranking: float
    # Maps action names (left/right/noop per fillStateRanking's regex) to values.
    choices: dict = field(default_factory=default_value)
|
|
|
|
|
def exec(command, verbose=True):
    """Append *command* to ./list_of_exec, then run it through the shell.

    Returns the exit status reported by os.system.  NOTE: deliberately
    shadows the builtin exec(); the rest of this script calls it as such.
    """
    if verbose:
        print(f"Executing {command}")
    system(f"echo {command} >> list_of_exec")
    status = system(command)
    return status
|
|
|
|
|
def model_to_actual(ski_position):
    """Map the model's 1..14 ski_position encoding onto the emulator's 1..8.

    The abstract MDP distinguishes 14 orientations while the emulator side
    (ski_position_counter) uses 8; positions 2..13 collapse pairwise.
    Returns None for values outside 1..14, matching the implicit fallthrough
    of the original if/elif chain.
    """
    # Lookup table replaces an eight-branch if/elif chain.
    _MODEL_TO_ACTUAL = {1: 1, 2: 2, 3: 2, 4: 3, 5: 3, 6: 4, 7: 4,
                        8: 5, 9: 5, 10: 6, 11: 6, 12: 7, 13: 7, 14: 8}
    return _MODEL_TO_ACTUAL.get(ski_position)
|
|
|
|
|
|
def input_to_action(char): |
|
|
def input_to_action(char): |
|
|
if char == "0": |
|
|
if char == "0": |
|
@ -36,10 +87,21 @@ def input_to_action(char): |
|
|
if char in ["w", "a", "s", "d"]: |
|
|
if char in ["w", "a", "s", "d"]: |
|
|
return char |
|
|
return char |
|
|
|
|
|
|
|
|
# For each of the 8 actual ski positions: the steering Action and a frame
# count (presumably how long to hold the action to reach that orientation
# from NOOP -- TODO confirm against run_single_test's use).
ski_position_counter = {1: (Action.LEFT, 40), 2: (Action.LEFT, 35), 3: (Action.LEFT, 30), 4: (Action.LEFT, 10), 5: (Action.NOOP, 1), 6: (Action.RIGHT, 10), 7: (Action.RIGHT, 30), 8: (Action.RIGHT, 40) }
|
|
def drawImportantStates(important_states):
    """Overlay important states onto the scaled-down background image.

    important_states: iterable of (State, StateValue) pairs.  Produces one
    image per ski position (first_try_01.png .. first_try_14.png) by
    shelling out to ImageMagick's ``convert``.
    """
    draw_commands = {pos: [] for pos in range(1, 15)}
    for state, value in important_states:
        markerSize = 2
        rect = (f"-fill 'rgba(255,204,0,{value.ranking})' "
                f"-draw 'rectangle {state.x-markerSize},{state.y-markerSize} "
                f"{state.x+markerSize},{state.y+markerSize} '")
        draw_commands[state.ski_position].append(rect)
    for pos in range(1, 15):
        command = f"convert images/1_full_scaled_down.png {' '.join(draw_commands[pos])} first_try_{pos:02}.png"
        exec(command)
|
|
|
|
|
|
|
ski_position_counter = {1: (Action.LEFT, 40), 2: (Action.LEFT, 35), 3: (Action.LEFT, 30), 4: (Action.LEFT, 10), 5: (Action.NOOP, 1), 6: (Action.RIGHT, 10), 7: (Action.RIGHT, 30), 8: (Action.RIGHT, 40) } |
|
|
def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200): |
|
|
def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200): |
|
|
print(f"Running Test from x: {x:04}, y: {y:04}, ski_position: {ski_position}") |
|
|
|
|
|
|
|
|
print(f"Running Test from x: {x:04}, y: {y:04}, ski_position: {ski_position}", end="") |
|
|
for i, r in enumerate(ramDICT[y]): |
|
|
for i, r in enumerate(ramDICT[y]): |
|
|
ale.setRAM(i,r) |
|
|
ale.setRAM(i,r) |
|
|
ski_position_setting = ski_position_counter[ski_position] |
|
|
ski_position_setting = ski_position_counter[ski_position] |
|
@ -50,17 +112,60 @@ def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200): |
|
|
ale.setRAM(14,180) |
|
|
ale.setRAM(14,180) |
|
|
|
|
|
|
|
|
all_obs = list() |
|
|
all_obs = list() |
|
|
|
|
|
speed_list = list() |
|
|
|
|
|
first_action_set = False |
|
|
|
|
|
first_action = 0 |
|
|
for i in range(0,duration): |
|
|
for i in range(0,duration): |
|
|
resized_obs = cv2.resize(ale.getScreenGrayscale() , (84,84), interpolation=cv2.INTER_AREA) |
|
|
resized_obs = cv2.resize(ale.getScreenGrayscale() , (84,84), interpolation=cv2.INTER_AREA) |
|
|
all_obs.append(resized_obs) |
|
|
all_obs.append(resized_obs) |
|
|
if len(all_obs) >= 4: |
|
|
if len(all_obs) >= 4: |
|
|
stack_tensor = TensorDict({"obs": np.array(all_obs[-4:])}) |
|
|
stack_tensor = TensorDict({"obs": np.array(all_obs[-4:])}) |
|
|
action = nn_wrapper.query(stack_tensor) |
|
|
action = nn_wrapper.query(stack_tensor) |
|
|
|
|
|
if not first_action_set: |
|
|
|
|
|
first_action_set = True |
|
|
|
|
|
first_action = input_to_action(str(action)) |
|
|
ale.act(input_to_action(str(action))) |
|
|
ale.act(input_to_action(str(action))) |
|
|
else: |
|
|
else: |
|
|
ale.act(Action.NOOP) |
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
speed_list.append(ale.getRAM()[14]) |
|
|
|
|
|
if len(speed_list) > 15 and sum(speed_list[-6:-1]) == 0: |
|
|
|
|
|
return (Verdict.BAD, first_action) |
|
|
time.sleep(0.005) |
|
|
time.sleep(0.005) |
|
|
|
|
|
|
|
|
|
|
|
return (Verdict.INCONCLUSIVE, first_action) |
|
|
|
|
|
|
|
|
|
|
|
def optimalAction(choices):
    """Return the action name with the highest value in *choices*."""
    best_action, _best_value = max(choices.items(), key=lambda item: item[1])
    return best_action
|
|
|
|
|
|
|
def computeStateRanking():
    """Run the model checker to emit state-value / choice-label rankings."""
    command = (
        f"{tempest_binary} --prism {mdp_file} "
        "--buildchoicelab --buildstateval --prop 'Rmax=? [C <= 1000]'"
    )
    exec(command)
|
|
|
|
|
|
|
def fillStateRanking(file_name, match=""):
    """Parse the model checker's ranking output into {State: StateValue}.

    file_name: path to the ranking file written by computeStateRanking().
    match: optional substring filter; when non-empty, only lines containing
        it are parsed.  The default "" keeps every line, which preserves the
        original behavior of this previously-unused parameter.

    Exits the process if the file cannot be read.
    """
    state_ranking = dict()
    try:
        with open(file_name, "r") as f:
            file_content = f.readlines()
        for line in file_content:
            # Only rank states at the decision point move=0.
            if "move=0" not in line: continue
            if match and match not in line: continue
            # Variable assignments in the line, e.g. x=70 y=61 ski_position=5.
            stateMapping = convert(re.findall(r"([a-zA-Z_]*[a-zA-Z])=(\d+)?", line))
            # Per-action values, e.g. "...left...:0.25".
            choices = convert(re.findall(r"[a-zA-Z_]*(left|right|noop)[a-zA-Z_]*:(-?\d+\.?\d*)", line))
            choices = {key: float(value) for (key, value) in choices.items()}
            ranking_value = float(re.search(r"Value:([+-]?(\d*\.\d+)|\d+)", line)[0].replace("Value:", ""))
            state = State(int(stateMapping["x"]), int(stateMapping["y"]), int(stateMapping["ski_position"]))
            state_ranking[state] = StateValue(ranking_value, choices)
        return state_ranking
    except EnvironmentError:
        print("TODO file not available. Exiting.")
        sys.exit(1)
|
|
|
|
|
|
|
# Run the model checker, parse its "action_ranking" output, and sort the
# states by ascending ranking (the most important states end up at the tail).
computeStateRanking()
ranking = fillStateRanking("action_ranking")
sorted_ranking = sorted(ranking.items(), key=lambda x: x[1].ranking)
|
|
# Single ALE emulator instance shared by the whole script.
# (The original paste constructed it twice back-to-back; once suffices.)
ale = ALEInterface()
|
@ -69,121 +174,26 @@ if SDL_SUPPORT: |
|
|
ale.setBool("display_screen", True) |
|
|
ale.setBool("display_screen", True) |
|
|
|
|
|
|
|
|
# Load the ROM file (rom_file is defined at the top of the script; the
# duplicated re-assignment and double loadROM from the botched merge are gone).
ale.loadROM(rom_file)
|
|
|
|
|
|
|
|
# Get the list of legal actions |
|
|
|
|
|
|
|
|
|
|
|
# Restore the per-row RAM snapshots recorded in an earlier session.
# (Deduplicated: the merge artifact loaded the pickle and built the
# wrapper twice.)
with open('all_positions_v2.pickle', 'rb') as handle:
    ramDICT = pickle.load(handle)
# Starting coordinates for the interactive loop below.
y_ram_setting = 60
x = 70

# Trained policy used to drive the skier during the replay tests.
nn_wrapper = SampleFactoryNNQueryWrapper()
|
|
#run_single_test(ale, nn_wrapper, 70,61,5)
#input("")
# Smoke-test the policy from a handful of hand-picked start states.
run_single_test(ale, nn_wrapper, 30,61,5,duration=1000)
run_single_test(ale, nn_wrapper, 114,170,7)
run_single_test(ale, nn_wrapper, 124,170,5)
run_single_test(ale, nn_wrapper, 134,170,2)
run_single_test(ale, nn_wrapper, 120,185,1)
run_single_test(ale, nn_wrapper, 134,170,8)
run_single_test(ale, nn_wrapper, 85,195,8)
# Whether the interactive loop has manually pinned the velocity RAM cell.
velocity_set = False
|
|
for episode in range(10): |
|
|
|
|
|
total_reward = 0 |
|
|
|
|
|
j = 0 |
|
|
|
|
|
while not ale.game_over(): |
|
|
|
|
|
if not velocity_set: ale.setRAM(14,0) |
|
|
|
|
|
j += 1 |
|
|
|
|
|
a = input_to_action(repr(readchar.readchar())[1]) |
|
|
|
|
|
#a = Action.NOOP |
|
|
|
|
|
|
|
|
|
|
|
if a == "w": |
|
|
|
|
|
y_ram_setting -= 1 |
|
|
|
|
|
if y_ram_setting <= 61: |
|
|
|
|
|
y_ram_setting = 61 |
|
|
|
|
|
for i, r in enumerate(ramDICT[y_ram_setting]): |
|
|
|
|
|
ale.setRAM(i,r) |
|
|
|
|
|
ale.setRAM(25,x) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
elif a == "s": |
|
|
|
|
|
y_ram_setting += 1 |
|
|
|
|
|
if y_ram_setting >= 1950: |
|
|
|
|
|
y_ram_setting = 1945 |
|
|
|
|
|
for i, r in enumerate(ramDICT[y_ram_setting]): |
|
|
|
|
|
ale.setRAM(i,r) |
|
|
|
|
|
ale.setRAM(25,x) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
elif a == "a": |
|
|
|
|
|
x -= 1 |
|
|
|
|
|
if x <= 0: |
|
|
|
|
|
x = 0 |
|
|
|
|
|
ale.setRAM(25,x) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
elif a == "d": |
|
|
|
|
|
x += 1 |
|
|
|
|
|
if x >= 144: |
|
|
|
|
|
x = 144 |
|
|
|
|
|
ale.setRAM(25,x) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif a == "reset": |
|
|
|
|
|
ram_pos = input("Ram Position:") |
|
|
|
|
|
for i, r in enumerate(ramDICT[int(ram_pos)]): |
|
|
|
|
|
ale.setRAM(i,r) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
# Apply an action and get the resulting reward |
|
|
|
|
|
elif a == "set_x": |
|
|
|
|
|
x = int(input("X:")) |
|
|
|
|
|
ale.setRAM(25, x) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
elif a == "set_vel": |
|
|
|
|
|
vel = input("Velocity:") |
|
|
|
|
|
ale.setRAM(14, int(vel)) |
|
|
|
|
|
ale.act(Action.NOOP) |
|
|
|
|
|
velocity_set = True |
|
|
|
|
|
else: |
|
|
|
|
|
reward = ale.act(a) |
|
|
|
|
|
ram = ale.getRAM() |
|
|
|
|
|
#if j % 2 == 0: |
|
|
|
|
|
# y_pixel = int(j*1/2) + 55 |
|
|
|
|
|
# ramDICT[y_pixel] = ram |
|
|
|
|
|
# print(f"saving to {y_pixel:04}") |
|
|
|
|
|
# if y_pixel == 126 or y_pixel == 235: |
|
|
|
|
|
# input("") |
|
|
|
|
|
|
|
|
|
|
|
int_old_ram = list(map(int, oldram)) |
|
|
|
|
|
int_ram = list(map(int, ram)) |
|
|
|
|
|
difference = list() |
|
|
|
|
|
for o, r in zip(int_old_ram, int_ram): |
|
|
|
|
|
difference.append(r-o) |
|
|
|
|
|
|
|
|
|
|
|
oldram = deepcopy(ram) |
|
|
|
|
|
#print(f"player_x: {ram[25]},\tclock_m: {ram[104]},\tclock_s: {ram[105]},\tclock_ms: {ram[106]},\tscore: {ram[107]}") |
|
|
|
|
|
print(f"player_x: {ram[25]},\tplayer_y: {y_ram_setting}") |
|
|
|
|
|
#print(f"y_0: {ram[86]}, y_1: {ram[87]}, y_2: {ram[88]}, y_3: {ram[89]}, y_4: {ram[90]}, y_5: {ram[91]}, y_6: {ram[92]}, y_7: {ram[93]}, y_8: {ram[94]}") |
|
|
|
|
|
|
|
|
|
|
|
#for i, r in enumerate(ram): |
|
|
|
|
|
# print('{:03}:{:02x} '.format(i,r), end="") |
|
|
|
|
|
# if i % 16 == 15: print("") |
|
|
|
|
|
#print("") |
|
|
|
|
|
#for i, r in enumerate(difference): |
|
|
|
|
|
# string = '{:02}:{:03} '.format(i%100,r) |
|
|
|
|
|
# if r != 0: |
|
|
|
|
|
# print(color(string, fg='red'), end="") |
|
|
|
|
|
# else: |
|
|
|
|
|
# print(string, end="") |
|
|
|
|
|
# if i % 16 == 15: print("") |
|
|
|
|
|
print("Episode %d ended with score: %d" % (episode, total_reward)) |
|
|
|
|
|
input("") |
|
|
|
|
|
|
|
|
|
|
|
with open('all_positions_v2.pickle', 'wb') as handle: |
|
|
|
|
|
pickle.dump(ramDICT, handle, protocol=pickle.HIGHEST_PROTOCOL) |
|
|
|
|
|
ale.reset_game() |
|
|
|
|
|
|
|
|
# Copy the background image once, then draw each top-ranked state onto it.
exec("cp testing_1.png /dev/shm/testing.png")
# Walk ~100 of the highest-ranked states (ranking is sorted ascending, so
# they sit at the tail).
# NOTE(review): [-100:-1] excludes the single highest-ranked state -- confirm
# this is intended.
for important_state in sorted_ranking[-100:-1]:
    optimal_choice = optimalAction(important_state[1].choices)
    #print(important_state[1].choices, f"\t\tOptimal: {optimal_choice}")
    x = important_state[0].x
    y = important_state[0].y
    ski_pos = model_to_actual(important_state[0].ski_position)
    # Replay the state in the emulator under the trained policy.
    action_taken = run_single_test(ale,nn_wrapper,x,y,ski_pos, duration=50)
    print(f".... {action_taken}")
    markerSize = 1  # NOTE(review): unused -- the marker below draws a point
    marker = f"-fill 'rgba(255,204,0,{important_state[1].ranking})' -draw 'point {x},{y} '"
    command = f"convert /dev/shm/testing.png {marker} /dev/shm/testing.png"
    exec(command, verbose=False)