import gym
from PIL import Image
from copy import deepcopy
import numpy as np
from matplotlib import pyplot as plt
import readchar

env = gym.make("ALE/Skiing-v5", render_mode="human")
observation, info = env.reset()

y = 40
standstillcounter = 0


def update_y(y, ski_position):
    """Estimate the downhill (y) progress made this step from the current ski heading."""
    global standstillcounter
    if ski_position in [6, 7, 8, 9]:          # pointing straight downhill: fastest
        standstillcounter = 0
        y_update = 16
    elif ski_position in [4, 5, 10, 11]:
        standstillcounter = 0
        y_update = 12
    elif ski_position in [2, 3, 12, 13]:
        standstillcounter = 0
        y_update = 8
    elif ski_position in [1, 14] and standstillcounter >= 5:
        # Fully sideways for several steps: the skier has come to a stop.
        if standstillcounter >= 8:
            print("!!!!!!!!!! no more x updates !!!!!!!!!!!")
        y_update = 0
    elif ski_position in [1, 14]:
        y_update = 4

    if ski_position in [1, 14]:
        standstillcounter += 1
    return y_update


def update_ski_position(ski_position, action):
    """Track the skier's heading (1-14) from the chosen action."""
    if action == 0:    # NOOP
        return ski_position
    elif action == 1:  # RIGHT
        return min(ski_position + 1, 14)
    elif action == 2:  # LEFT
        return max(ski_position - 1, 1)
    else:              # any other key: leave the heading unchanged
        return ski_position


approx_x_coordinate = 80
ski_position = 8

for _ in range(1000000):
    action = env.action_space.sample()  # random action, immediately overridden by the keyboard input below
    action = int(repr(readchar.readchar())[1])  # read a single keypress and use its digit as the action

    ski_position = update_ski_position(ski_position, action)
    y_update = update_y(y, ski_position)
    y += y_update

    # Approximate the skier's x coordinate from the pixels whose green channel equals 92.
    old_x = deepcopy(approx_x_coordinate)
    approx_x_coordinate = int(np.mean(np.where(observation[:, :, 1] == 92)[1]))
    print(f"Action: {action},\tski position: {ski_position},\ty_update: {y_update},"
          f"\ty: {y},\tx: {approx_x_coordinate},\tx_update: {approx_x_coordinate - old_x}")

    observation, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        observation, info = env.reset()
        # Step a few NOOP frames so the next observation is past the reset screen.
        for _ in range(4):
            observation, reward, terminated, truncated, info = env.step(0)

env.close()