diff --git a/rom_evaluate.py b/rom_evaluate.py index b719fdc..3cf2978 100644 --- a/rom_evaluate.py +++ b/rom_evaluate.py @@ -77,6 +77,9 @@ class TestResult: init_check_opt_min: float init_check_opt_max: float init_check_opt_avg: float + safe_states: int + unsafe_states: int + policy_queries: int def __str__(self): return f"""Test Result: init_check_pes_min: {self.init_check_pes_min} @@ -87,7 +90,7 @@ class TestResult: init_check_opt_avg: {self.init_check_opt_avg} """ def csv(self, ws=" "): - return f"{self.init_check_pes_min:0.04f}{ws}{self.init_check_pes_max:0.04f}{ws}{self.init_check_pes_avg:0.04f}{ws}{self.init_check_opt_min:0.04f}{ws}{self.init_check_opt_max:0.04f}{ws}{self.init_check_opt_avg:0.04f}" + return f"{self.init_check_pes_min:0.04f}{ws}{self.init_check_pes_max:0.04f}{ws}{self.init_check_pes_avg:0.04f}{ws}{self.init_check_opt_min:0.04f}{ws}{self.init_check_opt_max:0.04f}{ws}{self.init_check_opt_avg:0.04f}{ws}{self.safeStates}{ws}{self.unsafeStates}{ws}{self.policy_queries}" def exec(command,verbose=True): if verbose: print(f"Executing {command}") @@ -128,19 +131,23 @@ def saveObservations(observations, verdict, testDir): ski_position_counter = {1: (Action.LEFT, 40), 2: (Action.LEFT, 35), 3: (Action.LEFT, 30), 4: (Action.LEFT, 10), 5: (Action.NOOP, 1), 6: (Action.RIGHT, 10), 7: (Action.RIGHT, 30), 8: (Action.RIGHT, 40) } def run_single_test(ale, nn_wrapper, x,y,ski_position, velocity, duration=50): -#def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=50): #print(f"Running Test from x: {x:04}, y: {y:04}, ski_position: {ski_position}", end="") testDir = f"{x}_{y}_{ski_position}_{velocity}" - #testDir = f"{x}_{y}_{ski_position}" - for i, r in enumerate(ramDICT[y]): - ale.setRAM(i,r) - ski_position_setting = ski_position_counter[ski_position] - for i in range(0,ski_position_setting[1]): - ale.act(ski_position_setting[0]) - ale.setRAM(14,0) - ale.setRAM(25,x) - ale.setRAM(14,180) # TODO - + try: + for i, r in enumerate(ramDICT[y]): + 
ale.setRAM(i,r) + ski_position_setting = ski_position_counter[ski_position] + for i in range(0,ski_position_setting[1]): + ale.act(ski_position_setting[0]) + ale.setRAM(14,0) + ale.setRAM(25,x) + ale.setRAM(14,180) # TODO + except Exception as e: + print(e) + logger.warn(f"Could not run test for x: {x}, y: {y}, ski_position: {ski_position}, velocity: {velocity}") + return (Verdict.INCONCLUSIVE, 0) + + num_queries = 0 all_obs = list() speed_list = list() resized_obs = cv2.resize(ale.getScreenGrayscale(), (84,84), interpolation=cv2.INTER_AREA) @@ -152,32 +159,33 @@ def run_single_test(ale, nn_wrapper, x,y,ski_position, velocity, duration=50): if i % 4 == 0: stack_tensor = TensorDict({"obs": np.array(all_obs[-4:])}) action = nn_wrapper.query(stack_tensor) + num_queries += 1 ale.act(input_to_action(str(action))) else: ale.act(input_to_action(str(action))) speed_list.append(ale.getRAM()[14]) if len(speed_list) > 15 and sum(speed_list[-6:-1]) == 0: #saveObservations(all_obs, Verdict.BAD, testDir) - return Verdict.BAD + return (Verdict.BAD, num_queries) #saveObservations(all_obs, Verdict.GOOD, testDir) - return Verdict.GOOD + return (Verdict.GOOD, num_queries) def computeStateRanking(mdp_file, iteration): logger.info("Computing state ranking") tic() - prop = f"filter(min, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" - prop += f"filter(max, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" - prop += f"filter(avg, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" - prop += f"filter(min, Pmax=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" - prop += f"filter(max, Pmax=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" - prop += f"filter(avg, Pmax=? 
[ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], !\"Hit_Tree\" & !\"Hit_Gate\" & !\"Unsafe\" );" + prop = f"filter(min, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" + prop += f"filter(max, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" + prop += f"filter(avg, Pmin=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" + prop += f"filter(min, Pmax=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" + prop += f"filter(max, Pmax=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" + prop += f"filter(avg, Pmax=? [ G !(\"Hit_Tree\" | \"Hit_Gate\" | \"Unsafe\") ], (!\"S_Hit_Tree\" & !\"S_Hit_Gate\") | (\"Safe\" | \"Unsafe\") );" prop += 'Rmax=? 
[C <= 200]' results = list() try: command = f"{tempest_binary} --prism {mdp_file} --buildchoicelab --buildstateval --build-all-labels --prop '{prop}'" output = subprocess.check_output(command, shell=True).decode("utf-8").split('\n') for line in output: - print(line) + #print(line) if "Result" in line and not len(results) >= 6: range_value = re.search(r"(.*:).*\[(-?\d+\.?\d*), (-?\d+\.?\d*)\].*", line) if range_value: @@ -191,7 +199,7 @@ def computeStateRanking(mdp_file, iteration): # todo die gracefully if ranking is uniform print(e.output) logger.info(f"Computing state ranking - DONE: took {toc()} seconds") - return TestResult(*tuple(results)) + return TestResult(*tuple(results),0,0,0) def fillStateRanking(file_name, match=""): logger.info(f"Parsing state ranking, {file_name}") @@ -274,6 +282,8 @@ def statesFormulaTrimmed(states): formula += ")" formula += ")" formulas.append(formula) + print(formulas) + sys.exit(1) return createBalancedDisjunction(formulas) # https://stackoverflow.com/questions/5389507/iterating-over-every-two-elements-in-a-list @@ -459,8 +469,8 @@ if __name__ == '__main__': _init_logger() logger = logging.getLogger('main') logger.info("Starting") - n_clusters = 40 testAll = False + num_queries = 0 source = "images/1_full_scaled_down.png" for ski_position in range(1, num_ski_positions + 1): @@ -477,6 +487,10 @@ if __name__ == '__main__': while True: updatePrismFile(init_mdp, iteration, safeStates, unsafeStates) modelCheckingResult = computeStateRanking(f"{init_mdp}_{iteration:03}.prism", iteration) + if len(results) > 0: + modelCheckingResult.safeStates = results[-1].safeStates + modelCheckingResult.unsafeStates = results[-1].unsafeStates + modelCheckingResult.policy_queries = results[-1].policy_queries results.append(modelCheckingResult) logger.info(f"Model Checking Result: {modelCheckingResult}") if abs(modelCheckingResult.init_check_pes_avg - modelCheckingResult.init_check_opt_avg) < eps: @@ -508,8 +522,8 @@ if __name__ == '__main__': y = state[0].y 
ski_pos = state[0].ski_position velocity = state[0].velocity - #result = run_single_test(ale,nn_wrapper,x,y,ski_pos, duration=50) - result = run_single_test(ale,nn_wrapper,x,y,ski_pos, velocity, duration=50) + result, num_queries_this_test_case = run_single_test(ale,nn_wrapper,x,y,ski_pos, velocity, duration=50) + num_queries += num_queries_this_test_case if result == Verdict.BAD: if testAll: failingPerCluster[id].append(state) @@ -522,6 +536,9 @@ if __name__ == '__main__': clusterResult[id] = (cluster, Verdict.GOOD) safeStates.update([(s[0].x,s[0].y, s[0].ski_position, s[0].velocity) for s in cluster]) logger.info(f"Iteration: {iteration:03}\t-\tSafe Results : {len(safeStates)}\t-\tUnsafe Results:{len(unsafeStates)}") + results[-1].safeStates = len(safeStates) + results[-1].unsafeStates = len(unsafeStates) + results[-1].policy_queries = num_queries if testAll: drawClusters(failingPerCluster, f"failing", iteration) drawResult(clusterResult, "result", iteration)