diff --git a/rom_evaluate.py b/rom_evaluate.py
index 726601d..5b6907a 100644
--- a/rom_evaluate.py
+++ b/rom_evaluate.py
@@ -70,24 +70,9 @@ def exec(command,verbose=True):
     system(f"echo {command} >> list_of_exec")
     return system(command)
 
+num_tests_per_cluster = 50  # NOTE(review): not referenced in any hunk visible here - confirm it is used elsewhere or drop it
+factor_tests_per_cluster = 0.2  # fraction of a cluster's states used to derive num_tests in the main loop
 num_ski_positions = 8
-def model_to_actual(ski_position):
-    if ski_position == 1:
-        return 1
-    elif ski_position in [2,3]:
-        return 2
-    elif ski_position in [4,5]:
-        return 3
-    elif ski_position in [6,7]:
-        return 4
-    elif ski_position in [8,9]:
-        return 5
-    elif ski_position in [10,11]:
-        return 6
-    elif ski_position in [12,13]:
-        return 7
-    elif ski_position == 14:
-        return 8
 
 def input_to_action(char):
     if char == "0":
@@ -117,9 +102,20 @@ def drawImportantStates(important_states):
         command = f"convert images/1_full_scaled_down.png {' '.join(draw_commands[i])} first_try_{i:02}.png"
         exec(command)
 
+def saveObservations(observations, verdict, testDir):
+    """Save every frame of `observations` as NNN.png in {imagesDir}/{verdict.name}_{testDir}_{frame count}; runs under 20 frames get a _pot_spurious suffix."""
+    testDir = f"{imagesDir}/{verdict.name}_{testDir}_{len(observations)}"
+    if len(observations) < 20:
+        logger.warning(f"Potentially spurious test case for {testDir}")  # Logger.warn is a deprecated alias
+        testDir = f"{testDir}_pot_spurious"
+    exec(f"mkdir -p {testDir}", verbose=False)  # -p: do not fail when the directory already exists
+    for i, obs in enumerate(observations):
+        Image.fromarray(obs).save(f"{testDir}/{i:003}.png")
+
 ski_position_counter = {1: (Action.LEFT, 40), 2: (Action.LEFT, 35), 3: (Action.LEFT, 30), 4: (Action.LEFT, 10), 5: (Action.NOOP, 1), 6: (Action.RIGHT, 10), 7: (Action.RIGHT, 30), 8: (Action.RIGHT, 40) }
 def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200):
     #print(f"Running Test from x: {x:04}, y: {y:04}, ski_position: {ski_position}", end="")
+    testDir = f"{x}_{y}_{ski_position}"
     for i, r in enumerate(ramDICT[y]):
         ale.setRAM(i,r)
     ski_position_setting = ski_position_counter[ski_position]
@@ -134,8 +130,9 @@ def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200):
     first_action_set = False
     first_action = 0
     for i in range(0,duration):
-        resized_obs = cv2.resize(ale.getScreenGrayscale() , (84,84), interpolation=cv2.INTER_AREA)
-        all_obs.append(resized_obs)
+        resized_obs = cv2.resize(ale.getScreenGrayscale(), (84,84), interpolation=cv2.INTER_AREA)
+        for i in range(0,4):
+            all_obs.append(resized_obs)
         if len(all_obs) >= 4:
             stack_tensor = TensorDict({"obs": np.array(all_obs[-4:])})
             action = nn_wrapper.query(stack_tensor)
@@ -147,9 +144,10 @@ def run_single_test(ale, nn_wrapper, x,y,ski_position, duration=200):
             ale.act(Action.NOOP)
         speed_list.append(ale.getRAM()[14])
         if len(speed_list) > 15 and sum(speed_list[-6:-1]) == 0:
-            return (Verdict.BAD, first_action)
-        #time.sleep(0.005)
-    return (Verdict.INCONCLUSIVE, first_action)
+            saveObservations(all_obs, Verdict.BAD, testDir)
+            return Verdict.BAD
+    saveObservations(all_obs, Verdict.GOOD, testDir)
+    return Verdict.GOOD
 
 def optimalAction(choices):
     return max(choices.items(), key=operator.itemgetter(1))[0]
@@ -245,19 +243,16 @@ x = 70
 nn_wrapper = SampleFactoryNNQueryWrapper()
 
 iteration = 0
-id = int(time.time())
-init_mdp = "velocity"
-exec(f"mkdir -p images/testing_{id}")
-exec(f"cp 1_full_scaled_down.png images/testing_{id}/testing_0000.png")
+experiment_id = int(time.time())  # Unix timestamp (seconds) that names this run's output directory
+init_mdp = "velocity_safety"  # base name of the PRISM model file ({init_mdp}.prism)
+exec(f"mkdir -p images/testing_{experiment_id}")  # per-run output directory
+exec(f"cp 1_full_scaled_down.png images/testing_{experiment_id}/testing_0000.png")  # NOTE(review): source is in the CWD here, but the draw helpers read images/1_full_scaled_down.png - confirm
 exec(f"cp {init_mdp}.prism {init_mdp}_000.prism")
 
 markerSize = 1
 #markerList = {1: list(), 2:list(), 3:list(), 4:list(), 5:list(), 6:list(), 7:list(), 8:list()}
 
-def f(n):
-    if n >= 1.0:
-        return True
-    return False
+imagesDir = f"images/testing_{experiment_id}"  # per-run output directory shared by the drawing helpers below
 
 def drawOntoSkiPosImage(states, color, target_prefix="cluster_", alpha_factor=1.0):
     markerList = {ski_position:list() for ski_position in range(1,num_ski_positions + 1)}
@@ -266,12 +261,13 @@ def drawOntoSkiPosImage(states, color, target_prefix="cluster_", alpha_factor=1.
         marker = f"-fill 'rgba({color}, {alpha_factor * state[1].ranking})' -draw 'rectangle {s.x-markerSize},{s.y-markerSize} {s.x+markerSize},{s.y+markerSize} '"
         markerList[s.ski_position].append(marker)
     for pos, marker in markerList.items():
-        command = f"convert images/testing_{id}/{target_prefix}_{pos:02}.png {' '.join(marker)} images/testing_{id}/{target_prefix}_{pos:02}.png"
+        command = f"convert {imagesDir}/{target_prefix}_{pos:02}.png {' '.join(marker)} {imagesDir}/{target_prefix}_{pos:02}.png"
         exec(command, verbose=False)
 
 
 def concatImages(prefix):
-    exec(f"montage images/testing_{id}/{prefix}_*png -geometry +0+0 -tile x1 images/testing_{id}/{prefix}.png", verbose=False)
+    exec(f"montage {imagesDir}/{prefix}_*png -geometry +0+0 -tile x1 {imagesDir}/{prefix}.png", verbose=False)  # tile every {prefix}_* image into a single row
+    exec(f"sxiv {imagesDir}/{prefix}.png&")  # open the montage in the sxiv viewer; the trailing & backgrounds it so the script keeps running
 
 def drawStatesOntoTiledImage(states, color, target, source="images/1_full_scaled_down.png", alpha_factor=1.0):
     """
@@ -285,20 +281,32 @@ def drawStatesOntoTiledImage(states, color, target, source="images/1_full_scaled
         marker = f"-fill 'rgba({color}, {alpha_factor * state[1].ranking})' -draw 'rectangle {s.x-markerSize},{s.y-markerSize} {s.x+markerSize},{s.y+markerSize} '"
         markerList[s.ski_position].append(marker)
     for pos, marker in markerList.items():
-        command = f"convert {source} {' '.join(marker)} images/testing_{id}/{target}_{pos:02}.png"
+        command = f"convert {source} {' '.join(marker)} {imagesDir}/{target}_{pos:02}.png"
         exec(command, verbose=False)
-    exec(f"montage images/testing_{id}/{target}_*png -geometry +0+0 -tile x1 images/testing_{id}/{target}.png", verbose=False)
+    exec(f"montage {imagesDir}/{target}_*png -geometry +0+0 -tile x1 {imagesDir}/{target}.png", verbose=False)
     logger.info(f"Drawing {len(states)} states onto {target} - Done: took {toc()} seconds")
 
 def drawClusters(clusterDict, target, alpha_factor=1.0):
     for ski_position in range(1, num_ski_positions + 1):
         source = "images/1_full_scaled_down.png"
-        exec(f"cp {source} images/testing_{id}/{target}_{ski_position:02}.png")
+        exec(f"cp {source} {imagesDir}/{target}_{ski_position:02}.png")  # start each ski-position tile from a clean copy of the base image
     for _, clusterStates in clusterDict.items():
         color = f"{np.random.choice(range(256))}, {np.random.choice(range(256))}, {np.random.choice(range(256))}"
-        drawOntoSkiPosImage(clusterStates, color, f"clusters")
-    concatImages("clusters")
+        drawOntoSkiPosImage(clusterStates, color, target, alpha_factor=alpha_factor)  # one random RGB color per cluster, drawn onto the caller-chosen target
+    concatImages(target)  # merge the per-ski-position tiles into {imagesDir}/{target}.png
 
+def drawResult(clusterDict, target):
+    """Draw each (states, verdict) cluster in green (GOOD), red (BAD) or grey (any other verdict), then montage the tiles."""
+    source = "images/1_full_scaled_down.png"
+    for ski_position in range(1, num_ski_positions + 1):
+        # start every ski-position tile from a clean copy of the base image
+        exec(f"cp {source} {imagesDir}/{target}_{ski_position:02}.png")
+    verdict_colors = {Verdict.GOOD: "0,200,0", Verdict.BAD: "200,0,0"}
+    for clusterStates, result in clusterDict.values():
+        # verdicts without an entry in the table fall back to grey
+        color = verdict_colors.get(result, "100,100,100")
+        drawOntoSkiPosImage(clusterStates, color, target, alpha_factor=0.7)
+    concatImages(target)
 
 def _init_logger():
     logger = logging.getLogger('main')
@@ -308,7 +316,7 @@ def _init_logger():
     handler.setFormatter(formatter)
     logger.addHandler(handler)
 
-def clusterImportantStates(ranking, n_clusters=10):
+def clusterImportantStates(ranking, n_clusters=40):
     logger.info(f"Starting to cluster {len(ranking)} states into {n_clusters} cluster")
     tic()
     states = [[s[0].x,s[0].y, s[0].ski_position * 10, s[1].ranking] for s in ranking]
@@ -324,36 +332,39 @@ if __name__ == '__main__':
     _init_logger()
     logger = logging.getLogger('main')
     logger.info("Starting")
+    n_clusters = 40
+    testAll = False
     while True:
         #computeStateRanking(f"{init_mdp}_{iteration:03}.prism")
         ranking = fillStateRanking("action_ranking")
         sorted_ranking = sorted( (x for x in ranking.items() if x[1].ranking > 0.1), key=lambda x: x[1].ranking)
-        print(type(sorted_ranking))
-        clusters = clusterImportantStates(sorted_ranking)
+        clusters = clusterImportantStates(sorted_ranking, n_clusters)
+
+        if testAll: failingPerCluster = {i: list() for i in range(0, n_clusters)}
+        clusterResult = dict()
+        for id, cluster in clusters.items():
+            num_tests = int(factor_tests_per_cluster * len(cluster))
+            num_tests = 1
+            logger.info(f"Testing {num_tests} states (from {len(cluster)} states) from cluster {id}")
+            randomStates = np.random.choice(len(cluster), num_tests, replace=False)
+            randomStates = [cluster[i] for i in randomStates]
+
+            verdictGood = True
+            for state in randomStates:
+                x = state[0].x
+                y = state[0].y
+                ski_pos = state[0].ski_position
+                result = run_single_test(ale,nn_wrapper,x,y,ski_pos, duration=50)
+                if result == Verdict.BAD:
+                    if testAll:
+                        failingPerCluster[id].append(state)
+                    else:
+                        clusterResult[id] = (cluster, Verdict.BAD)
+                        verdictGood = False
+                        break
+            if verdictGood:
+                clusterResult[id] = (cluster, Verdict.GOOD)
+        if testAll: drawClusters(failingPerCluster, f"failing")
+        drawResult(clusterResult, "result")
 
-        sys.exit(1)
-        #for i, state in enumerate(sorted_ranking):
-        #    print(state)
-        #    if i % 10 == 0:
-        #        input("")
-        #print(len(sorted_ranking))
-
-        """
-        for important_state in ranking[-100:-1]:
-            optimal_choice = optimalAction(important_state[1].choices)
-            #print(important_state[1].choices, f"\t\tOptimal: {optimal_choice}")
-            x = important_state[0].x
-            y = important_state[0].y
-            ski_pos = model_to_actual(important_state[0].ski_position)
-            result = run_single_test(ale,nn_wrapper,x,y,ski_pos, duration=50)
-            #print(f".... {result}")
-            marker = f"-fill 'rgba({verdict_to_color_map[result[0]],0.7})' -draw 'rectangle {x-markerSize},{y-markerSize} {x+markerSize},{y+markerSize} '"
-            markerList[ski_pos].append(marker)
-            populate_fixed_actions(important_state[0], result[1])
-        for pos, marker in markerList.items():
-            command = f"convert images/testing_{id}/testing_0000.png {' '.join(marker)} images/testing_{id}/testing_{iteration+1:03}_{pos:02}.png"
-            exec(command, verbose=False)
-        exec(f"montage images/testing_{id}/testing_{iteration+1:03}_*png -geometry +0+0 -tile x1 images/testing_{id}/{iteration+1:03}.png", verbose=False)
-        iteration += 1
-        """
         update_prism_file(f"{init_mdp}_{iteration-1:03}.prism", f"{init_mdp}_{iteration:03}.prism")