You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
8.2 KiB
218 lines
8.2 KiB
import stormpy
|
|
import stormpy.core
|
|
import stormpy.simulator
|
|
|
|
import stormpy.shields
|
|
import stormpy.logic
|
|
|
|
import stormpy.examples
|
|
import stormpy.examples.files
|
|
|
|
from enum import Enum
|
|
from abc import ABC
|
|
|
|
import re
|
|
import sys
|
|
import tempfile, datetime, shutil
|
|
|
|
import gymnasium as gym
|
|
|
|
from minigrid.core.actions import Actions
|
|
from minigrid.core.state import to_state
|
|
|
|
import os
|
|
import time
|
|
|
|
import argparse
|
|
|
|
def tic():
|
|
#Homemade version of matlab tic and toc functions: https://stackoverflow.com/a/18903019
|
|
global startTime_for_tictoc
|
|
startTime_for_tictoc = time.time()
|
|
|
|
def toc():
|
|
if 'startTime_for_tictoc' in globals():
|
|
print("Elapsed time is " + str(time.time() - startTime_for_tictoc) + " seconds.")
|
|
else:
|
|
print("Toc: start time not set")
|
|
|
|
class ShieldingConfig(Enum):
|
|
Training = 'training'
|
|
Evaluation = 'evaluation'
|
|
Disabled = 'none'
|
|
Full = 'full'
|
|
|
|
def __str__(self) -> str:
|
|
return self.value
|
|
|
|
def shield_needed(shielding):
|
|
return shielding in [ShieldingConfig.Training, ShieldingConfig.Evaluation, ShieldingConfig.Full]
|
|
|
|
def shielded_evaluation(shielding):
|
|
return shielding in [ShieldingConfig.Evaluation, ShieldingConfig.Full]
|
|
|
|
def shielded_training(shielding):
|
|
return shielding in [ShieldingConfig.Training, ShieldingConfig.Full]
|
|
|
|
class ShieldHandler(ABC):
|
|
def __init__(self) -> None:
|
|
pass
|
|
def create_shield(self, **kwargs) -> dict:
|
|
pass
|
|
|
|
class MiniGridShieldHandler(ShieldHandler):
|
|
def __init__(self, grid_to_prism_binary, grid_file, prism_path, formula, prism_config=None, shield_value=0.9, shield_comparison='absolute', nocleanup=False) -> None:
|
|
self.tmp_dir_name = f"shielding_files_{datetime.datetime.now().strftime('%Y%m%dT%H%M%S')}_{next(tempfile._get_candidate_names())}"
|
|
os.mkdir(self.tmp_dir_name)
|
|
self.grid_file = self.tmp_dir_name + "/" + grid_file
|
|
self.grid_to_prism_binary = grid_to_prism_binary
|
|
self.prism_path = self.tmp_dir_name + "/" + prism_path
|
|
self.prism_config = prism_config
|
|
|
|
self.formula = formula
|
|
shield_comparison = stormpy.logic.ShieldComparison.ABSOLUTE if shield_comparison == "absolute" else stormpy.logic.ShieldComparison.RELATIVE
|
|
self.shield_expression = stormpy.logic.ShieldExpression(stormpy.logic.ShieldingType.PRE_SAFETY, shield_comparison, shield_value)
|
|
|
|
self.nocleanup = nocleanup
|
|
def __del__(self):
|
|
if not self.nocleanup:
|
|
shutil.rmtree(self.tmp_dir_name)
|
|
|
|
def __export_grid_to_text(self, env):
|
|
with open(self.grid_file, "w") as f:
|
|
f.write(env.printGrid(init=True))
|
|
|
|
|
|
def __create_prism(self):
|
|
if self.prism_config is None:
|
|
result = os.system(F"{self.grid_to_prism_binary} -i {self.grid_file} -o {self.prism_path}")
|
|
else:
|
|
result = os.system(F"{self.grid_to_prism_binary} -i {self.grid_file} -o {self.prism_path} -c {self.prism_config}")
|
|
|
|
assert result == 0, "Prism file could not be generated"
|
|
|
|
def __create_shield_dict(self):
|
|
program = stormpy.parse_prism_program(self.prism_path)
|
|
|
|
formulas = stormpy.parse_properties_for_prism_program(self.formula, program)
|
|
options = stormpy.BuilderOptions([p.raw_formula for p in formulas])
|
|
options.set_build_state_valuations(True)
|
|
options.set_build_choice_labels(True)
|
|
options.set_build_all_labels()
|
|
print(f"LOG: Starting with explicit model creation...")
|
|
tic()
|
|
model = stormpy.build_sparse_model_with_options(program, options)
|
|
toc()
|
|
|
|
print(f"LOG: Starting with model checking...")
|
|
tic()
|
|
result = stormpy.model_checking(model, formulas[0], extract_scheduler=True, shield_expression=self.shield_expression)
|
|
toc()
|
|
|
|
assert result.has_shield
|
|
shield = result.shield
|
|
action_dictionary = dict()
|
|
shield_scheduler = shield.construct()
|
|
state_valuations = model.state_valuations
|
|
choice_labeling = model.choice_labeling
|
|
|
|
stormpy.shields.export_shield(model, shield, self.tmp_dir_name + "/shield")
|
|
|
|
print(f"LOG: Starting to translate shield...")
|
|
tic()
|
|
for stateID in model.states:
|
|
choice = shield_scheduler.get_choice(stateID)
|
|
choices = choice.choice_map
|
|
|
|
state_valuation = state_valuations.get_string(stateID)
|
|
#print(state_valuation)
|
|
#print(choices)
|
|
ints = dict(re.findall(r'([a-zA-Z][_a-zA-Z0-9]+)=(-?[a-zA-Z0-9]+)', state_valuation))
|
|
booleans = re.findall(r'(\!?)([a-zA-Z][_a-zA-Z0-9]+)[\s\t]+', state_valuation)
|
|
booleans = {b[1]: False if b[0] == "!" else True for b in booleans}
|
|
#print(ints, booleans)
|
|
if int(ints.get("previousActionAgent", 3)) != 3:
|
|
continue
|
|
if int(ints.get("clock", 0)) != 0:
|
|
continue
|
|
state = to_state(ints, booleans)
|
|
#print(f"{state} got added with actions:")
|
|
#print(get_allowed_actions_mask([choice_labeling.get_labels_of_choice(model.get_choice_index(stateID, choice[1])) for choice in choices]))
|
|
action_dictionary[state] = get_allowed_actions_mask([choice_labeling.get_labels_of_choice(model.get_choice_index(stateID, choice[1])) for choice in choices])
|
|
|
|
toc()
|
|
#print(f"{len(action_dictionary)} states in the shield")
|
|
return action_dictionary
|
|
|
|
|
|
def create_shield(self, **kwargs):
|
|
env = kwargs["env"]
|
|
self.__export_grid_to_text(env)
|
|
self.__create_prism()
|
|
|
|
return self.__create_shield_dict()
|
|
|
|
def expname(args):
|
|
return f"{datetime.datetime.now().strftime('%Y%m%dT%H%M%S')}_{args.env}_{args.shielding}_{args.shield_comparison}_{args.shield_value}_{args.expname_suffix}"
|
|
|
|
def create_log_dir(args):
|
|
log_dir = f"{args.log_dir}/{expname(args)}"
|
|
os.makedirs(log_dir, exist_ok=True)
|
|
return log_dir
|
|
|
|
def get_allowed_actions_mask(actions):
|
|
action_mask = [0.0] * 7
|
|
actions_labels = [label for labels in actions for label in list(labels)]
|
|
for action_label in actions_labels:
|
|
if "move" in action_label:
|
|
action_mask[2] = 1.0
|
|
elif "left" in action_label:
|
|
action_mask[0] = 1.0
|
|
elif "right" in action_label:
|
|
action_mask[1] = 1.0
|
|
elif "pickup" in action_label:
|
|
action_mask[3] = 1.0
|
|
elif "drop" in action_label:
|
|
action_mask[4] = 1.0
|
|
elif "toggle" in action_label:
|
|
action_mask[5] = 1.0
|
|
elif "done" in action_label:
|
|
action_mask[6] = 1.0
|
|
return action_mask
|
|
|
|
def common_parser():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--env",
|
|
help="gym environment to load",
|
|
choices=gym.envs.registry.keys(),
|
|
default="MiniGrid-LavaSlipperyCliff-16x13-v0")
|
|
|
|
parser.add_argument("--grid_file", default="grid.txt")
|
|
parser.add_argument("--prism_output_file", default="grid.prism")
|
|
parser.add_argument("--log_dir", default="../log_results/")
|
|
parser.add_argument("--formula", default="Pmax=? [G !AgentIsOnLava]")
|
|
parser.add_argument("--shielding", type=ShieldingConfig, choices=list(ShieldingConfig), default=ShieldingConfig.Full)
|
|
parser.add_argument("--steps", default=20_000, type=int)
|
|
parser.add_argument("--shield_creation_at_reset", action=argparse.BooleanOptionalAction)
|
|
parser.add_argument("--prism_config", default=None)
|
|
parser.add_argument("--shield_value", default=0.9, type=float)
|
|
parser.add_argument("--shield_comparison", default='absolute', choices=['relative', 'absolute'])
|
|
parser.add_argument("--nocleanup", action=argparse.BooleanOptionalAction)
|
|
parser.add_argument("--expname_suffix", default="")
|
|
return parser
|
|
|
|
class MiniWrapper(gym.Wrapper):
|
|
def __init__(self, env):
|
|
super().__init__(env)
|
|
self.env = env
|
|
|
|
def reset(self, *, seed=None, options=None):
|
|
obs, info = self.env.reset(seed=seed, options=options)
|
|
return obs.transpose(1,0,2), info
|
|
|
|
def observations(self, obs):
|
|
return obs
|
|
|
|
def step(self, action):
|
|
obs, reward, terminated, truncated, info = self.env.step(action)
|
|
return obs.transpose(1,0,2), reward, terminated, truncated, info
|