Browse Source

adversary handling

refactoring
Thomas Knoll 1 year ago
parent
commit
d41ba6258f
  1. 23
      examples/shields/rl/helpers.py
  2. 65
      examples/shields/rl/shieldhandlers.py

23
examples/shields/rl/helpers.py

@ -37,7 +37,7 @@ def extract_keys(env):
if env.carrying and env.carrying.type == "key": if env.carrying and env.carrying.type == "key":
keys.append((env.carrying, -1, -1)) keys.append((env.carrying, -1, -1))
# TODO Maybe need to add ordering of keys so it matches the order in the shield
return keys return keys
def extract_doors(env): def extract_doors(env):
@ -51,6 +51,18 @@ def extract_doors(env):
return doors return doors
def extract_adversaries(env):
    """Return the adversary objects stored on *env* as a list.

    ``env.adversaries`` is read as a mapping; only its values are
    collected, in the mapping's iteration order. The keys are not used.

    Args:
        env: Environment object that may expose an ``adversaries`` mapping.

    Returns:
        A new list with the values of ``env.adversaries``, or an empty list
        when the attribute is missing, ``None``, or empty.
    """
    adversaries = getattr(env, "adversaries", None)
    # A missing, None, or empty mapping all mean "no adversaries".
    if not adversaries:
        return []
    return list(adversaries.values())
def create_log_dir(args): def create_log_dir(args):
return F"{args.log_dir}sh:{args.shielding}-env:{args.env}" return F"{args.log_dir}sh:{args.shielding}-env:{args.env}"
@ -87,6 +99,7 @@ def parse_arguments(argparse):
help="gym environment to load", help="gym environment to load",
default="MiniGrid-LavaCrossingS9N1-v0", default="MiniGrid-LavaCrossingS9N1-v0",
choices=[ choices=[
"MiniGrid-Adv-8x8-v0",
"MiniGrid-LavaCrossingS9N1-v0", "MiniGrid-LavaCrossingS9N1-v0",
"MiniGrid-LavaCrossingS9N3-v0", "MiniGrid-LavaCrossingS9N3-v0",
"MiniGrid-LavaSlipperyS12-v0", "MiniGrid-LavaSlipperyS12-v0",
@ -94,10 +107,10 @@ def parse_arguments(argparse):
"MiniGrid-LavaSlipperyS12-v2", "MiniGrid-LavaSlipperyS12-v2",
"MiniGrid-LavaSlipperyS12-v3", "MiniGrid-LavaSlipperyS12-v3",
"MiniGrid-DoorKey-8x8-v0", "MiniGrid-DoorKey-8x8-v0",
"MiniGrid-DoubleDoor-16x16-v0",
"MiniGrid-DoubleDoor-12x12-v0",
"MiniGrid-DoubleDoor-10x8-v0",
"MiniGrid-SingleDoor-7x6-v0",
# "MiniGrid-DoubleDoor-16x16-v0",
# "MiniGrid-DoubleDoor-12x12-v0",
# "MiniGrid-DoubleDoor-10x8-v0",
# "MiniGrid-SingleDoor-7x6-v0",
# "MiniGrid-LockedRoom-v0", # "MiniGrid-LockedRoom-v0",
# "MiniGrid-FourRooms-v0", # "MiniGrid-FourRooms-v0",
# "MiniGrid-LavaGapS7-v0", # "MiniGrid-LavaGapS7-v0",

65
examples/shields/rl/shieldhandlers.py

@ -9,11 +9,11 @@ import stormpy.examples
import stormpy.examples.files import stormpy.examples.files
from helpers import extract_doors, extract_keys
from helpers import extract_doors, extract_keys, extract_adversaries
from abc import ABC from abc import ABC
import os import os
import time
class Action(): class Action():
def __init__(self, idx, prob=1, labels=[]) -> None: def __init__(self, idx, prob=1, labels=[]) -> None:
self.idx = idx self.idx = idx
@ -40,7 +40,7 @@ class MiniGridShieldHandler(ShieldHandler):
def __create_prism(self): def __create_prism(self):
result = os.system(F"{self.grid_to_prism_path} -v 'agent' -i {self.grid_file} -o {self.prism_path}")
result = os.system(F"{self.grid_to_prism_path} -v 'Agent,Blue' -i {self.grid_file} -o {self.prism_path}")
assert result == 0, "Prism file could not be generated" assert result == 0, "Prism file could not be generated"
@ -67,19 +67,18 @@ class MiniGridShieldHandler(ShieldHandler):
assert result.has_shield assert result.has_shield
shield = result.shield shield = result.shield
stormpy.shields.export_shield(model, shield, "Grid.shield") stormpy.shields.export_shield(model, shield, "Grid.shield")
action_dictionary = {} action_dictionary = {}
shield_scheduler = shield.construct() shield_scheduler = shield.construct()
state_valuations = model.state_valuations
choice_labeling = model.choice_labeling
for stateID in model.states: for stateID in model.states:
choice = shield_scheduler.get_choice(stateID) choice = shield_scheduler.get_choice(stateID)
choices = choice.choice_map choices = choice.choice_map
state_valuation = model.state_valuations.get_string(stateID)
actions_to_be_executed = [Action(idx= choice[1], prob=choice[0], labels=model.choice_labeling.get_labels_of_choice(model.get_choice_index(stateID, choice[1]))) for choice in choices]
state_valuation = state_valuations.get_string(stateID)
actions_to_be_executed = [Action(idx= choice[1], prob=choice[0], labels=choice_labeling.get_labels_of_choice(model.get_choice_index(stateID, choice[1]))) for choice in choices]
action_dictionary[state_valuation] = actions_to_be_executed action_dictionary[state_valuation] = actions_to_be_executed
return action_dictionary return action_dictionary
@ -93,15 +92,16 @@ class MiniGridShieldHandler(ShieldHandler):
def create_shield_query(env): def create_shield_query(env):
coordinates = env.env.agent_pos coordinates = env.env.agent_pos
view_direction = env.env.agent_dir view_direction = env.env.agent_dir
keys = extract_keys(env) keys = extract_keys(env)
doors = extract_doors(env) doors = extract_doors(env)
adversaries = extract_adversaries(env)
if env.carrying: if env.carrying:
carrying = F"Agent_is_carrying_object\t"
agent_carrying = F"Agent_is_carrying_object\t"
else: else:
carrying = "!Agent_is_carrying_object\t"
agent_carrying = "!Agent_is_carrying_object\t"
key_positions = [] key_positions = []
agent_key_status = [] agent_key_status = []
@ -110,7 +110,6 @@ def create_shield_query(env):
key_color = key[0].color key_color = key[0].color
key_x = key[1] key_x = key[1]
key_y = key[2] key_y = key[2]
# '[!Agent_is_carrying_object\t& !Agent_has_yellow_key\t& !AgentDone\t& Dooryellowlocked\t& !Dooryellowopen\t& xAgent=1\t& yAgent=1\t& viewAgent=0\t& xKeyyellow=2\t& yKeyyellow=2]'
if env.carrying and env.carrying.type == "key": if env.carrying and env.carrying.type == "key":
agent_key_text = F"Agent_has_{env.carrying.color}_key\t& " agent_key_text = F"Agent_has_{env.carrying.color}_key\t& "
key_position = F"xKey{key_color}={key_x}\t& yKey{key_color}={key_y}\t" key_position = F"xKey{key_color}={key_x}\t& yKey{key_color}={key_y}\t"
@ -121,7 +120,8 @@ def create_shield_query(env):
key_positions.append(key_position) key_positions.append(key_position)
agent_key_status.append(agent_key_text) agent_key_status.append(agent_key_text)
key_positions[-1] = key_positions[-1].strip()
if key_positions:
key_positions[-1] = key_positions[-1].strip()
door_status = [] door_status = []
for door in doors: for door in doors:
@ -134,11 +134,46 @@ def create_shield_query(env):
status = F"!Door{door.color}locked\t& !Door{door.color}open\t&" status = F"!Door{door.color}locked\t& !Door{door.color}open\t&"
door_status.append(status) door_status.append(status)
adv_status = []
adv_positions = []
for adversary in adversaries:
status = ""
position = ""
if adversary.carrying:
carrying = F"{adversary.name}_is_carrying_object\t"
else:
carrying = F"!{adversary.name}_is_carrying_object\t"
status = F"{carrying}& !{adversary.name}Done\t& "
position = F"x{adversary.name}={adversary.cur_pos[1]}\t& y{adversary.name}={adversary.cur_pos[0]}\t& view{adversary.name}={adversary.adversary_dir}"
adv_status.append(status)
adv_positions.append(position)
door_status_text = ""
if door_status:
door_status_text = F"& {''.join(door_status)}\t"
adv_status_text = ""
if adv_status:
adv_status_text = F"& {''.join(adv_status)}"
adv_positions_text = ""
if adv_positions:
adv_positions_text = F"\t& {''.join(adv_positions)}"
key_positions_text = ""
if key_positions:
key_positions_text = F"\t& {''.join(key_positions)}"
agent_position = F"xAgent={coordinates[0]}\t& yAgent={coordinates[1]}\t& viewAgent={view_direction}" agent_position = F"xAgent={coordinates[0]}\t& yAgent={coordinates[1]}\t& viewAgent={view_direction}"
query = f"[{carrying}& {''.join(agent_key_status)}!AgentDone\t& {''.join(door_status)} {agent_position}\t& {''.join(key_positions)}]"
query = f"[{agent_carrying}& {''.join(agent_key_status)}!AgentDone\t{adv_status_text}{door_status_text}{agent_position}{adv_positions_text}{key_positions_text}]"
return query return query
Loading…
Cancel
Save