from abc import ABC
from collections import deque
from typing import List

import numpy as np

from minigrid.core.world_object import Lava
from minigrid.core.actions import Actions
from minigrid.core.constants import DIR_TO_VEC

try:
    from astar import find_path
except ImportError:
    print("Install with:")
    print("pip install git+https://github.com/jrialland/python-astar.git")
    raise ImportError("Need to install astar")


class Task(ABC):
    # Returns a bool: True if the task is completed, False otherwise.
    def completed(self, pos, dir, carrying, env):
        pass

    # Returns the best action to solve this task.
    def get_best_action(self, pos, dir, carrying, env):
        pass

    # Returns a string representing the task.
    def __repr__(self):
        pass


def get_plan(pos, dir, carrying, env, goal_pos, avoid_agent=True):
    # A* over states (x, y, dx, dy): a grid cell plus the unit heading vector.
    def neighbors_fnct(node):
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)

        # The cell ahead is traversable if it is empty or can be overlapped,
        # and it is not lava.
        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)

        if forward_pos_open:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    def avoid_agent_neighbors_fnct(node):
        # Same as neighbors_fnct, but additionally treats the agent's current
        # cell as blocked.
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)

        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)
        forward_pos_not_agent = fwd_pos[0] != env.agent_pos[0] or fwd_pos[1] != env.agent_pos[1]

        if forward_pos_open and forward_pos_not_agent:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    dir_vec = DIR_TO_VEC[dir]
    start = (pos[0], pos[1], dir_vec[0], dir_vec[1])
    goal = (goal_pos[0], goal_pos[1], dir_vec[0], dir_vec[1])

    plan = find_path(
        start=start,
        goal=goal,
        neighbors_fnct=avoid_agent_neighbors_fnct if avoid_agent else neighbors_fnct,
        reversePath=False,
        heuristic_cost_estimate_fnct=lambda a, b: abs(a[0] - b[0]) + abs(a[1] - b[1]),
        distance_between_fnct=lambda a, b: 1.0,
        # The goal is reached as soon as the position matches; heading is ignored.
        is_goal_reached_fnct=lambda a, b: abs(a[0] - b[0]) + abs(a[1] - b[1]) <= 0,
    )

    return list(plan) if plan is not None else None


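# A note on the state encoding used above: plan states are (x, y, dx, dy)
# tuples, i.e. a cell plus a unit heading vector. A left turn maps the heading
# (dx, dy) -> (dy, -dx) and a right turn maps it to (-dy, dx); y grows
# downward in MiniGrid. For example, facing east (1, 0):
#
#     left:  (1, 0) -> (0, -1)    # north
#     right: (1, 0) -> (0, 1)     # south
#
# The Task classes below invert this algebra to decide whether two consecutive
# plan states are connected by Actions.forward, Actions.left or Actions.right.

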
class GoTo(Task):
    def __init__(self, goal_position):
        self.goal_position = goal_position
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return tuple(pos) == tuple(self.goal_position)

    def get_best_action(self, pos, dir, carrying, env):
        if tuple(pos) == tuple(self.goal_position):
            return Actions.done

        # No plan (or an exhausted one): run A*.
        if self.plan is None or len(self.plan) == 0:
            self.plan = get_plan(pos, dir, carrying, env, self.goal_position)

        # If we have a plan but are not in the state we should be in, replan.
        dir_vec = DIR_TO_VEC[dir]
        if self.plan is not None:
            current_state_maybe = self.plan.pop(0)
            if current_state_maybe[0] != pos[0] or \
                    current_state_maybe[1] != pos[1] or \
                    current_state_maybe[2] != dir_vec[0] or \
                    current_state_maybe[3] != dir_vec[1]:
                self.plan = None
                return self.get_best_action(pos, dir, carrying, env)

        if self.plan is None or len(self.plan) < 1:
            # Yield, because another actor might be blocking the way to our goal.
            self.plan = None
            return Actions.done

        next_state = self.plan[0]

        # Decide how to reach the next state.
        if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
            return Actions.forward
        elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
            return Actions.left
        elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
            return Actions.right
        else:  # something went wrong, e.g. we bumped into another agent: replan
            self.plan = None
            return self.get_best_action(pos, dir, carrying, env)

    def __repr__(self):
        return "Task: Go to position {}".format(self.goal_position)


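# Illustrative usage sketch for GoTo (the control loop below is an assumed
# harness, not something this module provides; `env` stands for any
# MiniGrid-style environment exposing `grid` and `agent_pos`):
#
#     task = GoTo(goal_position=(5, 3))
#     while not task.completed(pos, dir, carrying, env):
#         action = task.get_best_action(pos, dir, carrying, env)
#         ...  # apply `action` to the adversary, then refresh pos/dir/carrying

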
class PickUpObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return carrying == self.obj

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            # Facing the object: pick it up.
            return Actions.pickup
        else:
            # Turn until the object is directly ahead.
            return Actions.left

    def __repr__(self):
        return "Task: Pick up object at position {}".format(self.obj_position)


class PlaceObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj

    def completed(self, pos, dir, carrying, env):
        return env.grid.get(*self.obj_position) == self.obj and carrying is None

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            # Facing the target cell: drop the carried object.
            return Actions.drop
        else:
            # Turn until the target cell is directly ahead.
            return Actions.left

    def __repr__(self):
        return "Task: Place object at position {}".format(self.obj_position)


class DoNothing(Task):
    def __init__(self, duration=0):
        # duration == 0 means the task never completes.
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        return Actions.done

    def __repr__(self):
        return "Task: Do nothing"


class DoRandom(Task):
    def __init__(self, duration=0):
        # duration == 0 means the task never completes.
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        # Random choice among left (0), right (1) and forward (2).
        return int(np.random.randint(0, 3))

    def __repr__(self):
        return "Task: Act randomly"


class FollowAgent(Task):
    def __init__(self, agent, duration=0):
        self.agent = agent
        self.duration = duration
        self.steps = 0
        # "red" denotes the environment's own agent; any other name indexes
        # env.adversaries.
        self.follow_agent = False
        if self.agent == "red":
            self.follow_agent = True
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        targets_position = np.array(self._get_targets_position(env))
        optimal_directions = self._get_optimal_directions(pos, targets_position)

        if dir in optimal_directions:
            self.plan = get_plan(pos, dir, carrying, env, targets_position, avoid_agent=False)
            # The plan starts with our current state, so the next state is at
            # index 1; a plan of length 1 means we are already at the target.
            if self.plan is not None and len(self.plan) > 1:
                next_state = self.plan[1]
                dir_vec = DIR_TO_VEC[dir]
                if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
                    return Actions.forward
                elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
                    return Actions.left
                elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
                    return Actions.right
                else:
                    self.plan = None
                    return Actions.done
            else:
                return Actions.done

        return self._get_turn_action(dir, optimal_directions)

    def __repr__(self):
        return f"Task: Follow agent {self.agent}"

    def _get_targets_position(self, env):
        if self.follow_agent:
            return env.agent_pos
        return env.adversaries[self.agent].adversary_pos

    def _get_optimal_directions(self, pos, targets_position):
        # Direction indices follow DIR_TO_VEC: 0 = east, 1 = south, 2 = west,
        # 3 = north (y grows downward).
        offset = targets_position - np.array(pos)
        optimal_directions = list()
        if offset[0] < 0: optimal_directions.append(2)
        if offset[0] > 0: optimal_directions.append(0)
        if offset[1] < 0: optimal_directions.append(3)
        if offset[1] > 0: optimal_directions.append(1)
        return optimal_directions

    def _get_turn_action(self, dir, optimal_directions):
        diffs = [d - dir for d in optimal_directions]
        if -1 in diffs or 3 in diffs: return Actions.left
        if 1 in diffs or -3 in diffs: return Actions.right
        # The target is directly behind us: turn either way at random.
        return int(np.random.randint(0, 2))


class TaskManager:
    def __init__(self, tasks: List[Task], repeating=False):
        self.repeating = repeating
        if repeating:
            self.tasks = deque(tasks)
        else:
            self.tasks = tasks

    def get_best_action(self, pos, dir, carrying, env):
        if len(self.tasks) == 0:
            raise Exception("List of tasks empty")
        if self.tasks[0].completed(pos, dir, carrying, env) and not self.repeating:
            self.tasks.pop(0)
        elif self.tasks[0].completed(pos, dir, carrying, env) and self.repeating:
            # Move the finished task to the back of the queue so it runs again.
            done_task = self.tasks.popleft()
            if isinstance(done_task, (DoRandom, DoNothing, FollowAgent)):
                done_task.reset_steps()
            self.tasks.append(done_task)
        try:
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        except IndexError:
            # The adversary has finished all its tasks and will yield.
            self.tasks = [DoNothing()]
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        return best_action
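

if __name__ == "__main__":
    # Minimal smoke test of the task rotation. This is an illustrative sketch:
    # it exploits the fact that DoNothing and DoRandom never inspect
    # pos/dir/carrying/env, so no MiniGrid environment is needed here.
    manager = TaskManager([DoRandom(duration=3), DoNothing(duration=2)], repeating=True)
    for _ in range(8):
        print(manager.get_best_action(pos=(1, 1), dir=0, carrying=None, env=None))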