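"""Scripted adversary behaviors for MiniGrid environments.

Each Task proposes the next action for an adversary (A* path planning,
picking up or placing objects, idling, acting randomly, or following another
agent); a TaskManager sequences tasks, optionally repeating them.
"""
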
from abc import ABC, abstractmethod
from collections import deque
from typing import List

import numpy as np
from minigrid.core.world_object import Lava
from minigrid.core.actions import Actions
from minigrid.core.constants import DIR_TO_VEC

try:
    from astar import find_path
except ImportError:
    print("Install with:")
    print("pip install git+https://github.com/jrialland/python-astar.git")
    raise ImportError("Need to install astar")


class Task(ABC):
    # Returns a bool: True if the task is completed, False otherwise.
    @abstractmethod
    def completed(self, pos, dir, carrying, env):
        pass

    # Returns the best action to solve this task.
    @abstractmethod
    def get_best_action(self, pos, dir, carrying, env):
        pass

    # Returns a string representing the task.
    @abstractmethod
    def __repr__(self):
        pass


# Plans a path with A* over states (x, y, dx, dy), where (dx, dy) is the
# direction vector. Returns the list of states from start to goal, or None
# if no path exists.
def get_plan(pos, dir, carrying, env, goal_pos, avoid_agent=True):
    def neighbors_fnct(node):
        # Turning in place keeps the position and rotates the direction vector.
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)
        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)
        if forward_pos_open:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    def avoid_agent_neighbors_fnct(node):
        # Same as neighbors_fnct, but also treats the agent's cell as blocked.
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)
        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)
        forward_pos_not_agent = fwd_pos[0] != env.agent_pos[0] or fwd_pos[1] != env.agent_pos[1]
        if forward_pos_open and forward_pos_not_agent:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    dir_vec = DIR_TO_VEC[dir]
    start = (pos[0], pos[1], dir_vec[0], dir_vec[1])
    # The direction part of the goal is a placeholder: reaching the goal
    # position in any orientation counts as done (see is_goal_reached_fnct).
    goal = (goal_pos[0], goal_pos[1], dir_vec[0], dir_vec[1])
    plan = find_path(
        start=start,
        goal=goal,
        neighbors_fnct=avoid_agent_neighbors_fnct if avoid_agent else neighbors_fnct,
        reversePath=False,
        heuristic_cost_estimate_fnct=lambda a, b: abs(a[0] - b[0]) + abs(a[1] - b[1]),
        distance_between_fnct=lambda a, b: 1.0,
        is_goal_reached_fnct=lambda a, b: a[0] == b[0] and a[1] == b[1],
    )
    return list(plan) if plan is not None else None
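
# Example call (illustrative only): plan from cell (1, 1) facing east (dir 0)
# to cell (4, 2) in some already-generated MiniGrid env `env`:
#
#   path = get_plan((1, 1), 0, None, env, (4, 2))
#   # -> list of (x, y, dx, dy) states from start to goal, or None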


class GoTo(Task):
    def __init__(self, goal_position):
        self.goal_position = goal_position
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        # Compare as tuples so this also works when pos is a numpy array.
        return tuple(pos) == tuple(self.goal_position)

    def get_best_action(self, pos, dir, carrying, env):
        if tuple(pos) == tuple(self.goal_position):
            return Actions.done
        # No usable plan (or it is exhausted): run A* to the goal position.
        if self.plan is None or len(self.plan) == 0:
            self.plan = get_plan(pos, dir, carrying, env, self.goal_position)
        # If we have a plan but are not in the state we should be in, replan.
        dir_vec = DIR_TO_VEC[dir]
        if self.plan is not None:
            current_state_maybe = self.plan.pop(0)
            if current_state_maybe[0] != pos[0] or \
                    current_state_maybe[1] != pos[1] or \
                    current_state_maybe[2] != dir_vec[0] or \
                    current_state_maybe[3] != dir_vec[1]:
                self.plan = None
                return self.get_best_action(pos, dir, carrying, env)
        if self.plan is None or len(self.plan) < 1:
            # Yield, because another actor might be blocking the way to our goal.
            self.plan = None
            return Actions.done
        next_state = self.plan[0]
        # Decide how to reach the next state.
        if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
            return Actions.forward
        elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
            return Actions.left
        elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
            return Actions.right
        else:
            # Something went wrong (e.g. we bumped into another agent): replan.
            self.plan = None
            return self.get_best_action(pos, dir, carrying, env)

    def __repr__(self):
        return "Task: Go to position {}".format(self.goal_position)


class PickUpObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return carrying == self.obj

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        # MiniGrid positions are (x, y), i.e. (column, row).
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            return Actions.pickup
        else:
            # Turn until we face the object.
            return Actions.left

    def __repr__(self):
        return "Task: Pick up object at position {}".format(self.obj_position)


class PlaceObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj

    def completed(self, pos, dir, carrying, env):
        return env.grid.get(*self.obj_position) == self.obj and carrying is None

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        # MiniGrid positions are (x, y), i.e. (column, row).
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            return Actions.drop
        else:
            # Turn until we face the target cell.
            return Actions.left

    def __repr__(self):
        return "Task: Place object at position {}".format(self.obj_position)


class DoNothing(Task):
    def __init__(self, duration=0):
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        if self.duration == 0:
            return False
        elif self.duration == self.steps:
            return True
        return False

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        return Actions.done

    def __repr__(self):
        return "Task: Do nothing"


class DoRandom(Task):
    def __init__(self, duration=0):
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        # duration == 0 means the task never completes on its own.
        if self.duration == 0:
            return False
        elif self.duration == self.steps:
            return True
        return False

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        # Pick uniformly among left (0), right (1) and forward (2).
        return Actions(int(np.random.randint(0, 3)))

    def __repr__(self):
        return "Task: Act randomly"


class FollowAgent(Task):
    def __init__(self, agent, duration=0):
        self.agent = agent
        self.duration = duration
        self.steps = 0
        # "red" refers to the main agent; any other name is looked up among
        # the adversaries.
        self.follow_agent = False
        if self.agent == "red":
            self.follow_agent = True
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        if self.duration == 0:
            return False
        elif self.duration == self.steps:
            return True
        return False

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        targets_position = np.array(self._get_targets_position(env))
        optimal_directions = self._get_optimal_directions(pos, targets_position)
        if dir in optimal_directions:
            self.plan = get_plan(pos, dir, carrying, env, targets_position, avoid_agent=False)
            if self.plan is not None and len(self.plan) > 1:
                # plan[0] is the current state; plan[1] is the next one.
                next_state = self.plan[1]
                dir_vec = DIR_TO_VEC[dir]
                if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
                    return Actions.forward
                elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
                    return Actions.left
                elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
                    return Actions.right
                else:
                    self.plan = None
                    return Actions.done
            else:
                return Actions.done
        return self._get_turn_action(dir, optimal_directions)

    def __repr__(self):
        return f"Task: Follow agent {self.agent}"

    def _get_targets_position(self, env):
        if self.follow_agent:
            return env.agent_pos
        return env.adversaries[self.agent].adversary_pos

    def _get_optimal_directions(self, pos, targets_position):
        # Directions follow DIR_TO_VEC: 0 = east, 1 = south, 2 = west, 3 = north.
        offset = targets_position - np.array(pos)
        optimal_directions = list()
        if offset[0] < 0:
            optimal_directions.append(2)
        if offset[0] > 0:
            optimal_directions.append(0)
        if offset[1] < 0:
            optimal_directions.append(3)
        if offset[1] > 0:
            optimal_directions.append(1)
        return optimal_directions

    def _get_turn_action(self, dir, optimal_directions):
        diffs = [d - dir for d in optimal_directions]
        if -1 in diffs or 3 in diffs:
            return Actions.left
        if 1 in diffs or -3 in diffs:
            return Actions.right
        # The target is directly behind us: turn either way at random.
        return Actions(int(np.random.randint(0, 2)))


class TaskManager:
    def __init__(self, tasks: List[Task], repeating=False):
        self.repeating = repeating
        if repeating:
            self.tasks = deque(tasks)
        else:
            self.tasks = tasks

    def get_best_action(self, pos, dir, carrying, env):
        if len(self.tasks) == 0:
            raise Exception("List of tasks empty")
        if self.tasks[0].completed(pos, dir, carrying, env):
            if self.repeating:
                # Move the finished task to the back of the queue.
                done_task = self.tasks.popleft()
                if isinstance(done_task, (DoRandom, DoNothing, FollowAgent)):
                    done_task.reset_steps()
                self.tasks.append(done_task)
            else:
                self.tasks.pop(0)
        try:
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        except IndexError:
            # The adversary has finished all its tasks and will yield.
            self.tasks = [DoNothing()]
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        return best_action
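
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not from the original file): drive an adversary
# with a repeating patrol. `env`, `adv_pos`, `adv_dir` and `carrying` are
# hypothetical stand-ins for whatever environment and adversary state the
# caller tracks; the coordinates are made up.
#
#   manager = TaskManager(
#       [GoTo((3, 1)), GoTo((3, 5)), DoRandom(duration=4)],
#       repeating=True,
#   )
#   action = manager.get_best_action(adv_pos, adv_dir, carrying, env)
# ---------------------------------------------------------------------------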