from abc import ABC
from collections import deque
from typing import List

import numpy as np

from minigrid.core.world_object import Lava
from minigrid.core.actions import Actions
from minigrid.core.constants import DIR_TO_VEC

try:
    from astar import find_path
except ImportError:
    print("Install with:")
    print("pip install git+https://github.com/jrialland/python-astar.git")
    raise ImportError("Need to install astar")


class Task(ABC):
    # Returns a bool: True if the task is completed, False otherwise.
    def completed(self, pos, dir, carrying, env):
        pass

    # Returns the best action to solve this task.
    def get_best_action(self, pos, dir, carrying, env):
        pass

    # Returns a string representing the task.
    def __repr__(self):
        pass


def get_plan(pos, dir, carrying, env, goal_pos, avoid_agent=True):
    # A* over states (x, y, dx, dy): a grid cell plus the unit heading vector.
    def neighbors_fnct(node):
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)

        # The cell ahead is traversable if it is empty or can be overlapped,
        # and it is not lava.
        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)

        if forward_pos_open:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    def avoid_agent_neighbors_fnct(node):
        # Same as neighbors_fnct, but additionally treats the agent's current
        # cell as blocked.
        left = (node[0], node[1], node[3], -node[2])
        right = (node[0], node[1], -node[3], node[2])
        fwd_pos = node[0] + node[2], node[1] + node[3]
        forward_cell = env.grid.get(*fwd_pos)

        forward_pos_open = forward_cell is None or forward_cell.can_overlap()
        forward_pos_open = forward_pos_open and not isinstance(forward_cell, Lava)
        forward_pos_not_agent = fwd_pos[0] != env.agent_pos[0] or fwd_pos[1] != env.agent_pos[1]

        if forward_pos_open and forward_pos_not_agent:
            forward = (node[0] + node[2], node[1] + node[3], node[2], node[3])
            return forward, left, right
        else:
            return left, right

    dir_vec = DIR_TO_VEC[dir]
    start = (pos[0], pos[1], dir_vec[0], dir_vec[1])
    goal = (goal_pos[0], goal_pos[1], dir_vec[0], dir_vec[1])

    plan = find_path(
        start=start,
        goal=goal,
        neighbors_fnct=avoid_agent_neighbors_fnct if avoid_agent else neighbors_fnct,
        reversePath=False,
        heuristic_cost_estimate_fnct=lambda a, b: abs(a[0] - b[0]) + abs(a[1] - b[1]),
        distance_between_fnct=lambda a, b: 1.0,
        # The goal is reached as soon as the position matches; heading is ignored.
        is_goal_reached_fnct=lambda a, b: abs(a[0] - b[0]) + abs(a[1] - b[1]) <= 0,
    )

    return list(plan) if plan is not None else None


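# A note on the state encoding used above: plan states are (x, y, dx, dy)
# tuples, i.e. a cell plus a unit heading vector. A left turn maps the heading
# (dx, dy) -> (dy, -dx) and a right turn maps it to (-dy, dx); y grows
# downward in MiniGrid. For example, facing east (1, 0):
#
#     left:  (1, 0) -> (0, -1)    # north
#     right: (1, 0) -> (0, 1)     # south
#
# The Task classes below invert this algebra to decide whether two consecutive
# plan states are connected by Actions.forward, Actions.left or Actions.right.

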
class GoTo(Task):
    def __init__(self, goal_position):
        self.goal_position = goal_position
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return tuple(pos) == tuple(self.goal_position)

    def get_best_action(self, pos, dir, carrying, env):
        if tuple(pos) == tuple(self.goal_position):
            return Actions.done

        # No plan (or an exhausted one): run A*.
        if self.plan is None or len(self.plan) == 0:
            self.plan = get_plan(pos, dir, carrying, env, self.goal_position)

        # If we have a plan but are not in the state we should be in, replan.
        dir_vec = DIR_TO_VEC[dir]
        if self.plan is not None:
            current_state_maybe = self.plan.pop(0)
            if current_state_maybe[0] != pos[0] or \
                    current_state_maybe[1] != pos[1] or \
                    current_state_maybe[2] != dir_vec[0] or \
                    current_state_maybe[3] != dir_vec[1]:
                self.plan = None
                return self.get_best_action(pos, dir, carrying, env)

        if self.plan is None or len(self.plan) < 1:
            # Yield, because another actor might be blocking the way to our goal.
            self.plan = None
            return Actions.done

        next_state = self.plan[0]

        # Decide how to reach the next state.
        if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
            return Actions.forward
        elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
            return Actions.left
        elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
            return Actions.right
        else:  # something went wrong, e.g. we bumped into another agent: replan
            self.plan = None
            return self.get_best_action(pos, dir, carrying, env)

    def __repr__(self):
        return "Task: Go to position {}".format(self.goal_position)


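# Illustrative usage sketch for GoTo (the control loop below is an assumed
# harness, not something this module provides; `env` stands for any
# MiniGrid-style environment exposing `grid` and `agent_pos`):
#
#     task = GoTo(goal_position=(5, 3))
#     while not task.completed(pos, dir, carrying, env):
#         action = task.get_best_action(pos, dir, carrying, env)
#         ...  # apply `action` to the adversary, then refresh pos/dir/carrying

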
class PickUpObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return carrying == self.obj

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            # Facing the object: pick it up.
            return Actions.pickup
        else:
            # Turn until the object is directly ahead.
            return Actions.left

    def __repr__(self):
        return "Task: Pick up object at position {}".format(self.obj_position)


class PlaceObject(Task):
    def __init__(self, obj_position, obj):
        self.obj_position = obj_position
        self.obj = obj

    def completed(self, pos, dir, carrying, env):
        return env.grid.get(*self.obj_position) == self.obj and carrying is None

    def get_best_action(self, pos, dir, carrying, env):
        assert abs(pos[0] - self.obj_position[0]) + abs(pos[1] - self.obj_position[1]) == 1, \
            "Distance to the object needs to be exactly 1, please move the adversary (GoTo) first."
        delta_x = self.obj_position[0] - pos[0]
        delta_y = self.obj_position[1] - pos[1]
        dir_vec = DIR_TO_VEC[dir]
        if delta_x == dir_vec[0] and delta_y == dir_vec[1]:
            # Facing the target cell: drop the carried object.
            return Actions.drop
        else:
            # Turn until the target cell is directly ahead.
            return Actions.left

    def __repr__(self):
        return "Task: Place object at position {}".format(self.obj_position)


class DoNothing(Task):
    def __init__(self, duration=0):
        # duration == 0 means the task never completes.
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        return Actions.done

    def __repr__(self):
        return "Task: Do nothing"


class DoRandom(Task):
    def __init__(self, duration=0):
        # duration == 0 means the task never completes.
        self.duration = duration
        self.steps = 0

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        # Random choice among left (0), right (1) and forward (2).
        return int(np.random.randint(0, 3))

    def __repr__(self):
        return "Task: Act randomly"


class FollowAgent(Task):
    def __init__(self, agent, duration=0):
        self.agent = agent
        self.duration = duration
        self.steps = 0
        # "red" denotes the environment's own agent; any other name indexes
        # env.adversaries.
        self.follow_agent = False
        if self.agent == "red":
            self.follow_agent = True
        self.plan = None

    def completed(self, pos, dir, carrying, env):
        return self.duration != 0 and self.steps == self.duration

    def reset_steps(self):
        self.steps = 0

    def get_best_action(self, pos, dir, carrying, env):
        if self.duration != 0:
            self.steps += 1
        targets_position = np.array(self._get_targets_position(env))
        optimal_directions = self._get_optimal_directions(pos, targets_position)

        if dir in optimal_directions:
            self.plan = get_plan(pos, dir, carrying, env, targets_position, avoid_agent=False)
            # The plan starts with our current state, so the next state is at
            # index 1; a plan of length 1 means we are already at the target.
            if self.plan is not None and len(self.plan) > 1:
                next_state = self.plan[1]
                dir_vec = DIR_TO_VEC[dir]
                if abs(next_state[0] - pos[0]) == 1 or abs(next_state[1] - pos[1]) == 1:
                    return Actions.forward
                elif next_state[2] == dir_vec[1] and next_state[3] == -dir_vec[0]:
                    return Actions.left
                elif next_state[2] == -dir_vec[1] and next_state[3] == dir_vec[0]:
                    return Actions.right
                else:
                    self.plan = None
                    return Actions.done
            else:
                return Actions.done

        return self._get_turn_action(dir, optimal_directions)

    def __repr__(self):
        return f"Task: Follow agent {self.agent}"

    def _get_targets_position(self, env):
        if self.follow_agent:
            return env.agent_pos
        return env.adversaries[self.agent].adversary_pos

    def _get_optimal_directions(self, pos, targets_position):
        # Direction indices follow DIR_TO_VEC: 0 = east, 1 = south, 2 = west,
        # 3 = north (y grows downward).
        offset = targets_position - np.array(pos)
        optimal_directions = list()
        if offset[0] < 0: optimal_directions.append(2)
        if offset[0] > 0: optimal_directions.append(0)
        if offset[1] < 0: optimal_directions.append(3)
        if offset[1] > 0: optimal_directions.append(1)
        return optimal_directions

    def _get_turn_action(self, dir, optimal_directions):
        diffs = [d - dir for d in optimal_directions]
        if -1 in diffs or 3 in diffs: return Actions.left
        if 1 in diffs or -3 in diffs: return Actions.right
        # The target is directly behind us: turn either way at random.
        return int(np.random.randint(0, 2))


class TaskManager:
    def __init__(self, tasks: List[Task], repeating=False):
        self.repeating = repeating
        if repeating:
            self.tasks = deque(tasks)
        else:
            self.tasks = tasks

    def get_best_action(self, pos, dir, carrying, env):
        if len(self.tasks) == 0:
            raise Exception("List of tasks empty")
        if self.tasks[0].completed(pos, dir, carrying, env) and not self.repeating:
            self.tasks.pop(0)
        elif self.tasks[0].completed(pos, dir, carrying, env) and self.repeating:
            # Move the finished task to the back of the queue so it runs again.
            done_task = self.tasks.popleft()
            if isinstance(done_task, (DoRandom, DoNothing, FollowAgent)):
                done_task.reset_steps()
            self.tasks.append(done_task)
        try:
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        except IndexError:
            # The adversary has finished all its tasks and will yield.
            self.tasks = [DoNothing()]
            best_action = self.tasks[0].get_best_action(pos, dir, carrying, env)
        return best_action
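

if __name__ == "__main__":
    # Minimal smoke test of the task rotation. This is an illustrative sketch:
    # it exploits the fact that DoNothing and DoRandom never inspect
    # pos/dir/carrying/env, so no MiniGrid environment is needed here.
    manager = TaskManager([DoRandom(duration=3), DoNothing(duration=2)], repeating=True)
    for _ in range(8):
        print(manager.get_best_action(pos=(1, 1), dir=0, carrying=None, env=None))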