GSW_AI_LAB/notebooks/environments/Minigrid/minigrid/envs/adversaries_base.py

from __future__ import annotations

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal, Lava, SlipperyNorth, SlipperyEast, SlipperySouth, SlipperyWest, Ball
from minigrid.minigrid_env import MiniGridEnv, is_slippery
from minigrid.core.tasks import GoTo, DoNothing, PickUpObject, PlaceObject

import numpy as np

class AdversaryEnv(MiniGridEnv):

    """
    ## Description

    """

    def __init__(self, width=7, height=6, generate_wall=True, generate_lava=False, generate_slippery=False ,max_steps: int | None = None, **kwargs):
        if max_steps is None:
            max_steps = 10 * (width * height)**2
        mission_space = MissionSpace(mission_func=self._gen_mission)
        self.collision_penalty = -1
        super().__init__(
            mission_space=mission_space, width=width, height=height, max_steps=max_steps, **kwargs
        )

    @staticmethod
    def _gen_mission():
        return "Finish your task while avoiding the adversaries"

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)


    def step(self, action):
        delete_list = list()
        for position, box in self.background_tiles.items():
            if self.grid.get(*position) is None:
                self.grid.set(*position, box)
                self.grid.set_background(*position, None)
                delete_list.append(tuple(position))
        for position in delete_list:
            del self.background_tiles[position]

        obs, reward, terminated, truncated, info = super().step(action)

        agent_pos = self.agent_pos
        adv_penalty = 0

        if not terminated:
            for adversary in self.adversaries.values():
                collided = self.move_adversary(adversary, agent_pos)
                self.trajectory.append((adversary.color, adversary.adversary_pos, adversary.adversary_dir))
                if collided:
                    terminated = True
                    info["collision"] = True
                    try:
                        reward = self.collision_penalty
                    except e:
                        reward = -1


        return obs, reward, terminated, truncated, info

    def move_adversary(self, adversary, agent_pos):
        # fetch current location and forward location
        cur_pos = adversary.adversary_pos
        current_cell = self.grid.get(*adversary.adversary_pos)
        fwd_pos = cur_pos + adversary.dir_vec()
        fwd_cell = self.grid.get(*fwd_pos)
        collision = False
        need_position_update = False

        action = adversary.get_action(self)
        if action == self.actions.forward and is_slippery(current_cell):
            probabilities = current_cell.get_probabilities(adversary.adversary_dir)
            possible_fwd_pos, prob = self.get_neighbours_prob(adversary.adversary_pos, probabilities)
            fwd_pos_index = np.random.choice(len(possible_fwd_pos), 1, p=prob)
            fwd_pos = possible_fwd_pos[fwd_pos_index[0]]
            fwd_cell = self.grid.get(*fwd_pos)
            need_position_update = True

        if action == self.actions.left:
            adversary.adversary_dir -= 1
            if adversary.adversary_dir < 0:
                adversary.adversary_dir += 4

        # Rotate right
        elif action == self.actions.right:
            adversary.adversary_dir = (adversary.adversary_dir + 1) % 4

        # Move forward
        elif action == self.actions.forward:
            if fwd_pos[0] == agent_pos[0] and fwd_pos[1] == agent_pos[1]:
                collision = True
            if fwd_cell is None or fwd_cell.can_overlap():
                adversary.adversary_pos = tuple(fwd_pos)

        # Pick up an object
        elif action == self.actions.pickup:
            if fwd_cell and fwd_cell.can_pickup():
                if adversary.carrying is None:
                    adversary.carrying = fwd_cell
                    adversary.carrying.cur_pos = np.array([-1, -1])
                    self.grid.set(fwd_pos[0], fwd_pos[1], None)

        # Drop an object
        elif action == self.actions.drop:
            if not fwd_cell and adversary.carrying:
                self.grid.set(fwd_pos[0], fwd_pos[1], adversary.carrying)
                adversary.carrying.cur_pos = fwd_pos
                adversary.carrying = None

        # Toggle/activate an object
        elif action == self.actions.toggle:
            if fwd_cell:
                fwd_cell.toggle(self, fwd_pos)

        # Done action (not used by default)
        elif action == self.actions.done:
            pass

        else:
            raise ValueError(f"Unknown action: {action}")

        if need_position_update and (fwd_cell is None or fwd_cell.can_overlap()):
            adversary.adversary_pos = tuple(fwd_pos)

        return collision