GSW_AI_LAB/notebooks/environments/Minigrid/minigrid/envs/obstructedmaze.py

from __future__ import annotations

from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC
from minigrid.core.mission import MissionSpace
from minigrid.core.roomgrid import RoomGrid
from minigrid.core.world_object import Ball, Box, Key


class ObstructedMazeEnv(RoomGrid):

    """
    ## Description

    The agent has to pick up a box which is placed in a corner of a 3x3 maze.
    The doors are locked, the keys are hidden in boxes and doors are obstructed
    by balls. This environment can be solved without relying on language.

    ## Mission Space

    "pick up the {COLOR_NAMES[0]} ball"

    ## Action Space

    | Num | Name         | Action                    |
    |-----|--------------|---------------------------|
    | 0   | left         | Turn left                 |
    | 1   | right        | Turn right                |
    | 2   | forward      | Move forward              |
    | 3   | pickup       | Pick up an object         |
    | 4   | drop         | Unused                    |
    | 5   | toggle       | Toggle/activate an object |
    | 6   | done         | Unused                    |

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent picks up the blue ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    "NDl" are the number of doors locked.
    "h" if the key is hidden in a box.
    "b" if the door is obstructed by a ball.
    "Q" number of quarters that will have doors and keys out of the 9 that the
    map already has.
    "Full" 3x3 maze with "h" and "b" options.
    "v1" prevents the key from being covered by the blocking ball. Only 2Dlhb, 1Q, 2Q, and Full are
    updated to v1. Other configurations won't face this issue because there is no blocking ball (1Dl,
    1Dlh, 2Dl, 2Dlh) or the only blocking ball is added before the key (1Dlhb).

    - `MiniGrid-ObstructedMaze-1Dl-v0`
    - `MiniGrid-ObstructedMaze-1Dlh-v0`
    - `MiniGrid-ObstructedMaze-1Dlhb-v0`
    - `MiniGrid-ObstructedMaze-2Dl-v0`
    - `MiniGrid-ObstructedMaze-2Dlh-v0`
    - `MiniGrid-ObstructedMaze-2Dlhb-v0`
    - `MiniGrid-ObstructedMaze-2Dlhb-v1`
    - `MiniGrid-ObstructedMaze-1Q-v0`
    - `MiniGrid-ObstructedMaze-1Q-v1`
    - `MiniGrid-ObstructedMaze-2Q-v0`
    - `MiniGrid-ObstructedMaze-2Q-v1`
    - `MiniGrid-ObstructedMaze-Full-v0`
    - `MiniGrid-ObstructedMaze-Full-v1`

    """

    def __init__(
        self,
        num_rows,
        num_cols,
        num_rooms_visited,
        max_steps: int | None = None,
        **kwargs,
    ):
        room_size = 6

        if max_steps is None:
            max_steps = 4 * num_rooms_visited * room_size**2

        mission_space = MissionSpace(
            mission_func=self._gen_mission,
            ordered_placeholders=[[COLOR_NAMES[0]]],
        )
        super().__init__(
            mission_space=mission_space,
            room_size=room_size,
            num_rows=num_rows,
            num_cols=num_cols,
            max_steps=max_steps,
            **kwargs,
        )
        self.obj = Ball()  # initialize the obj attribute, that will be changed later on

    @staticmethod
    def _gen_mission(color: str):
        return f"pick up the {color} ball"

    def _gen_grid(self, width, height):
        super()._gen_grid(width, height)

        # Define all possible colors for doors
        self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
        # Define the color of the ball to pick up
        self.ball_to_find_color = COLOR_NAMES[0]
        # Define the color of the balls that obstruct doors
        self.blocking_ball_color = COLOR_NAMES[1]
        # Define the color of boxes in which keys are hidden
        self.box_color = COLOR_NAMES[2]

        self.mission = "pick up the %s ball" % self.ball_to_find_color

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)

        if action == self.actions.pickup:
            if self.carrying and self.carrying == self.obj:
                reward = self._reward()
                terminated = True

        return obs, reward, terminated, truncated, info

    def add_door(
        self,
        i,
        j,
        door_idx=0,
        color=None,
        locked=False,
        key_in_box=False,
        blocked=False,
    ):
        """
        Add a door. If the door must be locked, it also adds the key.
        If the key must be hidden, it is put in a box. If the door must
        be obstructed, it adds a ball in front of the door.
        """

        door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)

        if blocked:
            vec = DIR_TO_VEC[door_idx]
            blocking_ball = Ball(self.blocking_ball_color) if blocked else None
            self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball)

        if locked:
            obj = Key(door.color)
            if key_in_box:
                box = Box(self.box_color)
                box.contains = obj
                obj = box
            self.place_in_room(i, j, obj)

        return door, door_pos


class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
    """
    A blue ball is hidden in a 2x1 maze. A locked door separates
    rooms. Doors are obstructed by a ball and keys are hidden in boxes.
    """

    def __init__(self, key_in_box=True, blocked=True, **kwargs):
        self.key_in_box = key_in_box
        self.blocked = blocked

        super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, **kwargs)

    def _gen_grid(self, width, height):
        super()._gen_grid(width, height)

        self.add_door(
            0,
            0,
            door_idx=0,
            color=self.door_colors[0],
            locked=True,
            key_in_box=self.key_in_box,
            blocked=self.blocked,
        )

        self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
        self.place_agent(0, 0)


class ObstructedMaze_Full(ObstructedMazeEnv):
    """
    A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors
    are locked, doors are obstructed by a ball and keys are hidden in
    boxes.
    """

    def __init__(
        self,
        agent_room=(1, 1),
        key_in_box=True,
        blocked=True,
        num_quarters=4,
        num_rooms_visited=25,
        **kwargs,
    ):
        self.agent_room = agent_room
        self.key_in_box = key_in_box
        self.blocked = blocked
        self.num_quarters = num_quarters

        super().__init__(
            num_rows=3, num_cols=3, num_rooms_visited=num_rooms_visited, **kwargs
        )

    def _gen_grid(self, width, height):
        super()._gen_grid(width, height)

        middle_room = (1, 1)
        # Define positions of "side rooms" i.e. rooms that are neither
        # corners nor the center.
        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][: self.num_quarters]
        for i in range(len(side_rooms)):
            side_room = side_rooms[i]

            # Add a door between the center room and the side room
            self.add_door(
                *middle_room, door_idx=i, color=self.door_colors[i], locked=False
            )

            for k in [-1, 1]:
                # Add a door to each side of the side room
                self.add_door(
                    *side_room,
                    locked=True,
                    door_idx=(i + k) % 4,
                    color=self.door_colors[(i + k) % len(self.door_colors)],
                    key_in_box=self.key_in_box,
                    blocked=self.blocked,
                )

        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][: self.num_quarters]
        ball_room = self._rand_elem(corners)

        self.obj, _ = self.add_object(
            ball_room[0], ball_room[1], "ball", color=self.ball_to_find_color
        )
        self.place_agent(*self.agent_room)


class ObstructedMaze_2Dl(ObstructedMaze_Full):
    def __init__(self, **kwargs):
        super().__init__((2, 1), False, False, 1, 4, **kwargs)


class ObstructedMaze_2Dlh(ObstructedMaze_Full):
    def __init__(self, **kwargs):
        super().__init__((2, 1), True, False, 1, 4, **kwargs)


class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
    def __init__(self, **kwargs):
        super().__init__((2, 1), True, True, 1, 4, **kwargs)