GSW_AI_LAB/notebooks/environments/Minigrid/minigrid/envs/multiroom.py


								from __future__ import annotations


								from minigrid.core.constants import COLOR_NAMES

								from minigrid.core.grid import Grid

								from minigrid.core.mission import MissionSpace

								from minigrid.core.world_object import Door, Goal, Wall

								from minigrid.minigrid_env import MiniGridEnv


								class MultiRoom:

								    def __init__(self, top, size, entryDoorPos, exitDoorPos):

								        self.top = top

								        self.size = size

								        self.entryDoorPos = entryDoorPos

								        self.exitDoorPos = exitDoorPos


								class MultiRoomEnv(MiniGridEnv):


								    """

								    ## Description


								    This environment has a series of connected rooms with doors that must be

								    opened in order to get to the next room. The final room has the green goal

								    square the agent must get to. This environment is extremely difficult to

								    solve using RL alone. However, by gradually increasing the number of rooms

								    and building a curriculum, the environment can be solved.


								    ## Mission Space


								    "traverse the rooms to get to the goal"


								    ## Action Space


								    | Num | Name         | Action                    |

								    |-----|--------------|---------------------------|

								    | 0   | left         | Turn left                 |

								    | 1   | right        | Turn right                |

								    | 2   | forward      | Move forward              |

								    | 3   | pickup       | Unused                    |

								    | 4   | drop         | Unused                    |

								    | 5   | toggle       | Toggle/activate an object |

								    | 6   | done         | Unused                    |


								    ## Observation Encoding


								    - Each tile is encoded as a 3 dimensional tuple:

								        `(OBJECT_IDX, COLOR_IDX, STATE)`

								    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in

								        [minigrid/minigrid.py](minigrid/minigrid.py)

								    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked


								    ## Rewards


								    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.


								    ## Termination


								    The episode ends if any one of the following conditions is met:


								    1. The agent reaches the goal.

								    2. Timeout (see `max_steps`).


								    ## Registered Configurations


								    S: size of map SxS.

								    N: number of rooms.


								    - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)

								    - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)

								    - `MiniGrid-MultiRoom-N6-v0` (six rooms)


								    """


								    def __init__(

								        self,

								        minNumRooms,

								        maxNumRooms,

								        maxRoomSize=10,

								        max_steps: int | None = None,

								        **kwargs,

								    ):

								        assert minNumRooms > 0

								        assert maxNumRooms >= minNumRooms

								        assert maxRoomSize >= 4


								        self.minNumRooms = minNumRooms

								        self.maxNumRooms = maxNumRooms

								        self.maxRoomSize = maxRoomSize


								        self.rooms = []


								        mission_space = MissionSpace(mission_func=self._gen_mission)


								        self.size = 25


								        if max_steps is None:

								            max_steps = maxNumRooms * 20


								        super().__init__(

								            mission_space=mission_space,

								            width=self.size,

								            height=self.size,

								            max_steps=max_steps,

								            **kwargs,

								        )


								    @staticmethod

								    def _gen_mission():

								        return "traverse the rooms to get to the goal"


								    def _gen_grid(self, width, height):

								        roomList = []


								        # Choose a random number of rooms to generate

								        numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)


								        while len(roomList) < numRooms:

								            curRoomList = []


								            entryDoorPos = (self._rand_int(0, width - 2), self._rand_int(0, width - 2))


								            # Recursively place the rooms

								            self._placeRoom(

								                numRooms,

								                roomList=curRoomList,

								                minSz=4,

								                maxSz=self.maxRoomSize,

								                entryDoorWall=2,

								                entryDoorPos=entryDoorPos,

								            )


								            if len(curRoomList) > len(roomList):

								                roomList = curRoomList


								        # Store the list of rooms in this environment

								        assert len(roomList) > 0

								        self.rooms = roomList


								        # Create the grid

								        self.grid = Grid(width, height)

								        wall = Wall()


								        prevDoorColor = None


								        # For each room

								        for idx, room in enumerate(roomList):


								            topX, topY = room.top

								            sizeX, sizeY = room.size


								            # Draw the top and bottom walls

								            for i in range(0, sizeX):

								                self.grid.set(topX + i, topY, wall)

								                self.grid.set(topX + i, topY + sizeY - 1, wall)


								            # Draw the left and right walls

								            for j in range(0, sizeY):

								                self.grid.set(topX, topY + j, wall)

								                self.grid.set(topX + sizeX - 1, topY + j, wall)


								            # If this isn't the first room, place the entry door

								            if idx > 0:

								                # Pick a door color different from the previous one

								                doorColors = set(COLOR_NAMES)

								                if prevDoorColor:

								                    doorColors.remove(prevDoorColor)

								                # Note: the use of sorting here guarantees determinism,

								                # This is needed because Python's set is not deterministic

								                doorColor = self._rand_elem(sorted(doorColors))


								                entryDoor = Door(doorColor)

								                self.grid.set(room.entryDoorPos[0], room.entryDoorPos[1], entryDoor)

								                prevDoorColor = doorColor


								                prevRoom = roomList[idx - 1]

								                prevRoom.exitDoorPos = room.entryDoorPos


								        # Randomize the starting agent position and direction

								        self.place_agent(roomList[0].top, roomList[0].size)


								        # Place the final goal in the last room

								        self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)


								        self.mission = "traverse the rooms to get to the goal"


								    def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall, entryDoorPos):

								        # Choose the room size randomly

								        sizeX = self._rand_int(minSz, maxSz + 1)

								        sizeY = self._rand_int(minSz, maxSz + 1)


								        # The first room will be at the door position

								        if len(roomList) == 0:

								            topX, topY = entryDoorPos

								        # Entry on the right

								        elif entryDoorWall == 0:

								            topX = entryDoorPos[0] - sizeX + 1

								            y = entryDoorPos[1]

								            topY = self._rand_int(y - sizeY + 2, y)

								        # Entry wall on the south

								        elif entryDoorWall == 1:

								            x = entryDoorPos[0]

								            topX = self._rand_int(x - sizeX + 2, x)

								            topY = entryDoorPos[1] - sizeY + 1

								        # Entry wall on the left

								        elif entryDoorWall == 2:

								            topX = entryDoorPos[0]

								            y = entryDoorPos[1]

								            topY = self._rand_int(y - sizeY + 2, y)

								        # Entry wall on the top

								        elif entryDoorWall == 3:

								            x = entryDoorPos[0]

								            topX = self._rand_int(x - sizeX + 2, x)

								            topY = entryDoorPos[1]

								        else:

								            assert False, entryDoorWall


								        # If the room is out of the grid, can't place a room here

								        if topX < 0 or topY < 0:

								            return False

								        if topX + sizeX > self.width or topY + sizeY >= self.height:

								            return False


								        # If the room intersects with previous rooms, can't place it here

								        for room in roomList[:-1]:

								            nonOverlap = (

								                topX + sizeX < room.top[0]

								                or room.top[0] + room.size[0] <= topX

								                or topY + sizeY < room.top[1]

								                or room.top[1] + room.size[1] <= topY

								            )


								            if not nonOverlap:

								                return False


								        # Add this room to the list

								        roomList.append(MultiRoom((topX, topY), (sizeX, sizeY), entryDoorPos, None))


								        # If this was the last room, stop

								        if numLeft == 1:

								            return True


								        # Try placing the next room

								        for i in range(0, 8):


								            # Pick which wall to place the out door on

								            wallSet = {0, 1, 2, 3}

								            wallSet.remove(entryDoorWall)

								            exitDoorWall = self._rand_elem(sorted(wallSet))

								            nextEntryWall = (exitDoorWall + 2) % 4


								            # Pick the exit door position

								            # Exit on right wall

								            if exitDoorWall == 0:

								                exitDoorPos = (topX + sizeX - 1, topY + self._rand_int(1, sizeY - 1))

								            # Exit on south wall

								            elif exitDoorWall == 1:

								                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY + sizeY - 1)

								            # Exit on left wall

								            elif exitDoorWall == 2:

								                exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))

								            # Exit on north wall

								            elif exitDoorWall == 3:

								                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)

								            else:

								                assert False


								            # Recursively create the other rooms

								            success = self._placeRoom(

								                numLeft - 1,

								                roomList=roomList,

								                minSz=minSz,

								                maxSz=maxSz,

								                entryDoorWall=nextEntryWall,

								                entryDoorPos=exitDoorPos,

								            )


								            if success:

								                break


								        return True