Tempest_in_Action/notebooks/environments/Minigrid/minigrid/envs/lavaslippery.py


								from __future__ import annotations


								from minigrid.core.constants import COLOR_NAMES

								from minigrid.core.grid import Grid

								from minigrid.core.mission import MissionSpace

								from minigrid.core.world_object import (

								    Ball,

								    Box,

								    Key,

								    Slippery,

								    SlipperyEast,

								    SlipperySouth,

								    SlipperyNorth,

								    SlipperyWest,

								    Lava,

								    Goal,

								    Point

								 )


								from minigrid.minigrid_env import MiniGridEnv, is_slippery


								import numpy as np


								class LavaSlipperyEnv(MiniGridEnv):

								    def __init__(self,

								                randomize_start=True, size=12,

								                width=None,

								                height=None,

								                probability_intended=8/9,

								                probability_turn_intended=8/9,

								                obstacle_type=Lava,

								                goal_reward=1,

								                failure_penalty=-1,

								                per_step_penalty=0,

								                dense_rewards=False,

								                     **kwargs):


								        self.obstacle_type = obstacle_type

								        self.size = size

								        self.probability_intended = probability_intended

								        self.probability_turn_intended = probability_turn_intended


								        if width is not None and height is not None:

								            self.width = width

								            self.height = height

								        elif size is not None:

								            self.width = size

								            self.height = size

								        else:

								            raise ValueError(f"Please define either width and height or a size for square environments. The set values are width: {width}, height: {height}, size: {size}.")


								        if obstacle_type == Lava:

								            mission_space = MissionSpace(

								                mission_func=lambda: "avoid the lava and get to the green goal square"

								            )

								        else:

								            mission_space = MissionSpace(

								                mission_func=lambda: "find the opening and get to the green goal square"

								            )

								        super().__init__(

								            mission_space=mission_space,

								            width=self.width,

								            height=self.height,

								            max_steps=200,

								            # Set this to True for maximum speed

								            see_through_walls=False,

								            **kwargs

								        )


								        self.randomize_start = randomize_start

								        self.goal_reward = goal_reward

								        self.failure_penalty = failure_penalty

								        self.dense_rewards = dense_rewards

								        self.per_step_penalty = per_step_penalty


								    def _place_slippery_lava(self, x, y):

								        self.put_obj(Lava(), x, y)

								        self.put_obj(self._create_slippery_north(), x, y - 1)

								        self.put_obj(self._create_slippery_south(), x, y + 1)

								        self.put_obj(self._create_slippery_east(), x + 1, y)

								        self.put_obj(self._create_slippery_west(), x - 1, y)


								    def create_slippery_lava_line(self, y, x_start, x_end, no_slippery_left=False, no_slippery_right=False):

								        if not no_slippery_left:

								            self.put_obj(self._create_slippery_west(), x_start - 1, y)


								        if not no_slippery_right:

								            self.put_obj(self._create_slippery_east(), x_end + 1 , y)


								        for x in range(x_start, x_end + 1):

								            self.put_obj(Lava(), x, y)


								    def _gen_grid(self, width, height):

								        assert width >= 5 and height >= 5


								        # Create an empty grid

								        self.grid = Grid(width, height)


								        # Generate the surrounding walls

								        self.grid.horz_wall(0, 0)

								        self.grid.horz_wall(0, height - 1)

								        self.grid.vert_wall(0, 0)

								        self.grid.vert_wall(width - 1, 0)


								        self.mission = (

								            "avoid the lava and get to the green goal square"

								            if self.obstacle_type == Lava

								            else "find the opening and get to the green goal square"

								        )


								    def disable_random_start(self):

								        self.randomize_start = False


								    def place_agent(self, spawn_on_slippery=False, agent_pos=None, agent_dir=0):

								        max_tries = 10_000

								        num_tries = 0


								        if self.randomize_start == True:

								            while True:

								                num_tries += 1

								                if num_tries > max_tries:

								                    raise RecursionError("rejection sampling failed in place_obj")

								                x = np.random.randint(0, self.width)

								                y = np.random.randint(0, self.height)


								                cell = self.grid.get(*(x,y))

								                if cell is None or (cell.can_overlap() and not isinstance(cell, Lava) and not isinstance(cell, Goal) and (spawn_on_slippery or not is_slippery(cell))):

								                    self.agent_pos = np.array((x, y))

								                    self.agent_dir = np.random.randint(0, 4)

								                    break

								        elif agent_dir is None:

								            self.agent_pos = np.array((1, 1))

								            self.agent_dir = 0

								        else:

								            self.agent_pos = agent_pos

								            self.agent_dir = agent_dir


								    def place_goal(self, goal_pos):

								        self.goal_pos = goal_pos

								        self.put_obj(Goal(), *self.goal_pos)


								    def run_bfs(self):

								        self.bfs_reward = self.run_BFS_reward()

								        self.bfs_reward = [rew * 0.1 for rew in self.bfs_reward]


								    def printGrid(self, init=False):

								        grid = super().printGrid(init)


								        properties_str = ""


								        properties_str += F"ProbTurnIntended:{self.probability_turn_intended}\n"

								        properties_str += F"ProbForwardIntended:{self.probability_intended}\n"


								        return grid + properties_str


								    def step(self, action):

								        obs, reward, terminated, truncated, info = super().step(action)

								        return obs, reward - self.per_step_penalty, terminated, truncated, info


								class LavaSlipperyEnv1(LavaSlipperyEnv):

								    def __init__(self, *args, **kwargs):

								        super().__init__(*args, **kwargs)


								    def _gen_grid(self, width, height):

								        super()._gen_grid(width, height)


								        w_mid = width // 2

								        h_mid = height // 2


								        self.put_obj(Lava(), w_mid - 1, h_mid - 1)

								        self.put_obj(Lava(), w_mid, h_mid - 1)

								        self.put_obj(Lava(), w_mid - 1, h_mid)

								        self.put_obj(Lava(), w_mid, h_mid)


								        self.put_obj(self._create_slippery_east(), w_mid - 2, h_mid - 1)

								        self.put_obj(self._create_slippery_east(), w_mid - 2, h_mid)


								        self.put_obj(self._create_slippery_west(), w_mid + 1, h_mid - 1)

								        self.put_obj(self._create_slippery_west(), w_mid + 1, h_mid)


								        self.put_obj(Lava(), w_mid - 1, 1)

								        self.put_obj(Lava(), w_mid, 1)


								        self.put_obj(self._create_slippery_north(), w_mid - 1, 2)

								        self.put_obj(self._create_slippery_north(), w_mid, 2)

								        self.put_obj(self._create_slippery_east(), w_mid - 2, 1)

								        self.put_obj(self._create_slippery_west(), w_mid + 1, 1)


								        self.put_obj(Lava(), w_mid - 1, height - 2)

								        self.put_obj(Lava(), w_mid, height - 2)


								        self.put_obj(self._create_slippery_south(), w_mid - 1, height - 3)

								        self.put_obj(self._create_slippery_south(), w_mid, height - 3)

								        self.put_obj(self._create_slippery_east(), w_mid - 2, height - 2)

								        self.put_obj(self._create_slippery_west(), w_mid + 1, height - 2)


								        self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0)

								        self.place_goal(np.array((width - 2, height - 2)))

								        if self.dense_rewards: self.run_bfs()


								class LavaSlipperyCliff(LavaSlipperyEnv):

								    def __init__(self, *args, **kwargs):

								        super().__init__(*args, **kwargs)


								    def _gen_grid(self, width, height):

								        super()._gen_grid(width, height)

								        for i in range(1,5):

								            self.grid.horz_wall(3, i, width - 6, Lava)

								        for i in range(5,height - 3):

								            self.grid.horz_wall(3, i, width - 6, SlipperyNorth(probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended))


								        self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0)

								        self.place_goal(np.array((width - 2, 1)))

								        if self.dense_rewards: self.run_bfs()


								class LavaSlipperyHill(LavaSlipperyEnv):

								    def __init__(self, *args, **kwargs):

								        super().__init__(*args, **kwargs)


								    def _gen_grid(self, width, height):

								        super()._gen_grid(width, height)

								        for i in range(1,height - 1):

								            self.grid.horz_wall(1, i, width - 2, SlipperyNorth)

								        for i in range(1,5):

								            self.grid.horz_wall(3, i, width - 6, Lava)


								        self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0, spawn_on_slippery=True)

								        self.place_goal(np.array((width - 2, 1)))

								        if self.dense_rewards: self.run_bfs()


								class LavaSlipperyMaze(LavaSlipperyEnv):

								    def __init__(self, *args, **kwargs):

								        super().__init__(*args, **kwargs)


								    def _gen_grid(self, width, height):

								        super()._gen_grid(width, height)

								        slippery_tile = SlipperySouth(probability_intended=self.probability_intended, probability_turn_intended=self.probability_turn_intended)

								        self.grid.horz_wall(1, 3, 5, Lava)

								        self.grid.horz_wall(1, 4, 5, Lava)

								        self.grid.horz_wall(6, 3, 3, slippery_tile)

								        self.grid.horz_wall(6, 4, 3, slippery_tile)

								        self.grid.horz_wall(9, 3, 7, Lava)

								        self.grid.horz_wall(9, 4, 7, Lava)


								        self.grid.horz_wall(4, 7, 4, Lava)

								        self.grid.horz_wall(4, 8, 4, Lava)

								        self.grid.horz_wall(13, 7, 6, Lava)

								        self.grid.horz_wall(13, 8, 6, Lava)


								        self.grid.horz_wall(1, 11, 6, Lava)

								        self.grid.horz_wall(1, 12, 6, Lava)

								        self.grid.horz_wall(7, 11, 3, slippery_tile)

								        self.grid.horz_wall(7, 12, 3, slippery_tile)

								        self.grid.horz_wall(10, 11, 7, Lava)

								        self.grid.horz_wall(10, 12, 7, Lava)


								        self.grid.horz_wall(1, 15, 4, Lava)

								        self.grid.horz_wall(1, 16, 4, Lava)

								        self.grid.horz_wall(10, 15, 9, Lava)

								        self.grid.horz_wall(10, 16, 9, Lava)


								        self.place_agent(agent_pos=np.array((1, 1)), agent_dir=0)

								        self.place_goal(np.array((width - 2, height - 2)))


								    def step(self, action):

								        obs, reward, terminated, truncated, info = super().step(action)

								        if self.dense_rewards:

								            reward -= 0.0001 * (self.height - self.agent_pos[1])

								        return obs, reward, terminated, truncated, info