|
|
from __future__ import annotations
from minigrid.core.constants import COLOR_NAMES from minigrid.core.grid import Grid from minigrid.core.mission import MissionSpace from minigrid.core.world_object import ( Ball, Box, Key, Slippery, SlipperyEast, SlipperySouth, SlipperyNorth, SlipperyWest, Lava, Goal, Point )
from minigrid.minigrid_env import MiniGridEnv
import numpy as np import random
class LavaFaultyEnv(MiniGridEnv):
    """Grid world with obstacle regions in which the agent's actions can fault.

    When a fault occurs, the action requested in ``step`` is replaced by the
    action that was executed on the previous step. The goal is placed at
    ``(width - 2, 1)``; the obstacle cells are laid out by ``_gen_grid``.

    ### Registered Configurations

    S: size of map SxS.
    V: Version

    - `MiniGrid-LavaFaultyS12-v0`
    """

    def __init__(
        self,
        size=12,
        width=None,
        height=None,
        gap=5,
        fault_probability=0.1,
        per_step_penalty=0.0,
        faulty_behavior=True,
        obstacle_type=Lava,
        randomize_start=True,
        **kwargs,
    ):
        """Configure the environment and initialise the underlying MiniGridEnv.

        Args:
            size: Side length used for both dimensions unless *both* ``width``
                and ``height`` are given.
            width: Grid width; only honoured together with ``height``.
            height: Grid height; only honoured together with ``width``.
            gap: Offset that shapes the obstacle layout (see ``_gen_grid``).
            fault_probability: Probability, per step after the first, that the
                requested action is replaced by the previous one.
            per_step_penalty: Amount subtracted from the reward of every step.
            faulty_behavior: Whether faults are injected in ``step`` and
                whether ``printGrid`` reports the fault probability.
            obstacle_type: Class instantiated for every obstacle cell; also
                selects the mission text.
            randomize_start: If true the agent is placed at a random empty
                cell, otherwise at ``(1, height - 2)`` with direction ``3``.
            **kwargs: Forwarded to ``MiniGridEnv.__init__``. ``max_steps``
                defaults to 200 when not supplied.
        """
        self.obstacle_type = obstacle_type
        self.size = size
        self.gap = gap
        self.fault_probability = fault_probability
        self.faulty_behavior = faulty_behavior
        self.previous_action = None
        self.per_step_penalty = per_step_penalty
        self.randomize_start = randomize_start

        # Explicit dimensions are only used when both are provided; a single
        # one is ignored and the square ``size`` is used instead.
        if width is not None and height is not None:
            self.width = width
            self.height = height
        else:
            self.width = size
            self.height = size

        mission_text = self._mission_text()
        mission_space = MissionSpace(mission_func=lambda: mission_text)

        # Keep 200 as the default episode length but let callers override it;
        # passing it positionally alongside **kwargs would raise a duplicate
        # keyword TypeError if the caller supplied ``max_steps`` themselves.
        kwargs.setdefault("max_steps", 200)

        super().__init__(
            mission_space=mission_space,
            width=self.width,
            height=self.height,
            # Set this to True for maximum speed
            see_through_walls=False,
            **kwargs,
        )

    def _mission_text(self):
        """Return the mission string matching the configured obstacle type."""
        if self.obstacle_type == Lava:
            return "avoid the lava and get to the green goal square"
        return "find the opening and get to the green goal square"

    def fault(self):
        """Return True with probability ``fault_probability``.

        NOTE(review): this draws from the global ``random`` module, not from
        the environment's own RNG, so seeding the environment alone presumably
        does not make fault sequences reproducible - confirm whether that is
        intended.
        """
        return random.random() < self.fault_probability

    def step(self, action: ActType) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Advance one step, possibly replacing ``action`` by the previous one.

        A fault can only happen when ``faulty_behavior`` is enabled and a
        previous action exists for the current episode. The executed action
        (after a possible fault) is remembered for the next step, and
        ``per_step_penalty`` is subtracted from the parent's reward.
        """
        can_fault = (
            self.faulty_behavior
            and self.step_count > 0
            and self.previous_action is not None
        )
        if can_fault and self.fault():
            action = self.previous_action
        self.previous_action = action

        obs, reward, terminated, truncated, info = super().step(action)
        return obs, reward - self.per_step_penalty, terminated, truncated, info

    def reset(self, **kwargs) -> tuple[ObsType, dict[str, Any]]:
        """Forget the previous action and reset the underlying environment."""
        self.previous_action = None
        return super().reset(**kwargs)

    def _gen_grid(self, width, height):
        """Build the grid: obstacles, outer wall, goal and agent start.

        Horizontal obstacle runs start at column 1 on every row above
        ``height - gap`` and get one cell shorter per row. Vertical obstacle
        runs are drawn for every interior column from right to left, each
        starting one row lower than the previous (beginning at row ``gap``)
        and extending to the bottom of the grid.
        """
        assert width >= 5 and height >= 5

        # Create an empty grid
        self.grid = Grid(width, height)

        # Use the configured obstacle class rather than always Lava so that
        # the layout agrees with the mission text chosen from obstacle_type.
        obstacle = self.obstacle_type

        for row in range(1, height - 1):
            if row < (height - self.gap):
                self.grid.horz_wall(1, row, width - self.gap - row, obstacle)

        for i, col in enumerate(reversed(range(1, width - 1))):
            self.grid.vert_wall(col, self.gap + i, None, obstacle)

        # Surrounding wall overwrites anything drawn on the border.
        self.grid.wall_rect(0, 0, width, height)

        # Place the goal before the agent so that a random start position can
        # never coincide with the goal cell.
        self.put_obj(Goal(), width - 2, 1)

        if self.randomize_start:
            self.place_agent()
        else:
            self.agent_pos = np.array((1, height - 2))
            self.agent_dir = 3

        self.mission = self._mission_text()

    def disable_random_start(self):
        """Make subsequent grid generations use the fixed start position."""
        self.randomize_start = False

    def printGrid(self, init=False):
        """Return the parent's grid printout with fault properties appended.

        The parent's ``printGrid`` result is assumed to be a string, since it
        is concatenated with one. When ``faulty_behavior`` is enabled a
        ``FaultProbability:<p>`` line is appended.
        """
        grid = super().printGrid(init)

        properties_str = ""
        if self.faulty_behavior:
            properties_str += f"FaultProbability:{self.fault_probability}\n"

        return grid + properties_str
|