The source code and dockerfile for the GSW2024 AI Lab.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
 
 
 
 
 
 

814 lines
23 KiB

"""
Copied and adapted from https://github.com/mila-iqia/babyai.
Levels described in the Baby AI ICLR 2019 submission, with the `Go to` instruction.
"""
from __future__ import annotations
from minigrid.envs.babyai.core.levelgen import LevelGen
from minigrid.envs.babyai.core.roomgrid_level import RejectSampling, RoomGridLevel
from minigrid.envs.babyai.core.verifier import GoToInstr, ObjDesc
class GoToRedBallGrey(RoomGridLevel):
    """

    ## Description

    Single-room level in which the agent has to reach the red ball while
    distractor objects are present. Every distractor is recoloured grey to
    reduce perceptual complexity. Distractors exist, but the mission text
    never changes, so language is not needed.

    ## Mission Space

    "go to the red ball"

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the red ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToRedBallGrey-v0`

    """

    def __init__(self, room_size=8, num_dists=7, **kwargs):
        """Set up a 1x1 room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            num_dists: number of distractors requested by `gen_mission`.
            **kwargs: passed through unchanged to the base constructor.
        """
        # Assigned before the base constructor runs, exactly as upstream.
        self.num_dists = num_dists
        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

    def gen_mission(self):
        """Place the agent, the red ball and grey distractors, then set the goal."""
        self.place_agent()
        red_ball, _pos = self.add_object(0, 0, "ball", "red")

        distractors = self.add_distractors(
            num_distractors=self.num_dists,
            all_unique=False,
        )
        # Recolour every distractor so that only the target is not grey.
        for distractor in distractors:
            distractor.color = "grey"

        # Make sure no unblocking is required
        self.check_objs_reachable()

        target_desc = ObjDesc(red_ball.type, red_ball.color)
        self.instrs = GoToInstr(target_desc)
class GoToRedBall(RoomGridLevel):
    """

    ## Description

    Single-room level in which the agent has to reach the red ball while
    distractor objects are present. Distractors exist, but the mission text
    never changes, so language is not needed.

    ## Mission Space

    "go to the red ball"

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the red ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToRedBall-v0`

    """

    def __init__(self, room_size=8, num_dists=7, **kwargs):
        """Set up a 1x1 room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            num_dists: number of distractors requested by `gen_mission`.
            **kwargs: passed through unchanged to the base constructor.
        """
        # Assigned before the base constructor runs, exactly as upstream.
        self.num_dists = num_dists
        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

    def gen_mission(self):
        """Place the agent, the red ball and the distractors, then set the goal."""
        self.place_agent()
        red_ball, _pos = self.add_object(0, 0, "ball", "red")
        self.add_distractors(
            num_distractors=self.num_dists,
            all_unique=False,
        )

        # Make sure no unblocking is required
        self.check_objs_reachable()

        target_desc = ObjDesc(red_ball.type, red_ball.color)
        self.instrs = GoToInstr(target_desc)
class GoToRedBallNoDists(GoToRedBall):
    """

    ## Description

    Reach the red ball in a room that contains no distractors.

    ## Mission Space

    "go to the red ball"

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the red ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToRedBallNoDists-v0`

    """

    def __init__(self, **kwargs):
        """Configure `GoToRedBall` with a size-8 room and zero distractors.

        Args:
            **kwargs: passed through unchanged to `GoToRedBall.__init__`.
                `room_size` and `num_dists` are fixed here and therefore
                must not be supplied by the caller.
        """
        super().__init__(num_dists=0, room_size=8, **kwargs)
class GoToObj(RoomGridLevel):
    """

    ## Description

    Reach the only object in a single room that has no doors and no
    distractors. In the configuration names, `GoToObjS{X}` denotes a room
    of size `X`.

    ## Mission Space

    "go to the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the object.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToObj-v0`
    - `BabyAI-GoToObjS4-v0`
    - `BabyAI-GoToObjS6-v1`

    Notice: `BabyAI-GoToObjS6-v0` is no longer for use due to a bug in the
    registry parameters.

    """

    def __init__(self, room_size=8, **kwargs):
        """Set up a 1x1 room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            **kwargs: passed through unchanged to the base constructor.
        """
        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

    def gen_mission(self):
        """Place the agent and one object, and make that object the goal."""
        self.place_agent()
        # The single object requested from `add_distractors` is the target.
        target = self.add_distractors(num_distractors=1)[0]
        target_desc = ObjDesc(target.type, target.color)
        self.instrs = GoToInstr(target_desc)
class GoToLocal(RoomGridLevel):
    """

    ## Description

    Reach one object inside a single room that has no doors. The target is
    drawn at random from the objects placed in the room. In the
    configuration names, `GoToLocalS{X}N{Y}` denotes a room of size `X`
    with distractor number `Y`.

    ## Mission Space

    "go to the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the object.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToLocal-v0`
    - `BabyAI-GoToLocalS5N2-v0`
    - `BabyAI-GoToLocalS6N2-v0`
    - `BabyAI-GoToLocalS6N3-v0`
    - `BabyAI-GoToLocalS6N4-v0`
    - `BabyAI-GoToLocalS7N4-v0`
    - `BabyAI-GoToLocalS7N5-v0`
    - `BabyAI-GoToLocalS8N2-v0`
    - `BabyAI-GoToLocalS8N3-v0`
    - `BabyAI-GoToLocalS8N4-v0`
    - `BabyAI-GoToLocalS8N5-v0`
    - `BabyAI-GoToLocalS8N6-v0`
    - `BabyAI-GoToLocalS8N7-v0`

    """

    def __init__(self, room_size=8, num_dists=8, **kwargs):
        """Set up a 1x1 room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            num_dists: number of objects requested by `gen_mission`.
            **kwargs: passed through unchanged to the base constructor.
        """
        # Assigned before the base constructor runs, exactly as upstream.
        self.num_dists = num_dists
        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

    def gen_mission(self):
        """Place the agent and the objects, then pick one object as the goal."""
        self.place_agent()
        candidates = self.add_distractors(
            num_distractors=self.num_dists,
            all_unique=False,
        )
        self.check_objs_reachable()

        # The goal is chosen only after the reachability check has passed.
        target = self._rand_elem(candidates)
        target_desc = ObjDesc(target.type, target.color)
        self.instrs = GoToInstr(target_desc)
class GoTo(RoomGridLevel):
    """

    ## Description

    Reach an object that may be located in another room. Many distractors
    are present.

    ## Mission Space

    "go to a/the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the object.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoTo-v0`
    - `BabyAI-GoToOpen-v0`
    - `BabyAI-GoToObjMaze-v0`
    - `BabyAI-GoToObjMazeOpen-v0`
    - `BabyAI-GoToObjMazeS4R2-v0`
    - `BabyAI-GoToObjMazeS4-v0`
    - `BabyAI-GoToObjMazeS5-v0`
    - `BabyAI-GoToObjMazeS6-v0`
    - `BabyAI-GoToObjMazeS7-v0`

    """

    def __init__(
        self,
        room_size=8,
        num_rows=3,
        num_cols=3,
        num_dists=18,
        doors_open=False,
        **kwargs,
    ):
        """Set up a `num_rows` x `num_cols` room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            num_rows: forwarded to the base class as `num_rows`.
            num_cols: forwarded to the base class as `num_cols`.
            num_dists: number of objects requested by `gen_mission`.
            doors_open: when true, `gen_mission` opens all doors at the end.
            **kwargs: passed through unchanged to the base constructor.
        """
        # Both attributes are assigned before the base constructor runs,
        # exactly as upstream.
        self.num_dists = num_dists
        self.doors_open = doors_open
        super().__init__(
            room_size=room_size,
            num_rows=num_rows,
            num_cols=num_cols,
            **kwargs,
        )

    def gen_mission(self):
        """Build the level, pick a random object as the goal, maybe open doors."""
        self.place_agent()
        self.connect_all()
        candidates = self.add_distractors(
            num_distractors=self.num_dists,
            all_unique=False,
        )
        self.check_objs_reachable()

        target = self._rand_elem(candidates)
        target_desc = ObjDesc(target.type, target.color)
        self.instrs = GoToInstr(target_desc)

        # If requested, open all the doors
        if self.doors_open:
            self.open_all_doors()
class GoToImpUnlock(RoomGridLevel):
    """

    ## Description

    Go to an object, which may be in a locked room.
    Competencies: Maze, GoTo, ImpUnlock
    No unblocking.

    ## Mission Space

    "go to a/the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    NOTE(review): the table is kept as upstream. Getting through the locked
    door presumably requires `toggle` with the key -- confirm and update.

    ## Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ## Rewards

    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success,
    and '0' for failure.

    ## Termination

    The episode ends if any one of the following conditions is met:

    1. The agent goes to the object.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToImpUnlock-v0`

    """

    def gen_mission(self):
        """Build a level whose goal object sits alone in a locked room.

        Steps, in order (the order also fixes the sequence of random draws):

        1. Lock a door of a randomly chosen room.
        2. Put a key of the door's color in a *different* room.
        3. Connect all rooms and add two distractors to every room except
           the locked one.
        4. Place the agent outside the locked room.
        5. Check reachability, then add the single target object to the
           locked room and build the instruction from its type and color.

        Room coordinates are compared by value (`==` / `!=`). The previous
        identity comparison (`is` / `is not`) is not a valid integer
        comparison: whenever the two operands are distinct objects --
        presumably always the case if `_rand_int` returns NumPy scalars
        (TODO confirm) -- the key could end up inside the locked room and
        distractors were added to it as well.
        """
        # Add a locked door to a random room
        locked_col = self._rand_int(0, self.num_cols)
        locked_row = self._rand_int(0, self.num_rows)
        door, _ = self.add_door(locked_col, locked_row, locked=True)
        locked_room = self.get_room(locked_col, locked_row)

        # Add the key to a different room: redraw until the coordinates
        # differ from the locked room's.
        while True:
            key_col = self._rand_int(0, self.num_cols)
            key_row = self._rand_int(0, self.num_rows)
            if key_col == locked_col and key_row == locked_row:
                continue
            self.add_object(key_col, key_row, "key", door.color)
            break

        self.connect_all()

        # Add distractors to all but the locked room.
        # We do this to speed up the reachability test,
        # which otherwise will reject all levels with
        # objects in the locked room.
        for col in range(self.num_cols):
            for row in range(self.num_rows):
                if col != locked_col or row != locked_row:
                    self.add_distractors(
                        col, row, num_distractors=2, all_unique=False
                    )

        # The agent must be placed after all the object to respect constraints
        while True:
            self.place_agent()
            start_room = self.room_from_pos(*self.agent_pos)
            # Ensure that we are not placing the agent in the locked room.
            # Identity is the right test here: these are room objects.
            if start_room is not locked_room:
                break

        self.check_objs_reachable()

        # Add a single object to the locked room
        # The instruction requires going to an object matching that description
        (obj,) = self.add_distractors(
            locked_col, locked_row, num_distractors=1, all_unique=False
        )
        self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
class GoToSeq(LevelGen):
    """

    ## Description

    A sequence of go-to-object commands.

    Competencies: Maze, GoTo, Seq

    No locked room.
    No locations.
    No unblocking.

    ## Mission Space

    "go to a/the {color} {type}" +
    "and go to a/the {color} {type}" +
    ", then go to a/the {color} {type}" +
    "and go to a/the {color} {type}"

    {color} is the color of the object. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box" or "key".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the object.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToSeq-v0`
    - `BabyAI-GoToSeqS5R2-v0`

    """

    def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
        """Configure `LevelGen` for go-to instructions only.

        Args:
            room_size: forwarded to `LevelGen` as `room_size`.
            num_rows: forwarded to `LevelGen` as `num_rows`.
            num_cols: forwarded to `LevelGen` as `num_cols`.
            num_dists: forwarded to `LevelGen` as `num_dists`.
            **kwargs: passed through unchanged to `LevelGen.__init__`.
        """
        super().__init__(
            # Caller-tunable geometry and object count.
            num_rows=num_rows,
            num_cols=num_cols,
            room_size=room_size,
            num_dists=num_dists,
            # Fixed settings that define this level family.
            action_kinds=["goto"],
            locations=False,
            unblocking=False,
            locked_room_prob=0,
            **kwargs,
        )
class GoToRedBlueBall(RoomGridLevel):
    """

    ## Description

    Reach the red ball or the blue ball, whichever is present. The level
    holds exactly one ball that is red or blue, plus some distractors that
    are guaranteed to be neither red balls nor blue balls. Language is not
    required to solve this level.

    ## Mission Space

    "go to the {color} ball"

    {color} is the color of the ball. Can be "red" or "blue".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the ball.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToRedBlueBall-v0`

    """

    def __init__(self, room_size=8, num_dists=7, **kwargs):
        """Set up a 1x1 room grid.

        Args:
            room_size: forwarded to the base class as `room_size`.
            num_dists: number of distractors requested by `gen_mission`.
            **kwargs: passed through unchanged to the base constructor.
        """
        # Assigned before the base constructor runs, exactly as upstream.
        self.num_dists = num_dists
        super().__init__(room_size=room_size, num_rows=1, num_cols=1, **kwargs)

    def gen_mission(self):
        """Place distractors and one red-or-blue ball, then set the goal.

        Raises:
            RejectSampling: if any distractor is itself a red or blue ball.
        """
        self.place_agent()
        distractors = self.add_distractors(
            num_distractors=self.num_dists,
            all_unique=False,
        )

        # Ensure there is only one red or blue ball
        clashes_with_target = any(
            d.type == "ball" and d.color in ("blue", "red") for d in distractors
        )
        if clashes_with_target:
            raise RejectSampling("can only have one blue or red ball")

        ball_color = self._rand_elem(["red", "blue"])
        ball, _pos = self.add_object(0, 0, "ball", ball_color)

        # Make sure no unblocking is required
        self.check_objs_reachable()

        target_desc = ObjDesc(ball.type, ball.color)
        self.instrs = GoToInstr(target_desc)
class GoToDoor(RoomGridLevel):
    """

    ## Description

    Reach a door of a given color in the current room.
    No distractors, no language variation.

    ## Mission Space

    "go to the {color} door"

    {color} is the color of the door. Can be "red", "green", "blue",
    "purple", "yellow" or "grey".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the door.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToDoor-v0`

    """

    def __init__(self, **kwargs):
        """Fix the room size to 7 and defer everything else to the base class.

        Args:
            **kwargs: passed through unchanged to the base constructor;
                `room_size` is fixed here and must not be supplied.
        """
        super().__init__(room_size=7, **kwargs)

    def gen_mission(self):
        """Add four doors to room (1, 1), place the agent there, pick a door."""
        # `add_door` returns a pair; only its first element (the door) is kept.
        doors = [self.add_door(1, 1)[0] for _ in range(4)]
        self.place_agent(1, 1)

        chosen_door = self._rand_elem(doors)
        target_desc = ObjDesc("door", chosen_door.color)
        self.instrs = GoToInstr(target_desc)
class GoToObjDoor(RoomGridLevel):
    """

    ## Description

    Reach an object or a door of a given type and color in the current room.

    ## Mission Space

    "go to the {color} {type}"

    {color} is the color of the object or door. Can be "red", "green",
    "blue", "purple", "yellow" or "grey".

    {type} is the type of the object. Can be "ball", "box", "key" or "door".

    ## Action Space

    | Num | Name    | Action            |
    |-----|---------|-------------------|
    | 0   | left    | Turn left         |
    | 1   | right   | Turn right        |
    | 2   | forward | Move forward      |
    | 3   | pickup  | Pick up an object |
    | 4   | drop    | Unused            |
    | 5   | toggle  | Unused            |
    | 6   | done    | Unused            |

    ## Observation Encoding

    - A tile is encoded as the 3-tuple `(OBJECT_IDX, COLOR_IDX, STATE)`.
    - The `OBJECT_TO_IDX` and `COLOR_TO_IDX` mappings live in
        [minigrid/minigrid.py](minigrid/minigrid.py).
    - `STATE` is the door state: 0=open, 1=closed, 2=locked.

    ## Rewards

    Success yields '1 - 0.9 * (step_count / max_steps)'; failure yields '0'.

    ## Termination

    The episode stops as soon as one of these holds:

    1. The agent goes to the object or door.
    2. Timeout (see `max_steps`).

    ## Registered Configurations

    - `BabyAI-GoToObjDoor-v0`

    """

    def __init__(self, **kwargs):
        """Fix the room size to 8 and defer everything else to the base class.

        Args:
            **kwargs: passed through unchanged to the base constructor;
                `room_size` is fixed here and must not be supplied.
        """
        super().__init__(room_size=8, **kwargs)

    def gen_mission(self):
        """Fill room (1, 1) with objects and doors, then pick one as the goal."""
        self.place_agent(1, 1)
        candidates = self.add_distractors(
            1, 1, num_distractors=8, all_unique=False
        )
        # Four doors join the same candidate pool as the eight objects.
        # `add_door` returns a pair; only its first element (the door) is kept.
        candidates.extend(self.add_door(1, 1)[0] for _ in range(4))

        self.check_objs_reachable()

        target = self._rand_elem(candidates)
        target_desc = ObjDesc(target.type, target.color)
        self.instrs = GoToInstr(target_desc)