|
|
from __future__ import annotations
import pickle import re import warnings
import gymnasium as gym import numpy as np import pytest from gymnasium.envs.registration import EnvSpec from gymnasium.utils.env_checker import check_env, data_equivalence
from minigrid.core.grid import Grid from minigrid.core.mission import MissionSpace from tests.utils import all_testing_env_specs, assert_equals
# Warning texts that `test_env` tolerates when they are raised by `check_env`.
# Every entry is wrapped in the ANSI "yellow" prefix and "reset" suffix so it
# can be compared verbatim against the first argument of a captured warning.
# NOTE(review): the second message pairs "maximum value" with "-infinity" --
# confirm that this matches the text gymnasium actually emits.
CHECK_ENV_IGNORE_WARNINGS = [
    "\x1b[33mWARN: " + text + "\x1b[0m"
    for text in (
        "A Box observation space minimum value is -infinity. This is probably too low.",
        "A Box observation space maximum value is -infinity. This is probably too high.",
        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
    )
]
@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_env(spec):
    """Run `check_env` on the unwrapped environment and reject unexpected warnings.

    The environment is built with the built-in checker disabled so that
    `check_env` is the only source of API warnings.  Any captured warning
    whose first argument is not listed in ``CHECK_ENV_IGNORE_WARNINGS``
    fails the test.

    Args:
        spec: Environment spec supplied by the parametrisation.

    Raises:
        gym.error.Error: If `check_env` emitted a warning that is not in the
            ignore list.
    """
    env = spec.make(disable_env_checker=True).unwrapped

    try:
        # Capture warnings.  The "always" filter is installed *inside* the
        # context manager so that it is undone on exit instead of leaking
        # into the process-wide warning filters.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            # Test if env adheres to Gym API
            check_env(env)
    finally:
        # Release the environment even when `check_env` raises.
        env.close()

    for warning in caught:
        if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS:
            raise gym.error.Error(f"Unexpected warning: {warning.message}")
# Note that this precludes running this test in multiple threads. # However, we probably already can't do multithreading due to some environments. SEED = 0 NUM_STEPS = 50
@pytest.mark.parametrize(
    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
)
def test_env_determinism_rollout(env_spec: EnvSpec):
    """Roll out two identically seeded environments and require identical results.

    Two environments are created from the same spec and reset with ``SEED``.
    For ``NUM_STEPS`` steps an action is sampled from the first environment's
    (seeded) action space and applied to both.  The test asserts that:

    - the values returned by the first reset are equal
    - every observation is equal and lies in the observation space
    - reward, terminated, truncated and info are equal at every step

    Whenever an episode ends, both environments are reset with ``SEED`` again.
    Specs flagged as nondeterministic are skipped without any check.
    """
    # Rollout equality is meaningless for a nondeterministic environment.
    if env_spec.nondeterministic is True:
        return

    env_a = env_spec.make(disable_env_checker=True)
    env_b = env_spec.make(disable_env_checker=True)

    reset_a = env_a.reset(seed=SEED)
    reset_b = env_b.reset(seed=SEED)
    assert_equals(reset_a, reset_b)

    # Only the first environment's action space is used as the action source;
    # the determinism of action sampling itself is not under test.
    env_a.action_space.seed(SEED)

    for time_step in range(NUM_STEPS):
        action = env_a.action_space.sample()

        obs_a, reward_a, terminated_a, truncated_a, info_a = env_a.step(action)
        obs_b, reward_b, terminated_b, truncated_b, info_b = env_b.step(action)

        assert_equals(obs_a, obs_b, f"[{time_step}] ")
        # obs_b is covered by the equality check just above.
        assert env_a.observation_space.contains(obs_a)

        assert (
            reward_a == reward_b
        ), f"[{time_step}] reward 1={reward_a}, reward 2={reward_b}"
        assert (
            terminated_a == terminated_b
        ), f"[{time_step}] terminated 1={terminated_a}, terminated 2={terminated_b}"
        assert (
            truncated_a == truncated_b
        ), f"[{time_step}] truncated 1={truncated_a}, truncated 2={truncated_b}"
        assert_equals(info_a, info_b, f"[{time_step}] ")

        # The flags of the second environment were verified equal above, so
        # checking the first environment's flags is sufficient.
        episode_over = terminated_a or truncated_a
        if episode_over:
            env_a.reset(seed=SEED)
            env_b.reset(seed=SEED)

    env_a.close()
    env_b.close()
@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_render_modes(spec):
    """Exercise every non-"human" render mode advertised by the environment.

    A first environment is created only to read ``metadata["render_modes"]``.
    For each listed mode other than ``"human"`` a fresh environment is made
    with that render mode, reset, stepped once with a sampled action and
    rendered.  All environments are closed, including when a call raises.

    Args:
        spec: Environment spec supplied by the parametrisation.
    """
    env = spec.make()
    try:
        for mode in env.metadata.get("render_modes", []):
            if mode == "human":
                continue

            new_env = spec.make(render_mode=mode)
            try:
                new_env.reset()
                new_env.step(new_env.action_space.sample())
                new_env.render()
            finally:
                # Previously leaked: one environment per render mode.
                new_env.close()
    finally:
        # Previously leaked: the environment used to read the metadata.
        env.close()
@pytest.mark.parametrize("env_id", ["MiniGrid-DoorKey-6x6-v0"])
def test_agent_sees_method(env_id):
    """Check `agent_sees` against the decoded observation, and `in` on grids.

    After a reset the grid must contain a green goal and no blue key.  Then,
    for 500 random steps, the result of ``agent_sees(*goal_pos)`` must match
    whether a green goal appears in the grid decoded from the observation
    image.  The environment is reset whenever an episode ends.

    The goal position is taken to be ``(width - 2, height - 2)``; the test
    relies on the environment placing the goal there.

    Args:
        env_id: Id of the environment to create with `gym.make`.
    """
    env = gym.make(env_id)
    # Environment attributes are read from the unwrapped env rather than
    # through the wrapper stack, matching how `test_pprint_grid` accesses
    # `pprint_grid`.
    base_env = env.unwrapped
    goal_pos = (base_env.grid.width - 2, base_env.grid.height - 2)

    # Test the env.agent_sees() function
    env.reset()

    # Test the "in" operator on grid objects
    assert ("green", "goal") in base_env.grid
    assert ("blue", "key") not in base_env.grid

    for _ in range(500):
        action = env.action_space.sample()
        obs, _reward, terminated, truncated, _info = env.step(action)

        grid, _ = Grid.decode(obs["image"])
        goal_visible = ("green", "goal") in grid

        agent_sees_goal = base_env.agent_sees(*goal_pos)
        assert agent_sees_goal == goal_visible

        if terminated or truncated:
            env.reset()

    env.close()
@pytest.mark.parametrize(
    "env_spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_max_steps_argument(env_spec):
    """Check that the `max_steps` argument truncates the episode on time.

    The environment is created with ``max_steps=50`` and stepped with a fixed
    action until it reports truncation.  Truncation must occur on exactly the
    ``max_steps``-th step.

    The loop is bounded by ``max_steps`` so that an environment which never
    truncates makes the test fail instead of hanging forever.

    Args:
        env_spec: Environment spec supplied by the parametrisation.
    """
    max_steps = 50
    env = env_spec.make(max_steps=max_steps)
    try:
        env.reset()

        step_count = 0
        truncated = False
        while not truncated and step_count < max_steps:
            # NOTE(review): the meaning of action 4 is not visible in this
            # file -- confirm against the environment's action enumeration.
            _, _, _, truncated, _ = env.step(4)
            step_count += 1

        assert truncated, f"episode not truncated after {max_steps} steps"
        assert step_count == max_steps
    finally:
        env.close()
@pytest.mark.parametrize(
    "env_spec",
    all_testing_env_specs,
    ids=[spec.id for spec in all_testing_env_specs],
)
def test_pickle_env(env_spec):
    """Round-trip each environment through pickle and drive both copies.

    The environment is serialised and deserialised with `pickle`; the
    original and the restored copy are then reset and stepped once with the
    same sampled action, and the paired results are handed to
    `data_equivalence`.
    """
    original: gym.Env = env_spec.make()
    restored: gym.Env = pickle.loads(pickle.dumps(original))

    # NOTE(review): the value returned by `data_equivalence` is discarded
    # here and below -- confirm whether it returns a bool that should be
    # asserted, and whether unseeded resets can be expected to match.
    first_reset = original.reset()
    second_reset = restored.reset()
    data_equivalence(first_reset, second_reset)

    sampled_action = original.action_space.sample()
    first_step = original.step(sampled_action)
    second_step = restored.step(sampled_action)
    data_equivalence(first_step, second_step)

    original.close()
    restored.close()
@pytest.mark.parametrize(
    "env_spec",
    all_testing_env_specs,
    ids=[spec.id for spec in all_testing_env_specs],
)
def old_run_test(env_spec):
    """Legacy smoke test: seeding, stepping, encode/decode and reward range.

    NOTE(review): the function name does not start with ``test``, so under
    pytest's default naming rules it is presumably not collected -- confirm
    whether that is intentional.

    Steps performed:

    1. Cap the episode length at 200 steps, then reset and render once.
    2. For five seeds, reset twice with the same seed and require equal grids.
    3. Play five episodes of random actions; on every step check the agent
       position bounds, the observation-image encode/decode round trip,
       ``str(env)`` and that the reward lies in ``reward_range``.

    Args:
        env_spec: Environment spec supplied by the parametrisation.
    """
    # Load the gym environment
    env = env_spec.make()
    # Environment attributes are read and written on the unwrapped env:
    # assigning `max_steps` on the wrapper returned by `make` would only
    # create an attribute on that wrapper and leave the real limit untouched.
    base_env = env.unwrapped
    base_env.max_steps = min(base_env.max_steps, 200)
    env.reset()
    env.render()

    # Verify that the same seed always produces the same environment
    for offset in range(5):
        seed = 1337 + offset
        env.reset(seed=seed)
        grid1 = base_env.grid
        env.reset(seed=seed)
        grid2 = base_env.grid
        assert grid1 == grid2

    env.reset()

    # Run for a few episodes
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = env.action_space.sample()

        obs, reward, terminated, truncated, _info = env.step(action)

        # Validate the agent position
        assert base_env.agent_pos[0] < base_env.width
        assert base_env.agent_pos[1] < base_env.height

        # Test observation encode/decode roundtrip
        img = obs["image"]
        grid, vis_mask = Grid.decode(img)
        img2 = grid.encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function
        str(env)

        # Check that the reward is within the specified range
        reward_low, reward_high = base_env.reward_range[0], base_env.reward_range[1]
        assert reward >= reward_low, reward
        assert reward <= reward_high, reward

        if terminated or truncated:
            num_episodes += 1
            env.reset()

        env.render()

    # Test the close method
    env.close()
@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-8x8-v0"])
def test_interactive_mode(env_id):
    """Drive the environment for 100 random steps, then close it.

    The step index is printed before each step.  When an episode terminates
    or is truncated the environment is reset before stepping again, so that
    `step` is never called on a finished episode.

    Args:
        env_id: Id of the environment to create with `gym.make`.
    """
    env = gym.make(env_id)
    env.reset()

    for i in range(100):
        print(f"step {i}")

        # Pick a random action
        action = env.action_space.sample()

        _, _, terminated, truncated, _ = env.step(action)

        # Start a new episode instead of stepping a finished one.
        if terminated or truncated:
            env.reset()

    # Test the close method
    env.close()
def test_mission_space():
    """Check `MissionSpace.contains` for plain, nested and repeated placeholders."""
    # --- Plain placeholders -------------------------------------------------
    colour_object_space = MissionSpace(
        mission_func=lambda color, obj_type: f"Get the {color} {obj_type}.",
        ordered_placeholders=[["green", "red"], ["ball", "key"]],
    )

    for accepted in ("Get the green ball.", "Get the red key."):
        assert colour_object_space.contains(accepted)

    rejected_missions = (
        # Placeholder values that are not part of the space
        "Get the purple box.",
        # Placeholders passed in inverted order
        "Get the key red.",
        # An extra, repeated placeholder
        "Get the key red key.",
    )
    for rejected in rejected_missions:
        assert not colour_object_space.contains(rejected)

    # --- Placeholders contained in one another ------------------------------
    # e.g. "get the" and "go get the": the string "get the" occurs in both.
    verb_object_space = MissionSpace(
        mission_func=lambda get_syntax, obj_type: f"{get_syntax} {obj_type}.",
        ordered_placeholders=[
            ["go get the", "get the", "go fetch the", "fetch the"],
            ["ball", "key"],
        ],
    )

    for accepted in ("get the ball.", "go get the key.", "go fetch the ball."):
        assert verb_object_space.contains(accepted)

    # --- The same placeholder list used more than once -----------------------
    two_object_space = MissionSpace(
        mission_func=lambda get_syntax, color_1, obj_type_1, color_2, obj_type_2: f"{get_syntax} {color_1} {obj_type_1} and the {color_2} {obj_type_2}.",
        ordered_placeholders=[
            ["go get the", "get the", "go fetch the", "fetch the"],
            ["green", "red"],
            ["ball", "key"],
            ["green", "red"],
            ["ball", "key"],
        ],
    )

    for accepted in (
        "get the green key and the green key.",
        "go fetch the red ball and the green key.",
    ):
        assert two_object_space.contains(accepted)
# Running this for every environment is not reasonable, so only a few are covered.
@pytest.mark.parametrize(
    "env_id",
    [
        "MiniGrid-Empty-8x8-v0",
        "MiniGrid-DoorKey-16x16-v0",
        "MiniGrid-ObstructedMaze-1Dl-v0",
    ],
)
def test_env_sync_vectorization(env_id):
    """Build a `SyncVectorEnv` of four copies, then reset, step once and close."""

    def make_thunk(target_id, **make_kwargs):
        # The vector env expects zero-argument callables that build one env each.
        return lambda: gym.make(target_id, **make_kwargs)

    num_envs = 4
    thunks = [make_thunk(env_id) for _ in range(num_envs)]
    vec_env = gym.vector.SyncVectorEnv(thunks)

    vec_env.reset()
    batched_action = vec_env.action_space.sample()
    vec_env.step(batched_action)
    vec_env.close()
def test_pprint_grid(env_id="MiniGrid-Empty-8x8-v0"):
    """Check the env's string form and `pprint_grid` before and after reset.

    Before the first reset `pprint_grid` must raise ``ValueError`` with the
    documented message; after a reset it must return a string.  The
    environment is closed when the test finishes, whether it passes or fails.

    Args:
        env_id: Id of the environment to create.  The expected string form is
            hard-coded for the default id, so other ids fail that assertion.
    """
    env = gym.make(env_id)
    try:
        env_repr = str(env)
        assert (
            env_repr
            == "<OrderEnforcing<PassiveEnvChecker<EmptyEnv<MiniGrid-Empty-8x8-v0>>>>"
        )

        # `re.escape` is needed because the message contains regex
        # metacharacters and `match` interprets its argument as a pattern.
        with pytest.raises(
            ValueError,
            match=re.escape(
                "The environment hasn't been `reset` therefore the `agent_pos`, `agent_dir` or `grid` are unknown."
            ),
        ):
            env.unwrapped.pprint_grid()

        env.reset()
        assert isinstance(env.unwrapped.pprint_grid(), str)
    finally:
        # Previously the environment was never closed.
        env.close()
|