diff --git a/examples/shields/rl/15_train_eval_tune.py b/examples/shields/rl/15_train_eval_tune.py index ad9d5c1..3bf53e9 100644 --- a/examples/shields/rl/15_train_eval_tune.py +++ b/examples/shields/rl/15_train_eval_tune.py @@ -74,7 +74,7 @@ def ppo(args): ), run_config=air.RunConfig( - stop = {"episode_reward_mean": 94, + stop = {"episode_reward_mean": 1, "timesteps_total": args.steps,}, checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True, num_to_keep=1,