|
@ -74,7 +74,7 @@ def ppo(args): |
|
|
|
|
|
|
|
|
), |
|
|
), |
|
|
run_config=air.RunConfig( |
|
|
run_config=air.RunConfig( |
|
|
stop = {"episode_reward_mean": 94, |
|
|
|
|
|
|
|
|
stop = {"episode_reward_mean": 1, |
|
|
"timesteps_total": args.steps,}, |
|
|
"timesteps_total": args.steps,}, |
|
|
checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True, |
|
|
checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True, |
|
|
num_to_keep=1, |
|
|
num_to_keep=1, |
|
|