I ran into some questions while using Tune to train an agent with RLlib.
import ray
from ray.tune.registry import register_env
from ray.tune import CLIReporter
from envs.hunter import Hunter, Hunter_config
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo import PPOConfig
from ray import tune
from ray.tune.logger import pretty_print
# Create the PPO config object; the settings are applied to it further down.
hunter_config = PPOConfig()
# Bring up the Ray runtime; `ignore_reinit_error` is set so that calling init
# again in the same process (e.g. re-running a notebook cell) is not fatal.
ray.init(ignore_reinit_error=True)
def env_creator(config):
    """Create a fresh ``Hunter`` environment instance.

    Args:
        config: Environment config handed to the creator by its caller.
            It is accepted to satisfy the creator signature but is not
            forwarded: ``Hunter`` is always built with ``config=None``.

    Returns:
        A new ``Hunter`` instance.
    """
    # NOTE(review): `config` is deliberately left unused here to preserve the
    # existing behaviour — confirm whether Hunter should receive it instead.
    return Hunter(config=None)
# Make the environment available to RLlib under the string id "Hunter".
register_env("Hunter", env_creator)

# Apply all settings in one fluent chain; each builder call updates
# `hunter_config` in place and hands the same object back.
# NOTE(review): `lr` and `train_batch_size` are grid-searched (2 x 2 values).
# With 36 rollout workers a single sampling round may already exceed these
# batch sizes depending on the rollout fragment length — confirm the
# batch-size grid actually has an effect.
(
    hunter_config
    .environment(env="Hunter")
    .framework(framework="torch")
    .debugging(log_level="ERROR")
    .rollouts(num_rollout_workers=36, num_envs_per_worker=1)
    .training(
        lr=tune.grid_search([5e-5, 2e-4]),
        train_batch_size=tune.grid_search([128, 256]),
    )
)
# Console progress table for the running trials.
reporter = CLIReporter()

# Launch the experiment: one trial per grid-search combination in the config.
# NOTE(review): Tune presumably evaluates the `stop` criterion against each
# trial's own reported results (i.e. per trial, not summed) — confirm in docs.
experiment_results = tune.run(
    hunter_config.algo_class,
    config=hunter_config.to_dict(),
    # What to optimise when ranking trials.
    metric="episode_reward_mean",
    mode="max",
    # When to end a trial.
    stop=dict(timesteps_total=10000),
    # Checkpointing and output location.
    checkpoint_freq=10,
    checkpoint_at_end=True,
    local_dir="hunter_results",
    # Reporting.
    progress_reporter=reporter,
    verbose=3,
)
# Pick the winning trial; no metric/mode is passed here, so the lookup relies
# on the values already given to `tune.run`.
best_trial = experiment_results.get_best_trial()
print("best trial:", best_trial)

# Within that trial, locate the checkpoint with the highest mean episode reward.
best_checkpoint = experiment_results.get_best_checkpoint(
    trial=best_trial, metric="episode_reward_mean", mode="max"
)
print(f"Best checkpoint from training:{best_checkpoint}")
The code above creates four trials (a 2 × 2 grid search over `lr` and `train_batch_size`):
1. I set `num_rollout_workers` to 36. How are the CPUs allocated: 9 CPUs for each trial, or 36 CPUs for a single trial?
2. I set `timesteps_total` to 10000 in `stop`. I don't know whether this means that each trial is trained for 10000 steps, or that the number of steps summed over all trials is 10000.