Some questions about tune

I ran into some questions while using Tune to train an agent with RLlib.

import ray
from ray.tune.registry import register_env
from ray.tune import CLIReporter
from envs.hunter import Hunter, Hunter_config
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo import PPOConfig
from ray import tune
from ray.tune.logger import pretty_print

# Start Ray for this script. ignore_reinit_error=True is passed so that a
# repeated init call is tolerated (going by the flag's name; see the Ray docs).
ray.init(ignore_reinit_error=True)

# PPO configuration object; it is filled in by the setter calls further down
# and finally converted to a dict for tune.run.
hunter_config = PPOConfig()
def env_creator(config):
    """Build a fresh ``Hunter`` environment for registration with Tune.

    Args:
        config: Whatever the caller hands to the creator. It is currently
            ignored; the environment is always constructed with
            ``config=None``.

    Returns:
        A new ``Hunter`` instance.
    """
    # NOTE(review): `config` is dropped here and `Hunter_config` is imported
    # but unused in the visible code; confirm that building Hunter with
    # config=None is intentional.
    env = Hunter(config=None)
    return env

# Make the custom environment available to RLlib under the name "Hunter".
register_env("Hunter", env_creator)

# Fill in the PPO config. The setter calls are chained, which relies on each
# setter returning the config object itself (see the RLlib AlgorithmConfig
# docs). Statement order is the same as before.
(
    hunter_config
    .environment(env="Hunter")
    .framework(framework="torch")
    .debugging(log_level="ERROR")
    .rollouts(num_rollout_workers=36, num_envs_per_worker=1)
    .training(
        # Two grid-searched hyperparameters with two values each, i.e. the
        # 2 x 2 = 4 trials this script produces.
        lr=tune.grid_search([5e-5, 2e-4]),
        train_batch_size=tune.grid_search([128, 256]),
    )
)

# Stopping criterion handed to Tune.
# NOTE(review): per the Tune docs this is checked against each trial's own
# reported results rather than a sum over all trials; confirm for the Ray
# version in use.
stop_criteria = {"timesteps_total": 10000}

reporter = CLIReporter()

# Launch the sweep; results and checkpoints are written under "hunter_results".
experiment_results = tune.run(
    run_or_experiment=hunter_config.algo_class,
    config=hunter_config.to_dict(),
    stop=stop_criteria,
    progress_reporter=reporter,
    local_dir="hunter_results",
    checkpoint_freq=10,
    checkpoint_at_end=True,
    verbose=3,
    metric="episode_reward_mean",
    mode="max",
)

# No metric/mode is passed here, so the ones given to tune.run above are the
# only ones available for choosing the best trial.
best_trial = experiment_results.get_best_trial()
print("best trial:", best_trial)

# Look up that trial's best checkpoint by the same metric and mode.
best_checkpoint = experiment_results.get_best_checkpoint(
    trial=best_trial,
    metric="episode_reward_mean",
    mode="max",
)

print(f"Best checkpoint from training:{best_checkpoint}")

The code above created four trials:
1. I set `num_rollout_workers` to 36. How are the CPUs allocated: 9 CPUs for each trial, or 36 CPUs for a single trial?
2. I set `timesteps_total` to 10000 in `stop`. I don't know whether this means that each trial is trained for 10000 steps, or that the number of steps summed over all trials is 10000.