Hi,
When trying to run PPO hyperparameter optimization on a machine with 64 CPUs and 2 GPUs, setting tune.with_resources() results in the hyperparameter tuning not working (it just prints "PENDING" for each trial every 30 seconds). Is there a proper way to utilize all resources? I assume that with the settings below it should run trials concurrently in batches of 8.
# Build and run the Ray Tune hyperparameter search over PPO.
#
# FIX: the original passed `PPO` a second time, positionally, AFTER the
# `trainable=` keyword argument — that is a SyntaxError ("positional argument
# follows keyword argument") and a duplicate trainable. Removed.
#
# NOTE(review): wrapping an RLlib Algorithm class in tune.with_resources()
# overrides the placement group RLlib computes from its own config
# (num_workers / num_gpus / num_cpus_per_worker). A mismatch between the two
# is the usual reason trials sit in PENDING forever — presumably that is what
# is happening here. The recommended approach is to drop with_resources() and
# declare resources inside param_space instead, e.g.
#   'num_workers': 7, 'num_gpus': 0.25,
# so each trial requests ~8 CPUs and a quarter GPU — confirm against the
# Ray/RLlib version in use.
tuner = Tuner(
    trainable=tune.with_resources(PPO, {"cpu": 8, "gpu": 0.25}),
    param_space={
        'env': args.env,                     # environment id from the CLI
        'model': {'free_log_std': True},     # learn a state-independent log-std
        'horizon': 100,                      # length of MDP (max episode steps)
        'gamma': tune.choice([0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]),
        # NOTE(review): RLlib's dict-style config key is 'lambda', not
        # 'lambda_' — with 'lambda_' this value is likely ignored; verify.
        'lambda_': tune.choice([0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]),
        'kl_coeff': tune.choice([0.3, 0.5, 1, 1.5, 2.0]),
        'kl_target': tune.choice([0.1, 0.05, 0.03, 0.02, 0.01, 0.005, 0.001]),
        'sgd_minibatch_size': tune.choice([8, 16, 32, 64, 128, 256, 512, 1024, 2048]),
        'num_sgd_iter': tune.choice([1, 5, 10, 20]),
        'vf_loss_coeff': tune.uniform(0, 1),
        'entropy_coeff': tune.loguniform(1e-8, 0.1),
        'clip_param': tune.choice([0.1, 0.2, 0.3, 0.4, 0.5]),
        'lr': tune.loguniform(1e-5, 1),
        'train_batch_size': tune.choice([2048, 4096, 8192, 16384, 32768, 65536]),
        'vf_clip_param': tune.choice([1, 2, 3, 5, 10]),
        'grad_clip': tune.choice([0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]),
    },
    tune_config=tune.TuneConfig(
        num_samples=args.num_samples,  # number of trials
        scheduler=pb2,                 # PB2 population-based scheduler
        reuse_actors=False,
    ),
    run_config=train.RunConfig(
        name=args.name,
    ),
)
results = tuner.fit()