Hi @sven1977,
Here’s my PPO config. It’s the same config I use when training on a single machine; all I’ve done is start Ray on the two machines and then, on the head node, launch the script with num_workers set to match the new total number of CPUs available.
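Concretely, the cluster startup looks something like this (the port is the Ray default and the head-node IP is a placeholder):

# On the head node (shell):
#   ray start --head --port=6379
# On the second machine (shell):
#   ray start --address='<head_node_ip>:6379'

# Then the training script on the head node attaches to the running
# cluster instead of starting a local Ray instance:
import ray

ray.init(address="auto")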
config = {
    "algorithm": "PPO",
    "env": "yaniv",
    "env_config": env_config,
    "framework": "torch",
    "num_gpus": args.num_gpus,
    "num_workers": args.num_workers,
    "num_envs_per_worker": 1,
    "num_cpus_per_worker": 1,
    "num_cpus_for_driver": 1,
    "multiagent": {
        "policies": {
            "policy_1": (None, obs_space, act_space, {}),
            "policy_2": (None, obs_space, act_space, {}),
            "policy_3": (None, obs_space, act_space, {}),
            "policy_4": (None, obs_space, act_space, {}),
        },
        "policy_mapping_fn": policy_mapping_fn,
        "policies_to_train": ["policy_1"],
    },
    "callbacks": YanivCallbacks,
    # "log_level": "INFO",
    "evaluation_num_workers": 0,
    "evaluation_config": {"explore": False},
    "evaluation_interval": args.eval_every,
    "custom_eval_function": make_eval_func(env_config, args.eval_num),
    # hyperparams
    "model": {
        "custom_model": "yaniv_mask",
        "fcnet_hiddens": [512, 512],
    },
    "batch_mode": "complete_episodes",
    # A3C
    # "rollout_fragment_length": 50,
    # "train_batch_size": 500,
    # "min_iter_time_s": 10,
    # PPO
    "sgd_minibatch_size": 2048,
    "train_batch_size": 65536,
    "rollout_fragment_length": 100,
}
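In case it’s relevant, policy_mapping_fn is nothing fancy; the idea is just to spread the four seats over the four policies, roughly like this (the agent-id parsing here is illustrative, the real format depends on the env):

def policy_mapping_fn(agent_id):
    # illustrative: assumes agent ids like "player_0" .. "player_3"
    seat = int(agent_id.split("_")[-1])
    return f"policy_{seat + 1}"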
I think I’m using a normal env setup. It’s just a regular custom multi-agent env, roughly the shape sketched below.
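That is, it implements the standard MultiAgentEnv interface. A minimal turn-based stub for illustration (the real env obviously carries the Yaniv game logic):

import gym
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class MinimalTurnBasedEnv(MultiAgentEnv):
    """Illustrative stub: one agent acts per step, as in a card game."""

    def __init__(self, config=None):
        self.agents = [f"player_{i}" for i in range(4)]
        self.observation_space = gym.spaces.Box(0.0, 1.0, shape=(4,))
        self.action_space = gym.spaces.Discrete(2)
        self.turn = 0
        self.steps = 0

    def reset(self):
        self.turn = 0
        self.steps = 0
        # only the agent whose turn it is receives an observation
        return {self.agents[self.turn]: self.observation_space.sample()}

    def step(self, action_dict):
        # the agent that just acted gets a reward; the next agent gets an obs
        acting_agent = next(iter(action_dict))
        self.steps += 1
        self.turn = (self.turn + 1) % len(self.agents)
        next_agent = self.agents[self.turn]
        done = self.steps >= 20  # stub termination condition
        obs = {next_agent: self.observation_space.sample()}
        rewards = {acting_agent: 0.0}  # a real env computes game rewards here
        dones = {"__all__": done}  # "__all__" signals the end of the episode
        return obs, rewards, dones, {}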