I’m new to here. I’m trying to train a muti-agent reinforce learning model, with an environment created with PettingZoo. But in training, when I try to use tune.grid_search
the gpu in my device seems not in using, and the status keep stuck in pending, anyone can help me with fixing these parameters to get things right? My ubuntu device has 128 cpus and 4 gpus.
from ray.rllib.algorithms.ppo import PPO
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.tune.logger import TBXLoggerCallback
from env.RL_Routing_Environment import RL_Routing_Environment
def env_creator(config):
env = RL_Routing_Environment()
return ParallelPettingZooEnv(env)
if __name__ == "__main__":
ray.init(_metrics_export_port=8888, dashboard_host='0.0.0.0')
from ray.tune.registry import register_env
register_env("routing_env", env_creator)
config = (
PPOConfig()
.environment("routing_env")
.framework("torch")
.rollouts(num_rollout_workers=32)
.resources(num_gpus=2)
.training(
model={"vf_share_layers": True},
# lr=tune.grid_search([1e-3, 1e-4, 1e-5]),
# train_batch_size=tune.grid_search([2000, 4000, 8000]),
# sgd_minibatch_size=tune.grid_search([64, 128, 256]),
lr=1e-4,
train_batch_size=4000,
sgd_minibatch_size=128, # SGD
clip_param=0.2,
entropy_coeff=0.01,
lambda_=0.95
)
)
tune.run(
"PPO",
# tune.with_resources(PPO, {"gpu": 1, "cpu": 8}),
config=config.to_dict(),
callbacks=[TBXLoggerCallback()],
stop={"training_iteration": 10000},
checkpoint_freq=30,
checkpoint_at_end=True
)
ray.shutdown()