Hello! I'm relatively new to RLlib, but I have managed to set up a training pipeline using the PPO algorithm. My system has a GPU (NVIDIA RTX 3080) and RLlib detects it correctly, but the GPU is barely utilized, as confirmed by nvidia-smi.
The console shows:
Logical resource usage: 5.0/16 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
while nvidia-smi shows only about 3% GPU usage.
I have racked my brain, but I cannot figure out why the GPU is not being used.
In my main script I call:
ray.init(ignore_reinit_error=True, num_gpus=1)
print(ray.get_gpu_ids())
print(ray.available_resources())
ray.available_resources() returns:
{'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 16.0, 'object_store_memory': 6037658419.0, 'node:127.0.0.1': 1.0, 'memory': 12075316839.0, 'GPU': 1.0}
but ray.get_gpu_ids() returns []
I have also tested torch.cuda.is_available(), and it returns True, so PyTorch itself can see my GPU.
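Concretely, the check I ran looks roughly like this (the device_count and get_device_name calls are extra sanity checks, not part of the training code):

import torch

# Standalone sanity check that PyTorch can see the CUDA device
print(torch.cuda.is_available())      # True on my machine
print(torch.cuda.device_count())      # 1
print(torch.cuda.get_device_name(0))  # reports the RTX 3080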
My training config looks like this:
def policy_mapping(agent_id, episode=None, worker=None, **kwargs):
    return "shared_policy"

def get_ma_training_config():
    ModelCatalog.register_custom_model("traffic_model", MyTrafficModel)

    temp_env = TrafficLightMAEnv({"num_lights": 5})

    config = (
        PPOConfig()
        .api_stack(
            enable_rl_module_and_learner=False,
            enable_env_runner_and_connector_v2=False
        )
        .environment(
            env=TrafficLightMAEnv,
            env_config={
                "num_lights": 5,
                "sumo_cfg": r"C:\Users\pc\Documents\Trafic\data\sumo\simulation.sumocfg",
                "max_steps": 20000,
            }
        )
        .framework("torch")
        .resources(num_gpus=1)
        .env_runners(
            num_env_runners=4,
            num_envs_per_env_runner=2,
            rollout_fragment_length=200,
            sample_timeout_s=180,
        )
        .training(
            train_batch_size=6000,
            num_epochs=10,
            gamma=0.99,
            lr=3e-4,
            lambda_=0.95,
            clip_param=0.2,
            vf_clip_param=10.0,
            entropy_coeff=0.01,
        )
        .callbacks(MetricsCallback)
        .reporting(
            metrics_num_episodes_for_smoothing=100,
            keep_per_episode_custom_metrics=True,
            min_sample_timesteps_per_iteration=1000,
            min_time_s_per_iteration=1,
            metrics_episode_collection_timeout_s=60,
        )
        .debugging(
            logger_config={
                "type": "ray.tune.logger.TBXLogger",
                "logdir": "./logs",
                "log_metrics_tables": True,
            }
        )
        .multi_agent(
            policies={
                "shared_policy": (
                    None,
                    temp_env.single_light_obs_space,
                    temp_env.single_light_act_space,
                    {
                        "model": {
                            "custom_model": "traffic_model",
                        },
                    },
                )
            },
            policy_mapping_fn=policy_mapping,
        )
    )
    return config
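For context, the config is consumed in the usual way; this is a simplified sketch of how I run it (details of my actual loop omitted):

algo = get_ma_training_config().build()
for _ in range(100):
    result = algo.train()   # this is where I expect the GPU to be used
    print(result["training_iteration"])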
I have also tried setting num_gpus_per_env_runner, but when I do that the console shows:
Logical resource usage: 0/16 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:G)
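Concretely, that variant changed only the env_runners block; the fractional GPU value below is a placeholder for the amounts I experimented with:

.env_runners(
    num_env_runners=4,
    num_envs_per_env_runner=2,
    num_gpus_per_env_runner=0.2,  # placeholder; I tried several fractional values
    rollout_fragment_length=200,
    sample_timeout_s=180,
)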
I am also using a custom model:
class TrafficModel(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        input_size = 15
        hidden_size = 256

        self.input_norm = nn.LayerNorm(input_size)
        self.network = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size)
        )

        # Heads: discrete phase logits, continuous duration (mean + log std), and value
        self.discrete_head = nn.Linear(hidden_size, 3)
        self.continuous_mean = nn.Linear(hidden_size, 1)
        self.continuous_log_std = nn.Parameter(torch.zeros(1))
        self.value_branch = nn.Linear(hidden_size, 1)

        self._value_out = None

    def forward(self, input_dict, state, seq_lens):
        obs = input_dict["obs_flat"].float()
        x = self.input_norm(obs)
        features = self.network(x)

        phase_logits = self.discrete_head(features)

        duration_mean = self.continuous_mean(features)
        # Scale the duration mean into the [5, 60] range
        duration_mean = torch.tanh(duration_mean) * 27.5 + 32.5

        log_std = torch.clamp(self.continuous_log_std, -20.0, 2.0)
        log_std = log_std.expand_as(duration_mean)

        model_out = torch.cat([phase_logits, duration_mean, log_std], dim=-1)
        self._value_out = self.value_branch(features).squeeze(-1)
        return model_out, state

    def value_function(self):
        return self._value_out
I have tried tweaking that as well, but whatever I do, the forward passes are still executed on the CPU when I log which device is running the model.
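The device logging I mention is just a temporary print I add at the top of TrafficModel.forward(), along these lines:

# Temporary debug lines at the top of TrafficModel.forward():
print("model device:", next(self.parameters()).device)  # always prints "cpu"
print("obs device:", input_dict["obs_flat"].device)     # always prints "cpu"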
I have no idea why it is not working and cannot seem to figure out why.
Does anybody have any ideas?