I’m getting AttributeError: 'NoneType' object has no attribute 'cuda' when initializing a PPOTrainer on an OR-GYM inventory-management environment with Ray RLlib and PyTorch in a CPU-only setup. The error persists even though I explicitly set num_gpus=0 in the config.
Environment:
Python 3.9
Ray 2.1.0
PyTorch 2.3.0+cpu (CPU-only build confirmed)
OR-GYM
TensorFlow 2.3.0
gym 0.13.1
Library imports:
import or_gym
from or_gym.utils import create_env
import ray
from ray.rllib import agents
from ray.rllib.agents.ppo import PPOTrainer
from ray import tune
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
Configuration:
rl_config = {
    'env': env_name,
    'num_workers': 8,
    'num_gpus': 0,
    'env_config': env_config,
    'model': {
        'vf_share_layers': False,
        'fcnet_activation': 'relu',
        'fcnet_hiddens': [256, 256]
    },
    'framework': 'torch',
    'lr': 1e-5,
    'clip_param': 0.2
}
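For completeness, the per-worker GPU allocation can also be zeroed explicitly; as far as I know num_gpus_per_worker already defaults to 0, so this would only rule it out:

# Make the per-worker GPU allocation explicit as well
# (num_gpus_per_worker should already default to 0).
rl_config['num_gpus_per_worker'] = 0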
Code for Trainer Initialization:
ray.shutdown()
# register_env: helper that registers the OR-GYM env with Ray Tune
# (defined/imported earlier in the notebook).
register_env(env_name, env_config)
ray.init(ignore_reinit_error=True)

agent = agents.ppo.PPOTrainer(env=env_name, config=rl_config)

results = []
for i in range(1200):
    res = agent.train()
    results.append(res)
    if (i + 1) % 5 == 0:
        print('\nIter: {}\tReward: {:.2f}'.format(
            i + 1, res['episode_reward_mean']), end='')
print('\n')
ray.shutdown()
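As a standalone sanity check of the environment itself (independent of RLlib), something like the following can be used; 'InvManagement-v1' is only a placeholder for my actual env_name:

import or_gym

# Build the OR-GYM env directly and take one random step, bypassing RLlib entirely.
env = or_gym.make('InvManagement-v1', env_config=env_config)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(reward, done)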
Error Trace:
The error occurs when initializing the PPOTrainer:
2024-04-25 18:30:58,063 INFO worker.py:1528 -- Started a local Ray instance.
(RolloutWorker pid=24100) 2024-04-25 18:31:06,911 WARNING env.py:147 -- Your env doesn't have a .spec.max_episode_steps attribute. This is fine if you have set 'horizon' in your config dictionary, or `soft_horizon`. However, if you haven't, 'horizon' will default to infinity, and your environment will not be reset.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[49], line 11
7 # Initialize Ray and Build Agent
8 # Initialize Ray and Build Agent
9 ray.init(ignore_reinit_error=True)
---> 11 agent = agents.ppo.PPOTrainer(env=env_name,
12 config=rl_config)
13 results = []
14 for i in range(1200):
File ~\vlsecondrl\lib\site-packages\ray\rllib\algorithms\algorithm.py:414, in Algorithm.__init__(self, config, env, logger_creator, **kwargs)
402 # Initialize common evaluation_metrics to nan, before they become
403 # available. We want to make sure the metrics are always present
404 # (although their values may be nan), so that Tune does not complain
405 # when we use these as stopping criteria.
406 self.evaluation_metrics = {
407 "evaluation": {
408 "episode_reward_max": np.nan,
(...)
411 }
412 }
--> 414 super().__init__(config=config, logger_creator=logger_creator, **kwargs)
416 # Check, whether `training_iteration` is still a tune.Trainable property
417 # and has not been overridden by the user in the attempt to implement the
418 # algos logic (this should be done now inside `training_step`).
419 try:
File ~\vlsecondrl\lib\site-packages\ray\tune\trainable\trainable.py:161, in Trainable.__init__(self, config, logger_creator, remote_checkpoint_dir, custom_syncer, sync_timeout)
159 start_time = time.time()
160 self._local_ip = ray.util.get_node_ip_address()
--> 161 self.setup(copy.deepcopy(self.config))
162 setup_time = time.time() - start_time
163 if setup_time > SETUP_TIME_THRESHOLD:
File ~\vlsecondrl\lib\site-packages\ray\rllib\algorithms\algorithm.py:524, in Algorithm.setup(self, config)
517 if _init is False:
518 # - Create rollout workers here automatically.
519 # - Run the execution plan to create the local iterator to `next()`
520 # in each training iteration.
521 # This matches the behavior of using `build_trainer()`, which
522 # has been deprecated.
523 try:
--> 524 self.workers = WorkerSet(
525 env_creator=self.env_creator,
526 validate_env=self.validate_env,
527 policy_class=self.get_default_policy_class(self.config),
528 trainer_config=self.config,
529 num_workers=self.config["num_workers"],
530 local_worker=True,
531 logdir=self.logdir,
532 )
533 # WorkerSet creation possibly fails, if some (remote) workers cannot
534 # be initialized properly (due to some errors in the RolloutWorker's
535 # constructor).
536 except RayActorError as e:
537 # In case of an actor (remote worker) init failure, the remote worker
538 # may still exist and will be accessible, however, e.g. calling
539 # its `sample.remote()` would result in strange "property not found"
540 # errors.
File ~\vlsecondrl\lib\site-packages\ray\rllib\evaluation\worker_set.py:185, in WorkerSet.__init__(self, env_creator, validate_env, policy_class, trainer_config, num_workers, local_worker, logdir, _setup)
182 spaces = None
184 if local_worker:
--> 185 self._local_worker = self._make_worker(
186 cls=RolloutWorker,
187 env_creator=env_creator,
188 validate_env=validate_env,
189 policy_cls=self._policy_class,
190 # Initially, policy_specs will be inferred from config dict.
191 policy_specs=None,
192 worker_index=0,
193 num_workers=num_workers,
194 config=self._local_config,
195 spaces=spaces,
196 )
File ~\vlsecondrl\lib\site-packages\ray\rllib\evaluation\worker_set.py:892, in WorkerSet._make_worker(self, cls, env_creator, validate_env, policy_cls, policy_specs, worker_index, num_workers, recreated_worker, config, spaces)
889 else:
890 extra_python_environs = config.get("extra_python_environs_for_worker", None)
--> 892 worker = cls(
893 env_creator=env_creator,
894 validate_env=validate_env,
895 policy_spec=policy_specs,
896 policy_mapping_fn=config["multiagent"]["policy_mapping_fn"],
897 policies_to_train=config["multiagent"]["policies_to_train"],
898 tf_session_creator=(session_creator if config["tf_session_args"] else None),
899 rollout_fragment_length=config["rollout_fragment_length"],
900 count_steps_by=config["multiagent"]["count_steps_by"],
901 batch_mode=config["batch_mode"],
902 episode_horizon=config["horizon"],
903 preprocessor_pref=config["preprocessor_pref"],
904 sample_async=config["sample_async"],
905 compress_observations=config["compress_observations"],
906 num_envs=config["num_envs_per_worker"],
907 observation_fn=config["multiagent"]["observation_fn"],
908 clip_rewards=config["clip_rewards"],
909 normalize_actions=config["normalize_actions"],
910 clip_actions=config["clip_actions"],
911 env_config=config["env_config"],
912 policy_config=config,
913 worker_index=worker_index,
914 num_workers=num_workers,
915 recreated_worker=recreated_worker,
916 log_dir=self._logdir,
917 log_level=config["log_level"],
918 callbacks=config["callbacks"],
919 input_creator=input_creator,
920 output_creator=output_creator,
921 remote_worker_envs=config["remote_worker_envs"],
922 remote_env_batch_wait_ms=config["remote_env_batch_wait_ms"],
923 soft_horizon=config["soft_horizon"],
924 no_done_at_end=config["no_done_at_end"],
925 seed=(config["seed"] + worker_index)
926 if config["seed"] is not None
927 else None,
928 fake_sampler=config["fake_sampler"],
929 extra_python_environs=extra_python_environs,
930 spaces=spaces,
931 disable_env_checking=config["disable_env_checking"],
932 )
934 return worker
File ~\vlsecondrl\lib\site-packages\ray\rllib\evaluation\rollout_worker.py:604, in RolloutWorker.__init__(self, env_creator, validate_env, policy_spec, policy_mapping_fn, policies_to_train, tf_session_creator, rollout_fragment_length, count_steps_by, batch_mode, episode_horizon, preprocessor_pref, sample_async, compress_observations, num_envs, observation_fn, clip_rewards, normalize_actions, clip_actions, env_config, model_config, policy_config, worker_index, num_workers, recreated_worker, log_dir, log_level, callbacks, input_creator, output_creator, remote_worker_envs, remote_env_batch_wait_ms, soft_horizon, no_done_at_end, seed, extra_python_environs, fake_sampler, spaces, policy, disable_env_checking)
602 devices = get_tf_gpu_devices()
603 elif policy_config.get("framework") == "torch":
--> 604 devices = list(range(torch.cuda.device_count()))
606 if len(devices) < num_gpus:
607 raise RuntimeError(
608 ERR_MSG_NO_GPUS.format(len(devices), devices) + HOWTO_CHANGE_CONFIG
609 )
AttributeError: 'NoneType' object has no attribute 'cuda'
The error points to the line in RolloutWorker.__init__() where it calls torch.cuda.device_count(): inside RLlib, torch itself is apparently None at that point, even though importing torch directly in the notebook works.
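Since torch comes back as None there, my guess is that RLlib's internal try_import_torch() returns (None, None) in my local environment. A minimal sketch to check that assumption (try_import_torch is the helper RLlib uses to import torch, and it swallows import errors by default):

from ray.rllib.utils.framework import try_import_torch

# RLlib imports torch through this helper; if it returns None for torch here,
# the RolloutWorker sees torch as None and raises the same AttributeError.
torch, nn = try_import_torch()
print(torch)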
PyTorch Installation Check:
import torch
print(torch.__version__)          # output: 2.3.0+cpu
print(torch.cuda.is_available())  # output: False
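Since the failure happens inside the RolloutWorker rather than in my own code, it also seems worth confirming that torch imports cleanly in a Ray worker process (a sketch, assuming the default local Ray setup):

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
def torch_in_worker():
    import torch
    return torch.__version__, torch.cuda.is_available()

# Verify that a Ray worker process can import torch the same way the driver does.
print(ray.get(torch_in_worker.remote()))
ray.shutdown()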
The strange thing is that the same code runs perfectly in Google Colab (with no GPU enabled) but fails in my local Jupyter notebook environment.
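To compare the two setups, a quick diagnostic (not part of the failing code) is to print the exact library versions on both sides and diff them:

import ray, torch, gym, tensorflow as tf

# Versions reported by Ray, PyTorch, gym, and TensorFlow in this interpreter,
# for diffing the local Jupyter environment against Colab.
print(ray.__version__, torch.__version__, gym.__version__, tf.__version__)

What could cause torch to resolve to None inside RLlib's RolloutWorker locally, when the standalone import works fine?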