I am using a GPU to train the PPO algorithm in RLlib, but I encountered the following error:
(PPO pid=112116) Exception raised in creation task: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=112116, ip=192.168.1.179, actor_id=f5ef5a1c3ca91a754740606c01000000, repr=PPO)
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 517, in __init__
(PPO pid=112116) super().__init__(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 161, in __init__
(PPO pid=112116) self.setup(copy.deepcopy(self.config))
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 639, in setup
(PPO pid=112116) self.workers = WorkerSet(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 157, in __init__
(PPO pid=112116) self._setup(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 247, in _setup
(PPO pid=112116) self._local_worker = self._make_worker(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 925, in _make_worker
(PPO pid=112116) worker = cls(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 525, in __init__
(PPO pid=112116) self._update_policy_map(policy_dict=self.policy_dict)
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1727, in _update_policy_map
(PPO pid=112116) self._build_policy_map(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1838, in _build_policy_map
(PPO pid=112116) new_policy = create_policy_for_framework(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/utils/policy.py", line 141, in create_policy_for_framework
(PPO pid=112116) return policy_class(observation_space, action_space, merged_config)
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 49, in __init__
(PPO pid=112116) TorchPolicyV2.__init__(
(PPO pid=112116) File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/policy/torch_policy_v2.py", line 157, in __init__
(PPO pid=112116) model_copy = copy.deepcopy(model)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
(PPO pid=112116) y = _reconstruct(x, memo, *rv)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
(PPO pid=112116) state = deepcopy(state, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
(PPO pid=112116) y = copier(x, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
(PPO pid=112116) y[deepcopy(key, memo)] = deepcopy(value, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
(PPO pid=112116) y = _reconstruct(x, memo, *rv)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
(PPO pid=112116) state = deepcopy(state, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
(PPO pid=112116) y = copier(x, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
(PPO pid=112116) y[deepcopy(key, memo)] = deepcopy(value, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
(PPO pid=112116) y = _reconstruct(x, memo, *rv)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
(PPO pid=112116) state = deepcopy(state, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
(PPO pid=112116) y = copier(x, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
(PPO pid=112116) y[deepcopy(key, memo)] = deepcopy(value, memo)
(PPO pid=112116) File "/usr/lib/python3.10/copy.py", line 161, in deepcopy
(PPO pid=112116) rv = reductor(4)
(PPO pid=112116) TypeError: cannot pickle '_io.TextIOWrapper' object
2024-01-17 06:56:36,427 ERROR tune_controller.py:1383 -- Trial task failed for trial PPO_EnvironmentSEWDQN_89e2e_00000
Traceback (most recent call last):
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/_private/worker.py", line 2565, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=112116, ip=192.168.1.179, actor_id=f5ef5a1c3ca91a754740606c01000000, repr=PPO)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 517, in __init__
super().__init__(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 161, in __init__
self.setup(copy.deepcopy(self.config))
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 639, in setup
self.workers = WorkerSet(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 157, in __init__
self._setup(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 247, in _setup
self._local_worker = self._make_worker(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 925, in _make_worker
worker = cls(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 525, in __init__
self._update_policy_map(policy_dict=self.policy_dict)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1727, in _update_policy_map
self._build_policy_map(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1838, in _build_policy_map
new_policy = create_policy_for_framework(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/utils/policy.py", line 141, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 49, in __init__
TorchPolicyV2.__init__(
File "/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/policy/torch_policy_v2.py", line 157, in __init__
model_copy = copy.deepcopy(model)
File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
state = deepcopy(state, memo)
File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
state = deepcopy(state, memo)
File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/usr/lib/python3.10/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/usr/lib/python3.10/copy.py", line 271, in _reconstruct
state = deepcopy(state, memo)
File "/usr/lib/python3.10/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/usr/lib/python3.10/copy.py", line 231, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/usr/lib/python3.10/copy.py", line 161, in deepcopy
rv = reductor(4)
TypeError: cannot pickle '_io.TextIOWrapper' object
It seems there is a bug where a Torch model based on TorchModelV2 cannot be pickled when I am using a GPU. Judging from the traceback, `copy.deepcopy(model)` in `torch_policy_v2.py` fails because some attribute of the model (or an object nested inside its state dict) holds an open file handle (`_io.TextIOWrapper`), which cannot be pickled. The Python version is 3.10.12, with ray==2.8.1 and torch==2.1.0.