from ray import tune

tuner = tune.Tuner(
    'PPO',
    param_space=ppo_config,
    run_config=tune.RunConfig(
        storage_path=storage_dir,
        name='p0',
        stop={'training_iteration': 2000},
        verbose=3,
        checkpoint_config=tune.CheckpointConfig(
            checkpoint_at_end=True,
            checkpoint_frequency=5,
        ),
    ),
)
results = tuner.fit()
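ppo_config and storage_dir are defined earlier in the script and not shown above. Below is a minimal sketch of roughly what they look like: the environment name, four env runners, one GPU learner, and the multi-agent setup match the trial summary in the log, while the policy name, mapping function, and exact builder calls are assumptions/placeholders.

from ray.rllib.algorithms.ppo import PPOConfig

# Sketch only -- the relevant parts are the registered 'multi_env' environment,
# 4 env runners, 1 learner with a GPU, and multi-agent mode; the policy name
# and mapping function below are placeholders.
ppo_config = (
    PPOConfig()
    .environment('multi_env')
    .env_runners(num_env_runners=4)
    .learners(num_learners=1, num_gpus_per_learner=1)
    .multi_agent(
        policies={'shared_policy'},
        policy_mapping_fn=lambda agent_id, episode, **kwargs: 'shared_policy',
    )
)

# Matches the results path shown in the log output below.
storage_dir = '/home/zanhao/Project_shuttle/results'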
Error:
Trial PPO_multi_env_08ebf_00000 finished iteration 5 at 2025-07-16 10:55:01. Total running time: 1min 39s
╭──────────────────────────────────────────────────╮
│ Trial PPO_multi_env_08ebf_00000 result │
├──────────────────────────────────────────────────┤
│ env_runners/episode_len_mean 150 │
│ env_runners/episode_return_mean -172 │
│ num_env_steps_sampled_lifetime 20480 │
╰──────────────────────────────────────────────────╯
2025-07-16 10:55:01,180 ERROR tune_controller.py:1331 -- Trial task failed for trial PPO_multi_env_08ebf_00000
Traceback (most recent call last):
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/worker.py", line 2849, in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/worker.py", line 937, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RaySystemError): ray::PPO.save() (pid=26518, ip=10.68.4.39, actor_id=4c6af5385d92900280559f5e01000000, repr=PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True))
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 486, in save
checkpoint_dict_or_path = self.save_checkpoint(checkpoint_dir)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 2690, in save_checkpoint
self.save_to_path(
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/utils/checkpoints.py", line 300, in save_to_path
comp_state = self.get_state(components=comp_name)[comp_name]
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 2834, in get_state
state[COMPONENT_LEARNER_GROUP] = self.learner_group.get_state(
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/core/learner/learner_group.py", line 521, in get_state
state[COMPONENT_LEARNER] = self._get_results(results)[0]
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/core/learner/learner_group.py", line 672, in _get_results
raise result_or_error
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/rllib/utils/actor_manager.py", line 861, in _fetch_result
result = ray.get(ready)
ray.exceptions.RaySystemError: System error: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
traceback: Traceback (most recent call last):
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/storage.py", line 533, in _load_from_bytes
return torch.load(io.BytesIO(b), weights_only=False)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1495, in load
return _legacy_load(
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1754, in _legacy_load
result = unpickler.load()
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1682, in persistent_load
obj = restore_location(obj, location)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 693, in default_restore_location
result = fn(storage, location)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 631, in _deserialize
device = _validate_device(location, backend_name)
File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 600, in _validate_device
raise RuntimeError(
RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
Trial PPO_multi_env_08ebf_00000 errored after 5 iterations at 2025-07-16 10:55:01. Total running time: 1min 39s
Error file: /tmp/ray/session_2025-07-16_10-53-20_879632_24234/artifacts/2025-07-16_10-53-21/p0/driver_artifacts/PPO_multi_env_08ebf_00000_0_2025-07-16_10-53-21/error.txt
2025-07-16 10:55:01,202 INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/zanhao/Project_shuttle/results/p0' in 0.0206s.
Trial status: 1 ERROR
Current time: 2025-07-16 10:55:01. Total running time: 1min 39s
Logical resource usage: 13.0/32 CPUs, 0.99/1 GPUs (0.0/1.0 accelerator_type:G)
╭────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ Trial name status iter total time (s) …lls_per_iteration …_sampled_lifetime │
├────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ PPO_multi_env_08ebf_00000 ERROR 5 94.4173 1 20480 │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
Number of errored trials: 1
╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ Trial name # failures error file │
├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ PPO_multi_env_08ebf_00000 1 /tmp/ray/session_2025-07-16_10-53-20_879632_24234/artifacts/2025-07-16_10-53-21/p0/driver_artifacts/PPO_multi_env_08ebf_00000_0_2025-07-16_10-53-21/error.txt │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) Traceback (most recent call last):
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/serialization.py", line 458, in deserialize_objects
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) obj = self._deserialize_object(data, metadata, object_ref)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/serialization.py", line 315, in _deserialize_object
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) return self._deserialize_msgpack_data(data, metadata_fields)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/serialization.py", line 270, in _deserialize_msgpack_data
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) python_objects = self._deserialize_pickle5_data(pickle5_data)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/ray/_private/serialization.py", line 258, in _deserialize_pickle5_data
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) obj = pickle.loads(in_band, buffers=buffers)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/storage.py", line 533, in _load_from_bytes
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) return torch.load(io.BytesIO(b), weights_only=False)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1495, in load
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) return _legacy_load(
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1754, in _legacy_load
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) result = unpickler.load()
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 1682, in persistent_load
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) obj = restore_location(obj, location)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 693, in default_restore_location
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) result = fn(storage, location)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 631, in _deserialize
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) device = _validate_device(location, backend_name)
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) File "/home/zanhao/anaconda3/envs/torch/lib/python3.9/site-packages/torch/serialization.py", line 600, in _validate_device
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) raise RuntimeError(
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
(PPO(env=multi_env; env-runners=4; learners=1; multi-agent=True) pid=26518) 2025-07-16 10:55:01,178 ERROR actor_manager.py:873 -- Ray error (System error: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
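For clarity, the map_location workaround that the RuntimeError text refers to is just an argument to torch.load. The snippet below only illustrates that pattern (the checkpoint path is a placeholder); in this trace the failing torch.load is invoked inside Ray's own deserialization path, not in the training script.

import torch

# What the error message suggests for a CPU-only process: remap CUDA storages
# to CPU at load time. 'some_checkpoint.pt' is a placeholder path.
state = torch.load('some_checkpoint.pt', map_location=torch.device('cpu'))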