- High: It blocks me from completing my task.
I tried running the example from the Ray Train getting-started guide: https://docs.ray.io/en/latest/train/getting-started.html
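For context, here is a minimal sketch of what I am running, condensed from that guide (not my exact script, but it fails at the same `trainer.fit()` call; the worker/GPU counts below are my assumptions based on the log, which shows `world_size=1`):

```python
import torch
import torch.nn as nn

from ray.air import session
from ray.air.config import ScalingConfig
from ray.train.torch import TorchTrainer, prepare_model


def train_loop_per_worker(config):
    # Toy model and data, standing in for the real training loop.
    model = prepare_model(nn.Linear(4, 1))
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    for epoch in range(config["epochs"]):
        x = torch.randn(8, 4)
        loss = model(x).pow(2).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        session.report({"loss": loss.item(), "epoch": epoch})


trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config={"epochs": 2},
    # num_workers=1 matches world_size=1 in the log; use_gpu=True is assumed
    # since the node reports one GPU.
    scaling_config=ScalingConfig(num_workers=1, use_gpu=True),
)
results = trainer.fit()  # fails here with "No rendezvous handler for env://"
```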
When running the line `results = trainer.fit()`, the following error is raised:

```
(RayTrainWorker pid=452) 2023-04-04 17:42:26,673 INFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=1]
2023-04-04 17:42:26,861 ERROR trial_runner.py:1062 -- Trial TorchTrainer_fd797_00000: Error processing event.
ray.exceptions.RayTaskError(RuntimeError): ray::_Inner.train() (pid=2960, ip=127.0.0.1, repr=TorchTrainer)
File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
raise skipped from exception_cause(skipped)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\function_trainable.py", line 337, in entrypoint
return self._trainable_func(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\base_trainer.py", line 505, in _trainable_func
super()._trainable_func(self._merged_config, reporter, checkpoint_dir)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\function_trainable.py", line 654, in _trainable_func
output = fn()
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\base_trainer.py", line 415, in train_func
trainer.training_loop()
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\data_parallel_trainer.py", line 383, in training_loop
backend_executor.start(initialization_hook=None)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\backend_executor.py", line 128, in start
self._backend.on_start(self.worker_group, self._backend_config)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\torch\config.py", line 179, in on_start
ray.get(setup_futures)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\worker.py", line 2380, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::RayTrainWorker._RayTrainWorker__execute() (pid=452, ip=127.0.0.1, repr=<ray.train._internal.worker_group.RayTrainWorker object at 0x000001E5F430ABE0>)
File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\worker_group.py", line 31, in __execute
raise skipped from exception_cause(skipped)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\worker_group.py", line 28, in __execute
return func(*args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\torch\config.py", line 113, in _setup_torch_process_group
dist.init_process_group(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\torch\distributed\distributed_c10d.py", line 433, in init_process_group
rendezvous_iterator = rendezvous(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\torch\distributed\rendezvous.py", line 82, in rendezvous
raise RuntimeError("No rendezvous handler for {}://".format(result.scheme))
RuntimeError: No rendezvous handler for env://
Result for TorchTrainer_fd797_00000:
date: 2023-04-04_17-42-23
experiment_id: eed291129249403494a71e5562256554
hostname: DESKTOP-G8GCIRA
node_ip: 127.0.0.1
pid: 2960
timestamp: 1680601343
trial_id: fd797_00000
== Status ==
Current time: 2023-04-04 17:42:26 (running for 00:00:08.22)
Memory usage on this node: 5.7/14.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/5.83 GiB heap, 0.0/2.91 GiB objects
Result logdir: C:\Users\vipuser\ray_results\TorchTrainer_2023-04-04_17-42-18
Number of trials: 1/1 (1 ERROR)
+--------------------------+----------+----------------+
| Trial name | status | loc |
|--------------------------+----------+----------------|
| TorchTrainer_fd797_00000 | ERROR | 127.0.0.1:2960 |
+--------------------------+----------+----------------+
Number of errored trials: 1
+--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------+
| Trial name | # failures | error file |
|--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------|
| TorchTrainer_fd797_00000 | 1 | C:\Users\vipuser\ray_results\TorchTrainer_2023-04-04_17-42-18\TorchTrainer_fd797_00000_0_2023-04-04_17-42-18\error.txt |
+--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------+
2023-04-04 17:42:26,892 ERROR tune.py:794 -- Trials did not complete: [TorchTrainer_fd797_00000]
2023-04-04 17:42:26,892 INFO tune.py:798 -- Total run time: 8.28 seconds (8.22 seconds for the tuning loop).
== Status ==
Current time: 2023-04-04 17:42:26 (running for 00:00:08.22)
Memory usage on this node: 5.7/14.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/5.83 GiB heap, 0.0/2.91 GiB objects
Result logdir: C:\Users\vipuser\ray_results\TorchTrainer_2023-04-04_17-42-18
Number of trials: 1/1 (1 ERROR)
+--------------------------+----------+----------------+
| Trial name | status | loc |
|--------------------------+----------+----------------|
| TorchTrainer_fd797_00000 | ERROR | 127.0.0.1:2960 |
+--------------------------+----------+----------------+
Number of errored trials: 1
+--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------+
| Trial name | # failures | error file |
|--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------|
| TorchTrainer_fd797_00000 | 1 | C:\Users\vipuser\ray_results\TorchTrainer_2023-04-04_17-42-18\TorchTrainer_fd797_00000_0_2023-04-04_17-42-18\error.txt |
+--------------------------+--------------+------------------------------------------------------------------------------------------------------------------------+
Traceback (most recent call last):
File "C:/Users/vipuser/PycharmProjects/SIC-master/test.py", line 376, in <module>
results = trainer.fit()
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\base_trainer.py", line 368, in fit
raise result.error
types.RayTaskError(RuntimeError): ray::_Inner.train() (pid=2960, ip=127.0.0.1, repr=TorchTrainer)
File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
raise skipped from exception_cause(skipped)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\function_trainable.py", line 337, in entrypoint
return self._trainable_func(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\base_trainer.py", line 505, in _trainable_func
super()._trainable_func(self._merged_config, reporter, checkpoint_dir)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\tune\trainable\function_trainable.py", line 654, in _trainable_func
output = fn()
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\base_trainer.py", line 415, in train_func
trainer.training_loop()
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\data_parallel_trainer.py", line 383, in training_loop
backend_executor.start(initialization_hook=None)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\backend_executor.py", line 128, in start
self._backend.on_start(self.worker_group, self._backend_config)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\torch\config.py", line 179, in on_start
ray.get(setup_futures)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\worker.py", line 2380, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::RayTrainWorker._RayTrainWorker__execute() (pid=452, ip=127.0.0.1, repr=<ray.train._internal.worker_group.RayTrainWorker object at 0x000001E5F430ABE0>)
File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\worker_group.py", line 31, in __execute
raise skipped from exception_cause(skipped)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\_internal\worker_group.py", line 28, in __execute
return func(*args, **kwargs)
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\ray\train\torch\config.py", line 113, in _setup_torch_process_group
dist.init_process_group(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\torch\distributed\distributed_c10d.py", line 433, in init_process_group
rendezvous_iterator = rendezvous(
File "C:\Users\vipuser\.conda\envs\pytorch\lib\site-packages\torch\distributed\rendezvous.py", line 82, in rendezvous
raise RuntimeError("No rendezvous handler for {}://".format(result.scheme))
RuntimeError: No rendezvous handler for env://
```