How can I ask it to run on CPU alone? By setting `use_gpu=False`?
I tried switching to PyTorch 1.9.0, which gives me the error below everywhere (this time on both the notebook EC2 instance and the training EC2 instance!). It's quite a mess.
By any chance, do you happen to have samples or repos showing successful use of Ray Train on EC2 or, even better, on the AWS Deep Learning AMI (DLAMI)?
(top of the stack trace only)
(BaseWorkerMixin pid=16881) 2022-01-11 20:50:23,782 INFO torch.py:67 -- Setting up process group for: env:// [rank=0, world_size=4]
(BaseWorkerMixin pid=16925) 2022-01-11 20:50:23,782 INFO torch.py:67 -- Setting up process group for: env:// [rank=3, world_size=4]
(BaseWorkerMixin pid=16886) 2022-01-11 20:50:23,782 INFO torch.py:67 -- Setting up process group for: env:// [rank=2, world_size=4]
(BaseWorkerMixin pid=16922) 2022-01-11 20:50:23,784 INFO torch.py:67 -- Setting up process group for: env:// [rank=1, world_size=4]
2022-01-11 20:50:24,959 INFO trainer.py:178 -- Run results will be logged in: /home/ec2-user/ray_results/train_2022-01-11_20-50-19/run_001
2022-01-11 20:50:25,689 WARNING worker.py:1245 -- Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 659, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 625, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 629, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 578, in ray._raylet.execute_task.function_executor
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 609, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 451, in _resume_span
return method(self, *_args, **_kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/worker_group.py", line 26, in __execute
return func(*args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/backend.py", line 498, in end_training
output = session.finish()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/session.py", line 102, in finish
func_output = self.training_thread.join()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 94, in join
raise self.exc
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 87, in run
self.ret = self._target(*self._args, **self._kwargs)
File "a2d2_code/train-ray.py", line 123, in train_func
"pytorch/vision:v0.10.0", args.network, pretrained=False, num_classes=args.classes
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 362, in load
repo_or_dir = _get_cache_or_reload(repo_or_dir, force_reload, verbose)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 162, in _get_cache_or_reload
_validate_not_a_forked_repo(repo_owner, repo_name, branch)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 124, in _validate_not_a_forked_repo
with urlopen(url) as r:
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: rate limit exceeded
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 759, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 580, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 714, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 1854, in ray._raylet.CoreWorker.store_task_outputs
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 361, in serialize
return self._serialize_to_msgpack(value)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 317, in _serialize_to_msgpack
value = value.to_bytes()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/exceptions.py", line 22, in to_bytes
serialized_exception=pickle.dumps(self),
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
return Pickler.dump(self, obj)
TypeError: cannot serialize '_io.BufferedReader' object
An unexpected internal error occurred while the worker was executing a task.
2022-01-11 20:50:25,689 WARNING worker.py:1245 -- Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 659, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 625, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 629, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 578, in ray._raylet.execute_task.function_executor
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 609, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 451, in _resume_span
return method(self, *_args, **_kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/worker_group.py", line 26, in __execute
return func(*args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/backend.py", line 498, in end_training
output = session.finish()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/session.py", line 102, in finish
func_output = self.training_thread.join()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 94, in join
raise self.exc
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 87, in run
self.ret = self._target(*self._args, **self._kwargs)
File "a2d2_code/train-ray.py", line 123, in train_func
"pytorch/vision:v0.10.0", args.network, pretrained=False, num_classes=args.classes
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 362, in load
repo_or_dir = _get_cache_or_reload(repo_or_dir, force_reload, verbose)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 162, in _get_cache_or_reload
_validate_not_a_forked_repo(repo_owner, repo_name, branch)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 124, in _validate_not_a_forked_repo
with urlopen(url) as r:
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: rate limit exceeded
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 759, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 580, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 714, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 1854, in ray._raylet.CoreWorker.store_task_outputs
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 361, in serialize
return self._serialize_to_msgpack(value)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 317, in _serialize_to_msgpack
value = value.to_bytes()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/exceptions.py", line 22, in to_bytes
serialized_exception=pickle.dumps(self),
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
return Pickler.dump(self, obj)
TypeError: cannot serialize '_io.BufferedReader' object
An unexpected internal error occurred while the worker was executing a task.
2022-01-11 20:50:25,689 WARNING worker.py:1245 -- Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 659, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 625, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 629, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 578, in ray._raylet.execute_task.function_executor
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 609, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 451, in _resume_span
return method(self, *_args, **_kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/worker_group.py", line 26, in __execute
return func(*args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/backend.py", line 498, in end_training
output = session.finish()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/session.py", line 102, in finish
func_output = self.training_thread.join()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 94, in join
raise self.exc
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 87, in run
self.ret = self._target(*self._args, **self._kwargs)
File "a2d2_code/train-ray.py", line 123, in train_func
"pytorch/vision:v0.10.0", args.network, pretrained=False, num_classes=args.classes
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 362, in load
repo_or_dir = _get_cache_or_reload(repo_or_dir, force_reload, verbose)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 162, in _get_cache_or_reload
_validate_not_a_forked_repo(repo_owner, repo_name, branch)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 124, in _validate_not_a_forked_repo
with urlopen(url) as r:
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: rate limit exceeded
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 759, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 580, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 714, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 1854, in ray._raylet.CoreWorker.store_task_outputs
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 361, in serialize
return self._serialize_to_msgpack(value)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 317, in _serialize_to_msgpack
value = value.to_bytes()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/exceptions.py", line 22, in to_bytes
serialized_exception=pickle.dumps(self),
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
return Pickler.dump(self, obj)
TypeError: cannot serialize '_io.BufferedReader' object
An unexpected internal error occurred while the worker was executing a task.
2022-01-11 20:50:25,689 WARNING worker.py:1245 -- Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 659, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 625, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 629, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 578, in ray._raylet.execute_task.function_executor
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 609, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 451, in _resume_span
return method(self, *_args, **_kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/worker_group.py", line 26, in __execute
return func(*args, **kwargs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/backend.py", line 498, in end_training
output = session.finish()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/session.py", line 102, in finish
func_output = self.training_thread.join()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 94, in join
raise self.exc
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 87, in run
self.ret = self._target(*self._args, **self._kwargs)
File "a2d2_code/train-ray.py", line 123, in train_func
"pytorch/vision:v0.10.0", args.network, pretrained=False, num_classes=args.classes
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 362, in load
repo_or_dir = _get_cache_or_reload(repo_or_dir, force_reload, verbose)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 162, in _get_cache_or_reload
_validate_not_a_forked_repo(repo_owner, repo_name, branch)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 124, in _validate_not_a_forked_repo
with urlopen(url) as r:
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: rate limit exceeded
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 759, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 580, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 714, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 1854, in ray._raylet.CoreWorker.store_task_outputs
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 361, in serialize
return self._serialize_to_msgpack(value)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 317, in _serialize_to_msgpack
value = value.to_bytes()
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/exceptions.py", line 22, in to_bytes
serialized_exception=pickle.dumps(self),
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
return Pickler.dump(self, obj)
TypeError: cannot serialize '_io.BufferedReader' object
An unexpected internal error occurred while the worker was executing a task.
(BaseWorkerMixin pid=16881) 2022-01-11 20:50:25,681 ERROR worker.py:431 -- SystemExit was raised from the worker
(BaseWorkerMixin pid=16881) Traceback (most recent call last):
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 659, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 625, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 629, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 578, in ray._raylet.execute_task.function_executor
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 609, in actor_method_executor
(BaseWorkerMixin pid=16881) return method(__ray_actor, *args, **kwargs)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 451, in _resume_span
(BaseWorkerMixin pid=16881) return method(self, *_args, **_kwargs)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/worker_group.py", line 26, in __execute
(BaseWorkerMixin pid=16881) return func(*args, **kwargs)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/backend.py", line 498, in end_training
(BaseWorkerMixin pid=16881) output = session.finish()
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/session.py", line 102, in finish
(BaseWorkerMixin pid=16881) func_output = self.training_thread.join()
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 94, in join
(BaseWorkerMixin pid=16881) raise self.exc
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/train/utils.py", line 87, in run
(BaseWorkerMixin pid=16881) self.ret = self._target(*self._args, **self._kwargs)
(BaseWorkerMixin pid=16881) File "a2d2_code/train-ray.py", line 123, in train_func
(BaseWorkerMixin pid=16881) "pytorch/vision:v0.10.0", args.network, pretrained=False, num_classes=args.classes
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 362, in load
(BaseWorkerMixin pid=16881) repo_or_dir = _get_cache_or_reload(repo_or_dir, force_reload, verbose)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 162, in _get_cache_or_reload
(BaseWorkerMixin pid=16881) _validate_not_a_forked_repo(repo_owner, repo_name, branch)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/hub.py", line 124, in _validate_not_a_forked_repo
(BaseWorkerMixin pid=16881) with urlopen(url) as r:
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 222, in urlopen
(BaseWorkerMixin pid=16881) return opener.open(url, data, timeout)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 531, in open
(BaseWorkerMixin pid=16881) response = meth(req, response)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 641, in http_response
(BaseWorkerMixin pid=16881) 'http', request, response, code, msg, hdrs)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 569, in error
(BaseWorkerMixin pid=16881) return self._call_chain(*args)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 503, in _call_chain
(BaseWorkerMixin pid=16881) result = func(*args)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/urllib/request.py", line 649, in http_error_default
(BaseWorkerMixin pid=16881) raise HTTPError(req.full_url, code, msg, hdrs, fp)
(BaseWorkerMixin pid=16881) urllib.error.HTTPError: HTTP Error 403: rate limit exceeded
(BaseWorkerMixin pid=16881)
(BaseWorkerMixin pid=16881) During handling of the above exception, another exception occurred:
(BaseWorkerMixin pid=16881)
(BaseWorkerMixin pid=16881) Traceback (most recent call last):
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 759, in ray._raylet.task_execution_handler
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 580, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 714, in ray._raylet.execute_task
(BaseWorkerMixin pid=16881) File "python/ray/_raylet.pyx", line 1854, in ray._raylet.CoreWorker.store_task_outputs
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 361, in serialize
(BaseWorkerMixin pid=16881) return self._serialize_to_msgpack(value)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/serialization.py", line 317, in _serialize_to_msgpack
(BaseWorkerMixin pid=16881) value = value.to_bytes()
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/exceptions.py", line 22, in to_bytes
(BaseWorkerMixin pid=16881) serialized_exception=pickle.dumps(self),
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
(BaseWorkerMixin pid=16881) cp.dump(obj)
(BaseWorkerMixin pid=16881) File "/home/ec2-user/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
(BaseWorkerMixin pid=16881) return Pickler.dump(self, obj)
(BaseWorkerMixin pid=16881) TypeError: cannot serialize '_io.BufferedReader' object