I am trying to run Dask on Ray. When I run a small example everything works fine, but if I try to run the same example with a larger input it fails immediately with strange errors like:
2021-06-30 04:39:40,870 ERROR worker.py:409 -- SystemExit was raised from the worker
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 477, in ray._raylet.execute_task
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/worker.py", line 301, in deserialize_objects
return context.deserialize_objects(data_metadata_pairs, object_refs)
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/serialization.py", line 248, in deserialize_objects
obj = self._deserialize_object(data, metadata, object_ref)
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/serialization.py", line 190, in _deserialize_object
return self._deserialize_msgpack_data(data, metadata_fields)
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/serialization.py", line 168, in _deserialize_msgpack_data
python_objects = self._deserialize_pickle5_data(pickle5_data)
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/serialization.py", line 158, in _deserialize_pickle5_data
obj = pickle.loads(in_band)
File "/home/ray/anaconda3/lib/python3.7/site-packages/s3fs/__init__.py", line 1, in <module>
from .core import S3FileSystem, S3File
File "/home/ray/anaconda3/lib/python3.7/site-packages/s3fs/core.py", line 14, in <module>
import aiobotocore
File "/home/ray/anaconda3/lib/python3.7/site-packages/aiobotocore/__init__.py", line 1, in <module>
from .session import get_session, AioSession
File "/home/ray/anaconda3/lib/python3.7/site-packages/aiobotocore/session.py", line 1, in <module>
from botocore.session import Session, EVENT_ALIASES, ServiceModel, UnknownServiceError
File "/home/ray/anaconda3/lib/python3.7/site-packages/botocore/session.py", line 29, in <module>
import botocore.configloader
File "/home/ray/anaconda3/lib/python3.7/site-packages/botocore/configloader.py", line 19, in <module>
from botocore.compat import six
File "/home/ray/anaconda3/lib/python3.7/site-packages/botocore/compat.py", line 27, in <module>
from urllib3 import exceptions
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/__init__.py", line 7, in <module>
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 29, in <module>
from .connection import (
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/connection.py", line 41, in <module>
from .util.ssl_ import (
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/util/__init__.py", line 7, in <module>
from .ssl_ import (
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/util/ssl_.py", line 11, in <module>
from .url import IPV4_RE, BRACELESS_IPV6_ADDRZ_RE
File "/home/ray/anaconda3/lib/python3.7/site-packages/urllib3/util/url.py", line 63, in <module>
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
File "/home/ray/anaconda3/lib/python3.7/re.py", line 234, in compile
return _compile(pattern, flags)
File "/home/ray/anaconda3/lib/python3.7/re.py", line 286, in _compile
p = sre_compile.compile(pattern, flags)
File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/worker.py", line 406, in sigterm_handler
sys.exit(1)
SystemExit: 1
It shouldn’t be a dependency mismatch, as I am running the client from the same Docker image that is running on the cluster. Any ideas on how to debug this?
Trevor