I have a very bizarre networking issue while using Ray with Docker Compose. Given these two remote tasks:
@ray.remote
def test_no_work():
    """Ray task that requests the Qdrant HTTP endpoint without pausing.

    This is the variant of the reproduction that contains no
    ``breakpoint()`` call before the request is issued.
    """
    import requests

    # requests needs an explicit scheme: a bare 'qdrant:6333' is rejected
    # before any connection is attempted, which would mask the real
    # networking behaviour this task is meant to exercise.
    requests.get('http://qdrant:6333')
@ray.remote
def test_works():
    """Ray task that pauses in the debugger, then requests Qdrant over HTTP.

    Identical to the non-pausing variant except for the ``breakpoint()``
    call, which hands control to an attached debugger session before the
    request is issued.
    """
    import requests

    # Deliberate pause: the request below is only issued after the
    # debugger session continues.
    breakpoint()
    # requests needs an explicit scheme: a bare 'qdrant:6333' is rejected
    # before any connection is attempted.
    requests.get('http://qdrant:6333')
When I call test_no_work.remote(),
I get what looks like a DNS issue: the task can't connect to the qdrant service.
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api_client.py", line 108, in send_inner
raise ResponseHandlingException(e)
qdrant_client.http.exceptions.ResponseHandlingException: [Errno 111] Connection refused
However, when I run:
docker compose exec ray-head-1 bash -c 'ray debug'
debugger select 0:
(pdb) import requests
(pdb)requests.get('http://qdrant:6333')
<Response [200]>
I thought the timing of the /etc/hosts setup might be the issue, so I tried adding a delay, as well as retrying the hostname lookup inside the task, but no luck.
Anyone else have any suggestions here?
Does a remote function set up its own network?
Do I need to provide some other kwargs to the remote?
docker compose exec ray-worker-1 cat /etc/hosts
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
172.17.0.1 host.docker.internal
172.24.0.6 5928a48588b0
And here is the docker-compose service definition, followed by the logs it produces:
ray-worker-1:
build:
context: .
dockerfile: ./dockerfile.ray.cluster
depends_on:
- ray-head
command: sh -c 'while sleep 5;do cat /etc/hosts;done & ray start --address=ray-head:6479 --ray-client-server-port=10001 --block'
environment:
- RAY_HEAD_IP=ray-head
- ENV=local-docker
networks:
- net
extra_hosts:
- "host.docker.internal:host-gateway"
ray-worker-1-1 | 127.0.0.1 localhost
ray-worker-1-1 | ::1 localhost ip6-localhost ip6-loopback
ray-worker-1-1 | fe00::0 ip6-localnet
ray-worker-1-1 | ff00::0 ip6-mcastprefix
ray-worker-1-1 | ff02::1 ip6-allnodes
ray-worker-1-1 | ff02::2 ip6-allrouters
ray-worker-1-1 | 172.17.0.1 host.docker.internal
ray-worker-1-1 | 172.24.0.4 4bf2a2e764cf
ray-worker-1-1 | 127.0.0.1 localhost
More debugging logs, produced by the following debug print logic:
# Poll at most six times, sleeping a random interval (under five seconds)
# before each round.
for _ in range(6):
    pause_seconds = random.random() * 5
    time.sleep(pause_seconds)
    # Print the check result for the service name, the Docker bridge
    # address and localhost, in that order, for comparison in the logs.
    for probe_host in ('qdrant', '172.17.0.1', 'localhost'):
        print(is_hostname_available(probe_host, qdrant_port))
    # Stop polling as soon as the configured Qdrant host passes the check.
    if is_hostname_available(qdrant_hostname, qdrant_port):
        break
File "/tmp/ray/session_2024-07-17_21-55-30_385021_7/runtime_resources/py_modules_files/_ray_pkg_f7ae26a04b5311bc/vertexai_loaders/loaders/confluence/run.py", line 35, in load_documents_core
qdrant = load_documents(documents, chunk_size, collection_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/ray/session_2024-07-17_21-55-30_385021_7/runtime_resources/py_modules_files/_ray_pkg_f7ae26a04b5311bc/vertexai_loaders/load.py", line 182, in load_documents
return get_qdrant_from_documents(docs, embeddings, collection_name=collection_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/ray/session_2024-07-17_21-55-30_385021_7/runtime_resources/py_modules_files/_ray_pkg_f7ae26a04b5311bc/vertexai_loaders/load.py", line 60, in get_qdrant_from_documents
return qdrant.from_documents(
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/langchain_core/vectorstores/base.py", line 1058, in from_documents
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/langchain_community/vectorstores/qdrant.py", line 1339, in from_texts
qdrant = cls.construct_instance(
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/langchain_community/vectorstores/qdrant.py", line 1667, in construct_instance
collection_info = client.get_collection(collection_name=collection_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/qdrant_client.py", line 1755, in get_collection
return self._client.get_collection(collection_name=collection_name, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/qdrant_remote.py", line 2236, in get_collection
result: Optional[types.CollectionInfo] = self.http.collections_api.get_collection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api/collections_api.py", line 1314, in get_collection
return self._build_for_get_collection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api/collections_api.py", line 397, in _build_for_get_collection
return self.api_client.request(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api_client.py", line 79, in request
return self.send(request, type_)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api_client.py", line 96, in send
response = self.middleware(request, self.send_inner)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api_client.py", line 205, in __call__
return call_next(request)
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/qdrant_client/http/api_client.py", line 108, in send_inner
raise ResponseHandlingException(e)
qdrant_client.http.exceptions.ResponseHandlingException: [Errno 111] Connection refused
(load_documents_ray pid=369, ip=172.24.0.5) Checking for hostname: localhost:6333
(load_documents_ray pid=369, ip=172.24.0.5) Checking for hostname: qdrant:6333
(load_documents_ray pid=369, ip=172.24.0.5) True
(load_documents_ray pid=369, ip=172.24.0.5) Checking for hostname: 172.17.0.1:6333
(load_documents_ray pid=369, ip=172.24.0.5) True
(load_documents_ray pid=369, ip=172.24.0.5) Checking for hostname: localhost:6333
(load_documents_ray pid=369, ip=172.24.0.5) False
(load_documents_ray pid=369, ip=172.24.0.5) Checking for hostname: qdrant:6333
(load_documents_ray pid=309, ip=172.24.0.6) Checking for hostname: qdrant:6333
(load_documents_ray pid=309, ip=172.24.0.6) True
(load_documents_ray pid=309, ip=172.24.0.6) Checking for hostname: 172.17.0.1:6333
(load_documents_ray pid=309, ip=172.24.0.6) True
(load_documents_ray pid=309, ip=172.24.0.6) Checking for hostname: localhost:6333
(load_documents_ray pid=309, ip=172.24.0.6) False
(load_documents_ray pid=309, ip=172.24.0.6) Checking for hostname: qdrant:6333