Using runtime_env with Docker fails with "Check failed: !job_id_env.empty()"

The Docker image is built from the following Dockerfile:

FROM ubuntu:20.04
RUN apt-get update -y
RUN apt-get install -y python3 python3-pip
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN pip3 install --no-cache-dir absl-py ray[default]
RUN pip3 install --no-cache-dir jax[tpu] -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
ENTRYPOINT [ "python3" , "--version"]

When I run the job, I get the following error:

1{"command_prefix": ["cd", "/tmp/ray/session_2023-05-04_05-54-58_484700_14533/runtime_resources/working_dir_files/_ray_pkg_6068c19fb3b8530f", "&&"], "env_vars": {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "1", "PYTHONPATH": "/tmp/ray/session_2023-05-04_05-54-58_484700_14533/runtime_resources/working_dir_files/_ray_pkg_6068c19fb3b8530f"}, "py_executable": "docker run -v /tmp/ray:/tmp/ray --network=host --privileged --pid=host --ipc=host --env RAY_RAYLET_PID=15100 -v /home/yejingxin/ray_venv/lib/python3.8/site-packages/ray:/home/yejingxin/ray_venv/lib/python3.8/site-packages/ray --entrypoint python3 gcr.io/xyz:yejingxin-debug", "resources_dir": null, "container": {}, "java_jars": []}2[2023-05-04 05:55:10,491 C 15714 15714] core_worker.cc:50:  Check failed: !job_id_env.empty() 3*** StackTrace Information ***4/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0xd7dcaa) [0x7f3a4b1f6caa] ray::operator<<()5/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0xd7f792) [0x7f3a4b1f8792] ray::SpdLogMessage::Flush()6/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray6RayLogD1Ev+0x37) [0x7f3a4b1f8aa7] ray::RayLog::~RayLog()7/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core15GetProcessJobIDERKNS0_17CoreWorkerOptionsE+0x10b) [0x7f3a4aafbdfb] ray::core::GetProcessJobID()8/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core10CoreWorkerC1ERKNS0_17CoreWorkerOptionsERKNS_8WorkerIDE+0x8a) [0x7f3a4aafbf0a] ray::core::CoreWorker::CoreWorker()9/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core21CoreWorkerProcessImplC2ERKNS0_17CoreWorkerOptionsE+0x587) [0x7f3a4ab03167] ray::core::CoreWorkerProcessImpl::CoreWorkerProcessImpl()10/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core17CoreWorkerProcess10InitializeERKNS0_17CoreWorkerOptionsE+0xcf) [0x7f3a4ab041bf] ray::core::CoreWorkerProcess::Initialize()11/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0x53a895) [0x7f3a4a9b3895] 
__pyx_pw_3ray_7_raylet_10CoreWorker_1__cinit__()12/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0x53bf33) [0x7f3a4a9b4f33] __pyx_tp_new_3ray_7_raylet_CoreWorker()13ray::IDLE(_PyObject_MakeTpCall+0x183) [0x5f6f43] _PyObject_MakeTpCall14ray::IDLE(_PyEval_EvalFrameDefault+0x5dae) [0x57107e] _PyEval_EvalFrameDefault15ray::IDLE(_PyEval_EvalCodeWithName+0x26a) [0x569cea] _PyEval_EvalCodeWithName16ray::IDLE(_PyFunction_Vectorcall+0x393) [0x5f6a13] _PyFunction_Vectorcall17ray::IDLE(_PyEval_EvalFrameDefault+0x1901) [0x56cbd1] _PyEval_EvalFrameDefault18ray::IDLE(_PyEval_EvalCodeWithName+0x26a) [0x569cea] _PyEval_EvalCodeWithName19ray::IDLE(PyEval_EvalCode+0x27) [0x68e7b7] PyEval_EvalCode20ray::IDLE() [0x680001]21ray::IDLE() [0x68007f]22ray::IDLE() [0x680121]23ray::IDLE(PyRun_SimpleFileExFlags+0x197) [0x680db7] PyRun_SimpleFileExFlags24ray::IDLE(Py_RunMain+0x212) [0x6b8122] Py_RunMain25ray::IDLE(Py_BytesMain+0x2d) [0x6b84ad] Py_BytesMain26/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0x7f3a4bda2083] __libc_start_main27ray::IDLE(_start+0x2e) [0x5fb39e] _start

I cleaned up this stack trace for you (please post a readable version if you want to get good responses!)

1{"command_prefix": ["cd", "/tmp/ray/session_2023-05-04_05-54-58_484700_14533/runtime_resources/working_dir_files/_ray_pkg_6068c19fb3b8530f", "&&"], "env_vars": {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "1", "PYTHONPATH": "/tmp/ray/session_2023-05-04_05-54-58_484700_14533/runtime_resources/working_dir_files/_ray_pkg_6068c19fb3b8530f"}, "py_executable": "docker run -v /tmp/ray:/tmp/ray --network=host --privileged --pid=host --ipc=host --env RAY_RAYLET_PID=15100 -v /home/yejingxin/ray_venv/lib/python3.8/site-packages/ray:/home/yejingxin/ray_venv/lib/python3.8/site-packages/ray --entrypoint python3 gcr.io/xyz:yejingxin-debug", "resources_dir": null, "container": {}, "java_jars": []}
2[2023-05-04 05:55:10,491 C 15714 15714] core_worker.cc:50:  Check failed: !job_id_env.empty()
3*** StackTrace Information ***
4/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0xd7dcaa) [0x7f3a4b1f6caa] ray::operator<<()
5/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0xd7f792) [0x7f3a4b1f8792] ray::SpdLogMessage::Flush()
6/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray6RayLogD1Ev+0x37) [0x7f3a4b1f8aa7] ray::RayLog::~RayLog()
7/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core15GetProcessJobIDERKNS0_17CoreWorkerOptionsE+0x10b) [0x7f3a4aafbdfb] ray::core::GetProcessJobID()
8/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core10CoreWorkerC1ERKNS0_17CoreWorkerOptionsERKNS_8WorkerIDE+0x8a) [0x7f3a4aafbf0a] ray::core::CoreWorker::CoreWorker()
9/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core21CoreWorkerProcessImplC2ERKNS0_17CoreWorkerOptionsE+0x587) [0x7f3a4ab03167] ray::core::CoreWorkerProcessImpl::CoreWorkerProcessImpl()
10/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(_ZN3ray4core17CoreWorkerProcess10InitializeERKNS0_17CoreWorkerOptionsE+0xcf) [0x7f3a4ab041bf] ray::core::CoreWorkerProcess::Initialize()
11/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0x53a895) [0x7f3a4a9b3895] __pyx_pw_3ray_7_raylet_10CoreWorker_1__cinit__()
12/usr/local/lib/python3.8/dist-packages/ray/_raylet.so(+0x53bf33) [0x7f3a4a9b4f33] __pyx_tp_new_3ray_7_raylet_CoreWorker()
13ray::IDLE(_PyObject_MakeTpCall+0x183) [0x5f6f43] _PyObject_MakeTpCall
14ray::IDLE(_PyEval_EvalFrameDefault+0x5dae) [0x57107e] _PyEval_EvalFrameDefault
15ray::IDLE(_PyEval_EvalCodeWithName+0x26a) [0x569cea] _PyEval_EvalCodeWithName
16ray::IDLE(_PyFunction_Vectorcall+0x393) [0x5f6a13] _PyFunction_Vectorcall
17ray::IDLE(_PyEval_EvalFrameDefault+0x1901) [0x56cbd1] _PyEval_EvalFrameDefault
18ray::IDLE(_PyEval_EvalCodeWithName+0x26a) [0x569cea] _PyEval_EvalCodeWithName
19ray::IDLE(PyEval_EvalCode+0x27) [0x68e7b7] PyEval_EvalCode
20ray::IDLE() [0x680001]
21ray::IDLE() [0x68007f]
22ray::IDLE() [0x680121]
23ray::IDLE(PyRun_SimpleFileExFlags+0x197) [0x680db7] PyRun_SimpleFileExFlags
24ray::IDLE(Py_RunMain+0x212) [0x6b8122] Py_RunMain
25ray::IDLE(Py_BytesMain+0x2d) [0x6b84ad] Py_BytesMain
26/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0x7f3a4bda2083] __libc_start_main
27ray::IDLE(_start+0x2e) [0x5fb39e] _start

Let me look into why this is happening…

Thanks @cade. Just FYI, here is another thread where we are discussing the same issue.

1 Like