Keep getting AttributeError: Can't get attribute 'GcsClient' on <module 'ray._private.gcs_utils'

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

I am trying to replicate the GPT-J serving example (https://docs.ray.io/en/master/ray-air/examples/gptj_serving.html) on an AWS cluster (g4dn.xlarge) that I’ve spun up, but I keep running into an "AttributeError: Can't get attribute 'GcsClient'" error when trying to use Ray Serve.

runtime_env.yml:

pip:
  - accelerate>=0.16.0
  - transformers>=4.26.0
  - numpy<1.24
  - torch
  - fastapi
  - uvicorn

script.py:

from typing import Dict, List, Optional, Union

import pandas as pd
import ray
from ray import serve
from starlette.requests import Request


@serve.deployment(ray_actor_options={"num_gpus": 1})
class PredictDeployment:
    """Serve deployment that generates text with a Hugging Face causal LM.

    The deployment is declared with ``ray_actor_options={"num_gpus": 1}``.
    """

    def __init__(self, model_id: str, revision: Optional[str] = None):
        """Load the model and its tokenizer.

        Args:
            model_id: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            revision: Revision passed to
                ``AutoModelForCausalLM.from_pretrained``. It is not passed
                to the tokenizer. ``None`` is forwarded unchanged.
        """
        # Imported inside the constructor rather than at module level --
        # presumably so only the replica process needs these packages
        # (TODO confirm against the cluster's runtime_env).
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            device_map="auto",  # automatically makes use of all GPUs available to the Actor
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def generate(self, text: Union[str, List[str]]) -> pd.DataFrame:
        """Sample a continuation for each prompt.

        Args:
            text: A single prompt or a list of prompts; ``__call__`` always
                passes a list.

        Returns:
            A DataFrame with one column, ``"responses"``, holding the
            decoded output sequences.
        """
        # NOTE(review): no padding is requested from the tokenizer, so a
        # batch of prompts with different token lengths may fail to be
        # packed into one tensor -- confirm before sending mixed batches.
        input_ids = self.tokenizer(text, return_tensors="pt").input_ids.to(
            self.model.device
        )

        gen_tokens = self.model.generate(
            input_ids,
            do_sample=True,
            temperature=0.9,
            max_length=100,
        )
        return pd.DataFrame(
            self.tokenizer.batch_decode(gen_tokens), columns=["responses"]
        )

    async def __call__(self, http_request: Request) -> pd.DataFrame:
        """Handle an HTTP request carrying prompts as JSON.

        The body is iterated as a sequence of objects, each with a
        ``"text"`` entry that is either one prompt or a list of prompts.
        All prompts are flattened into one list and passed to ``generate``.

        Args:
            http_request: Incoming request whose JSON body holds the prompts.

        Returns:
            The DataFrame produced by ``generate``.
        """
        json_request: List[Dict[str, Union[str, List[str]]]] = (
            await http_request.json()
        )
        prompts: List[str] = []
        for prompt in json_request:
            text = prompt["text"]
            if isinstance(text, list):
                prompts.extend(text)
            else:
                prompts.append(text)
        return self.generate(prompts)


# Model identifier and revision forwarded to PredictDeployment.
model_id = "EleutherAI/gpt-j-6B"
revision = "float16"  # use float16 weights to fit in 16GB GPUs

# Sample prompt text; not referenced by the rest of the visible script.
prompt = " ".join(
    [
        "In a shocking finding, scientists discovered a herd of unicorns living in a remote,",
        "previously unexplored valley, in the Andes Mountains. Even more surprising to the",
        "researchers was the fact that the unicorns spoke perfect English.",
    ]
)

# Bound deployment; the serve command refers to it as `script:deployment`.
deployment = PredictDeployment.bind(
    model_id=model_id,
    revision=revision,
)

Command:

poetry run serve run --address=ray://<ec2-server>:10001 --working-dir="./private_gptj_efficient_finetuning/" private_gptj_efficient_finetuning.script:deployment --runtime-env ray/runtime_env.yml

Traceback:

Put failed:
Traceback (most recent call last):
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/bin/serve", line 8, in <module>
    sys.exit(cli())
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1657, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/serve/scripts.py", line 339, in run
    client = _private_api.serve_start(
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/serve/_private/api.py", line 229, in serve_start
    controller = ServeController.options(**controller_actor_options).remote(
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/actor.py", line 639, in remote
    return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py", line 387, in _invocation_actor_class_remote_span
    return method(self, args, kwargs, *_args, **_kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/actor.py", line 767, in _remote
    return client_mode_convert_actor(self, args, kwargs, **actor_options)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 198, in client_mode_convert_actor
    return client_actor._remote(in_args, in_kwargs, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 396, in _remote
    return self.options(**option_args).remote(*args, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 602, in remote
    futures = ray.call_remote(self, *args, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/api.py", line 100, in call_remote
    return self.worker.call_remote(instance, *args, **kwargs)
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/worker.py", line 556, in call_remote
    task = instance._prepare_client_task()
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 587, in _prepare_client_task
    task = self._remote_stub._prepare_client_task()
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 407, in _prepare_client_task
    self._ensure_ref()
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 377, in _ensure_ref
    self._ref = ray.worker._put_pickled(
  File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/worker.py", line 510, in _put_pickled
    raise cloudpickle.loads(resp.error)
AttributeError: Can't get attribute 'GcsClient' on <module 'ray._private.gcs_utils' from '/home/ubuntu/.local/lib/python3.8/site-packages/ray/_private/gcs_utils.py'>

Are you using ray nightly? I saw this error yesterday transiently and it resolved after pulling in the latest master.

Hey!

I am using ray = {extras = ["default"], version = "^2.3.1"} in my Poetry pyproject.toml, which resolves to ray, version 2.3.1.