How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I am trying to replicate this example of GPT-J serving (https://docs.ray.io/en/master/ray-air/examples/gptj_serving.html) on an AWS cluster (g4dn.xlarge) that I’ve spun up, but I keep running into the error `AttributeError: Can't get attribute 'GcsClient'` when trying to use Ray Serve.
runtime_env.yml:
pip:
- accelerate>=0.16.0
- transformers>=4.26.0
- numpy<1.24
- torch
- fastapi
- uvicorn
script.py:
from typing import List, Optional, Union

import pandas as pd
import ray
from ray import serve
from starlette.requests import Request
@serve.deployment(ray_actor_options={"num_gpus": 1})
class PredictDeployment:
    """Ray Serve deployment that generates text with a causal language model.

    The deployment requests one GPU through ``ray_actor_options``, loads the
    model and tokenizer once in ``__init__``, and answers HTTP requests whose
    JSON body is a list of objects that each carry a ``"text"`` entry.
    """

    def __init__(self, model_id: str, revision: Optional[str] = None):
        """Load the model and its tokenizer.

        Args:
            model_id: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            revision: Optional model revision passed to the model's
                ``from_pretrained``; the tokenizer is loaded without it.
        """
        # Imported here rather than at module level, so importing this module
        # does not by itself require transformers or torch.
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            device_map="auto",  # automatically makes use of all GPUs available to the Actor
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def generate(self, text: Union[str, List[str]]) -> pd.DataFrame:
        """Sample a continuation for the given prompt(s).

        Args:
            text: A single prompt or a list of prompts. ``__call__`` always
                passes a list.

        Returns:
            A DataFrame with a single ``"responses"`` column holding the
            decoded generated sequences.
        """
        # NOTE(review): the tokenizer is called without padding, so a batch of
        # prompts with different token counts may fail to convert to one
        # tensor -- confirm before relying on multi-prompt requests.
        input_ids = self.tokenizer(text, return_tensors="pt").input_ids.to(
            self.model.device
        )

        gen_tokens = self.model.generate(
            input_ids,
            do_sample=True,
            temperature=0.9,
            max_length=100,
        )
        return pd.DataFrame(
            self.tokenizer.batch_decode(gen_tokens), columns=["responses"]
        )

    async def __call__(self, http_request: Request) -> pd.DataFrame:
        """Handle one HTTP request and return the generated responses.

        The JSON body is iterated as a list of mappings. Each mapping's
        ``"text"`` value may be a single prompt or a list of prompts; all of
        them are flattened into one list and generated in a single call.

        Args:
            http_request: Incoming request whose JSON body is read.

        Returns:
            The DataFrame produced by ``generate`` for the collected prompts.

        Raises:
            KeyError: If an item in the body has no ``"text"`` entry.
        """
        json_request: List[dict] = await http_request.json()
        prompts: List[str] = []
        for prompt in json_request:
            text = prompt["text"]
            if isinstance(text, list):
                prompts.extend(text)
            else:
                prompts.append(text)
        return self.generate(prompts)
# Model identifier handed to the deployment below.
model_id = "EleutherAI/gpt-j-6B"

# use float16 weights to fit in 16GB GPUs
revision = "float16"

# Sample prompt text; not referenced anywhere else in the script as shown.
prompt = "".join(
    [
        "In a shocking finding, scientists discovered a herd of unicorns living in a remote, ",
        "previously unexplored valley, in the Andes Mountains. Even more surprising to the ",
        "researchers was the fact that the unicorns spoke perfect English.",
    ]
)

# Bound deployment; the `serve run` command refers to it by import path.
deployment = PredictDeployment.bind(revision=revision, model_id=model_id)
Command:
poetry run serve run --address=ray://<ec2-server>:10001 --working-dir="./private_gptj_efficient_finetuning/" private_gptj_efficient_finetuning.script:deployment --runtime-env ray/runtime_env.yml
Traceback:
Put failed:
Traceback (most recent call last):
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/bin/serve", line 8, in <module>
sys.exit(cli())
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/serve/scripts.py", line 339, in run
client = _private_api.serve_start(
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/serve/_private/api.py", line 229, in serve_start
controller = ServeController.options(**controller_actor_options).remote(
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/actor.py", line 639, in remote
return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py", line 387, in _invocation_actor_class_remote_span
return method(self, args, kwargs, *_args, **_kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/actor.py", line 767, in _remote
return client_mode_convert_actor(self, args, kwargs, **actor_options)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 198, in client_mode_convert_actor
return client_actor._remote(in_args, in_kwargs, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 396, in _remote
return self.options(**option_args).remote(*args, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 602, in remote
futures = ray.call_remote(self, *args, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/api.py", line 100, in call_remote
return self.worker.call_remote(instance, *args, **kwargs)
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/worker.py", line 556, in call_remote
task = instance._prepare_client_task()
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 587, in _prepare_client_task
task = self._remote_stub._prepare_client_task()
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 407, in _prepare_client_task
self._ensure_ref()
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/common.py", line 377, in _ensure_ref
self._ref = ray.worker._put_pickled(
File "/home/nerdai/.cache/pypoetry/virtualenvs/private-gptj-efficient-finetuning-F2jH88Qb-py3.8/lib/python3.8/site-packages/ray/util/client/worker.py", line 510, in _put_pickled
raise cloudpickle.loads(resp.error)
AttributeError: Can't get attribute 'GcsClient' on <module 'ray._private.gcs_utils' from '/home/ubuntu/.local/lib/python3.8/site-packages/ray/_private/gcs_utils.py'>