# Dockerfile for head
FROM python:3.10.13-slim
RUN apt-get update && apt-get install -y g++ gcc libsndfile1 git ffmpeg podman curl
RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
&& apt-get update
RUN apt-get install -y nvidia-container-toolkit
RUN python -m pip install -U pip==23.3.1
RUN python -m pip install "ray[default,serve]==2.8.0"
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
WORKDIR /root/ray/
COPY . .
RUN chmod +x /root/ray/docker/entrypoint.sh
ENTRYPOINT ["/root/ray/docker/entrypoint.sh"]
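For context, this is roughly how I build and start the head container; the <ray-head> tag, the Dockerfile path, and the published ports are placeholders for my setup, and --privileged is there because the node launches podman containers itself:
# build and run the head node (sketch; tag, Dockerfile path, and ports are placeholders)
docker build -t <ray-head> -f docker/Dockerfile .
docker run -d --privileged --gpus all \
  -p 8265:8265 -p 8000:8000 -p 9000:9000 \
  <ray-head>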
# entrypoint.sh
#!/bin/bash
set -e
# Generate the CDI spec so containers started on this node can see the GPUs.
nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
RAY_prestart_worker_first_driver=0 ray start --head --dashboard-host=0.0.0.0 --block
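As a sanity check inside the head container, the generated CDI spec can be listed:
# list the CDI devices generated above (run inside the head container)
nvidia-ctk cdi list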
# service.py
import asyncio
from io import BytesIO

import numpy as np
import torch
from fastapi import FastAPI
from fastapi.responses import Response
from PIL import Image
from ray import serve
from ray.serve.handle import DeploymentHandle

app = FastAPI()


@serve.deployment(num_replicas=1)
@serve.ingress(app)
class APIIngressOD:
    def __init__(self, object_detection_handle) -> None:
        self.handle: DeploymentHandle = object_detection_handle.options(
            use_new_handle_api=True,
        )

    @app.get(
        "/",
        responses={200: {"content": {"image/jpeg": {}}}},
        response_class=Response,
    )
    async def detect(self, image_url: str):
        image = await self.handle.detect.remote(image_url)
        file_stream = BytesIO()
        image.save(file_stream, "jpeg")
        return Response(content=file_stream.getvalue(), media_type="image/jpeg")


@serve.deployment(
    # A quarter GPU per replica lets up to four replicas share one device.
    ray_actor_options={"num_gpus": 0.25},
    autoscaling_config={"min_replicas": 2, "max_replicas": 4, "downscale_delay_s": 60},
)
class ObjectDetection:
    def __init__(self):
        self.model = torch.hub.load("ultralytics/yolov5", "yolov5s")
        self.model.cuda()

    async def detect(self, image_url: str):
        # Run the blocking YOLOv5 call in a thread so the event loop stays responsive.
        loop = asyncio.get_running_loop()
        result_im = await loop.run_in_executor(None, self.model, image_url)
        return Image.fromarray(result_im.render()[0].astype(np.uint8))


# The bound graph gets its own name so it does not shadow the FastAPI `app` above.
entrypoint = APIIngressOD.bind(ObjectDetection.bind())
serve.run(entrypoint, name="object_detection", route_prefix="/detect")
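Once the app is up, I check the HTTP route like this (the sample image URL is only an example; the path is the route_prefix plus the ingress route):
# query the detection endpoint
curl -o result.jpeg "http://localhost:8000/detect/?image_url=https://ultralytics.com/images/zidane.jpg"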
# Dockerfile for <ray-service>
FROM python:3.10.13-slim
RUN python -m pip install -U pip==23.3.1
COPY . .
RUN python -m pip install Pillow \
    opencv-python \
    "torchvision>=0.16" \
    numpy \
    torch \
    pandas \
    "ray[serve]==2.8.0"
# build and push the worker image
docker build -t <ray-service> .
docker push <ray-service>
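Before submitting, a quick local check that the pushed image imports cleanly (the tag is the placeholder from above):
# sanity-check the worker image locally
docker run --rm <ray-service>:latest python -c "import ray, torch; print(ray.__version__, torch.__version__)"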
# submit the job to the head node
RAY_ADDRESS='http://localhost:8265' ray job submit --runtime-env-json '{
  "container": {
    "image": "<ray-service>:latest",
    "run_options": ["--tty", "--privileged", "--cap-drop ALL", "--log-level=debug",
                    "--device nvidia.com/gpu=all", "--security-opt=label=disable",
                    "--restart unless-stopped"]
  },
  "config": {"eager_install": false},
  "env_vars": {"NVIDIA_VISIBLE_DEVICES": "all"}
}' -- python service.py
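While the job starts, I watch it with the job CLI (with RAY_ADDRESS set as above; <submission_id> is whatever ray job submit printed):
# monitor / stop a submitted job
ray job list
ray job logs <submission_id> --follow
ray job stop <submission_id>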
I have had a few problems using this command:
- I have to exec into the head-node container and pull the image beforehand. Otherwise, no matter how long I wait, the job never completes. Is this happening because of a timeout on job completion, and is it possible to adjust that timeout?
- After submitting a job, the image is pulled over and over (see raylet.err), as if there were no limit on the number of attempts. If the image cannot be pulled within some time N, I would expect the job to go to FAILED status, but it stays PENDING forever. Is it possible to configure killing jobs that fail to start?
- I also tried running my service over gRPC with the image specified, and everything was fine: requests went through on port 9000. But as soon as I deployed another service on port 8000 without an image (specifying only its dependencies via pip), the gRPC service that had been working returned this response:
status = StatusCode.NOT_FOUND
details = "Application metadata not set. Please ping /ray.serve.RayServeAPIService/ListApplications for available applications."