When trying to deploy multiple models on an 8-GPU cluster, I get the following error:
(ServeController pid=550735) RuntimeError: CUDA error: invalid device ordinal (ServeController pid=550735) CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. (ServeController pid=550735) For debugging consider passing CUDA_LAUNCH_BLOCKING=1
I tried deploying with a DAG driver, with `serve.run`, and with the CLI; all three produce the same error.
Versions / Dependencies
2.4.0
Reproduction script
import torch
import numpy as np
import logging
import ray
from ray import serve
from starlette.requests import Request
import os
from ray.serve.drivers import DAGDriver
# Configure the root logger at DEBUG level so that debug-level log records
# emitted while reproducing the issue are shown.
logging.basicConfig(level=logging.DEBUG)
import torch
import torch.nn as nn
class MyModel(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: Number of input features of the first linear layer.
            hidden_size: Width of the hidden layer.
            num_classes: Number of outputs of the second linear layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to ``x`` and return the result."""
        return self.fc2(self.relu1(self.fc1(x)))
RAY_ACTOR_OPTIONS = {"num_gpus": 1}


@serve.deployment(ray_actor_options=RAY_ACTOR_OPTIONS)
class APP1:
    """Serve deployment that runs a small ``MyModel`` on one Ray-assigned GPU."""

    def __init__(self):
        """Log the GPU assignment and move the model onto the assigned GPU."""
        print("@@@@@@@@@@@@@@@@ APP1 @@@@@@@@@@@@@@@@@@@@@@")
        print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()))
        # Use .get() so an unset variable is printed as None rather than
        # crashing the replica with a KeyError.
        print("CUDA_VISIBLE_DEVICES: {}".format(os.environ.get("CUDA_VISIBLE_DEVICES")))
        # ray.get_gpu_ids() returns *physical* GPU ids, but Ray narrows
        # CUDA_VISIBLE_DEVICES for this actor to the GPU(s) it was assigned.
        # Inside the process CUDA renumbers the visible devices from 0, so
        # with num_gpus=1 the only valid ordinal is 0. Using the physical id
        # (e.g. "cuda:3") raises "CUDA error: invalid device ordinal".
        device = "cuda:0"
        print(f"Initializing APP1 to {device}")
        print("@@@@@@@@@@@@@@@@@ APP1 @@@@@@@@@@@@@@@@@@@@@")
        self.device = device
        self.model = MyModel(10, 10, 3)
        self.model.to(device)

    async def __call__(self, starlette_request: Request):
        """Run the model on the ``"input"`` field of the JSON request body.

        Args:
            starlette_request: Incoming HTTP request whose JSON body contains
                an ``"input"`` entry holding the numeric model input.

        Returns:
            A dict with ``"status"`` set to ``"success"`` and the model
            output as nested Python lists under ``"image"``.
        """
        payload = (await starlette_request.json())["input"]
        # JSON numbers become float64/int64 under NumPy's default inference;
        # the nn.Linear weights are float32, so cast explicitly to avoid a
        # dtype mismatch in the forward pass.
        features = np.asarray(payload, dtype=np.float32)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = self.model(torch.from_numpy(features).to(self.device))
        output = output.cpu().numpy().tolist()
        return {"status": "success", "image": output}
RAY_ACTOR_OPTIONS = {"num_gpus": 1}


@serve.deployment(ray_actor_options=RAY_ACTOR_OPTIONS)
class APP2:
    """Serve deployment that runs a small ``MyModel`` on one Ray-assigned GPU."""

    def __init__(self):
        """Log the GPU assignment and move the model onto the assigned GPU."""
        print("@@@@@@@@@@@@@@@@ APP2 @@@@@@@@@@@@@@@@@@@@@@")
        print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()))
        # Use .get() so an unset variable is printed as None rather than
        # crashing the replica with a KeyError.
        print("CUDA_VISIBLE_DEVICES: {}".format(os.environ.get("CUDA_VISIBLE_DEVICES")))
        # ray.get_gpu_ids() returns *physical* GPU ids, but Ray narrows
        # CUDA_VISIBLE_DEVICES for this actor to the GPU(s) it was assigned.
        # Inside the process CUDA renumbers the visible devices from 0, so
        # with num_gpus=1 the only valid ordinal is 0. Using the physical id
        # (e.g. "cuda:3") raises "CUDA error: invalid device ordinal".
        device = "cuda:0"
        print(f"Initializing APP2 to {device}")
        print("@@@@@@@@@@@@@@@@@ APP2 @@@@@@@@@@@@@@@@@@@@@")
        self.device = device
        self.model = MyModel(10, 10, 3)
        self.model.to(device)

    async def __call__(self, starlette_request: Request):
        """Run the model on the ``"input"`` field of the JSON request body.

        Args:
            starlette_request: Incoming HTTP request whose JSON body contains
                an ``"input"`` entry holding the numeric model input.

        Returns:
            A dict with ``"status"`` set to ``"success"`` and the model
            output as nested Python lists under ``"image"``.
        """
        payload = (await starlette_request.json())["input"]
        # JSON numbers become float64/int64 under NumPy's default inference;
        # the nn.Linear weights are float32, so cast explicitly to avoid a
        # dtype mismatch in the forward pass.
        features = np.asarray(payload, dtype=np.float32)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = self.model(torch.from_numpy(features).to(self.device))
        output = output.cpu().numpy().tolist()
        return {"status": "success", "image": output}
# Start Ray, declaring 8 GPUs as available for scheduling.
ray.init(num_gpus=8)

# Bind each deployment under the key it is registered with in the driver
# (the keys appear to be HTTP route prefixes -- confirm against DAGDriver docs).
dag = {"/app1": APP1.bind(), "/app2": APP2.bind()}

# Wrap the bound deployments in a single DAGDriver and serve it on port 8000.
driver = DAGDriver.bind(dag)
handle = serve.run(driver, port=8000)