Difference between serve run and serve deploy commands


# File name: serve_quickstart.py
from starlette.requests import Request
import os
import ray
from ray import serve

# from transformers import pipeline

# from ray_app.trt.trt_model import TrtModel
from ray_app.utils.utils import preprocess, nms, xywh2xyxy
# from ray_app.trt.trt_model import TrtModel

# import json

# import numpy as np
# import urllib.request

from fastapi import FastAPI, File, Form, HTTPException
app = FastAPI()

@serve.deployment(num_replicas=1, ray_actor_options={"num_cpus": 0.2, "num_gpus": 0.2})
class Faceblur:
    def __init__(self):
        MODEL_PATH = "/app/weights/yolov7-face.trt"
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        print(f'# ray.get_gpu_ids(): {ray.get_gpu_ids()}')
        print(f'# os.environ["CUDA_VISIBLE_DEVICES"]: {os.environ["CUDA_VISIBLE_DEVICES"]}')

    def home(self):
        return "Welcome!"
faceblur_app = Faceblur.bind()

I am using this as entrypoint for my ray_app module. When I am running it using serve run command its able to run properly but when I am trying to use the same piece of code using serve deploy I am getting error that CUDA not found. I want to understand what’s wrong with my approach.
Also these print statements giving me empty array
print(f’# ray.get_gpu_ids(): {ray.get_gpu_ids()}‘)
print(f’# os.environ[“CUDA_VISIBLE_DEVICES”]: {os.environ[“CUDA_VISIBLE_DEVICES”]}')