I use `ray[serve]` as the server and send requests over HTTP. After sending requests for several hours, I found that the memory used by Ray's actors keeps increasing (over 90 GB for 20 actors), and the growth is roughly linear. How can I control the total memory used by Ray?
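To make the question concrete: the only memory-related option I can see on the deployment side is the `memory` entry in `ray_actor_options`. Below is a minimal sketch of what I mean (the 2 GB figure is arbitrary); as far as I can tell this is only a scheduling resource rather than an enforced cap, which is why I am asking how to actually bound the total memory.

```python
from ray import serve

# Sketch only, not my working code. The "memory" value (2 GB here) is an
# arbitrary example; my understanding is that it is a scheduling hint,
# not a hard per-replica limit.
@serve.deployment(
    name="test_qps",
    route_prefix="/test_qps",
    ray_actor_options={"num_cpus": 1, "memory": 2 * 1024 ** 3},
    num_replicas=1,
)
class QpsTest:
    async def __call__(self, request):
        ...
```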
- OS: macOS 10.15.7 / Ubuntu 18.04 LTS
- Ray version: 1.7.0/1.4.1
- Python version: 3.7.11
- Installation:
```
$ cat requirements
ray[serve]==1.7.0
psutil
requests
$ pip install -r requirements
```
The code for reproduction is below. I have tried `object_store_memory`, but it doesn't work for Ray Serve.
```python
import json
import os

import numpy as np
import psutil
import ray
import requests
from ray import serve


@serve.deployment(
    name="test_qps",
    route_prefix="/test_qps",
    ray_actor_options={"num_cpus": 1},
    num_replicas=1,
)
class QpsTest:
    def __init__(self):
        pass

    async def __call__(self, request):
        step_cnt = await request.json()
        state = np.random.randint(0, 255, (1000, 1000), np.uint64)
        process = psutil.Process(os.getpid())
        proc_mem = process.memory_info().rss / (1024 ** 2)
        print(f'actor_pid={process.pid} \t mem={proc_mem:6.1f} MB.')
        return state


if __name__ == '__main__':
    ray.init(num_cpus=1, dashboard_host="0.0.0.0", object_store_memory=150_000_000)
    client = serve.start(http_options={"host": "0.0.0.0"})
    QpsTest.deploy()

    step_no = 5000000
    step_cnt = 0
    url = "http://127.0.0.1:8000/test_qps"
    while step_cnt < step_no:
        req = requests.post(url, data=json.dumps(step_cnt))
        step_cnt += 1
        process = psutil.Process(os.getpid())
        proc_mem = process.memory_info().rss / (1024 ** 2)
        print(f'main_pid={process.pid} \t mem={proc_mem:6.1f} MB.')
        print('-' * 30)
```
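For completeness, this is roughly how I watch the overall footprint outside of the per-actor prints above. It is a hypothetical helper, not part of the repro: it just sums the RSS of local processes whose name or command line looks like a Ray worker or the raylet, so the matching is approximate.

```python
import psutil


def total_ray_rss_mb():
    """Approximate total RSS (MB) of local Ray processes.

    Matches workers by their "ray::" proctitle and the raylet by name,
    so the filter is heuristic rather than exact.
    """
    total = 0
    for proc in psutil.process_iter(attrs=["name", "cmdline", "memory_info"]):
        info = proc.info
        if info["memory_info"] is None:
            continue
        name = info["name"] or ""
        cmd = " ".join(info["cmdline"] or [])
        if "ray::" in name or "ray::" in cmd or "raylet" in name:
            total += info["memory_info"].rss
    return total / (1024 ** 2)


if __name__ == "__main__":
    print(f"total Ray RSS = {total_ray_rss_mb():.1f} MB")
```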