Hi, I am using Ray with FastAPI, where users can call the API to get a prediction score.
Could you please help me with the following:
I recently moved from a 2 core machine to 24 core machine.
**Before**:
CPU(s): 2
On-line CPU(s) list: 0,1
Thread(s) per core: 2
Core(s) per socket: 1
Socket(s): 1
Time taken by the predict call below was 2 to 3 milliseconds:
# main.....
class Executor:
def __init__(self):
ray.init(_node_ip_address="0.0.0.0", ignore_reinit_error=True, num_cpus=24, include_dashboard=False)
self._models = self.prepare()
def prepare(self):
return all_models
async def predict(self, request):
for mname in request.model_names:
prediction = self._models[mname].predict.remote(request.context)
model_remote.append(prediction)
predictions = [ray.get(each_model) for each_model in model_remote]
return {"predictions": predictions}
#Actor
@ray.remote(num_cpus=num_cpus)
class MyModel():
def __init__(self, *args, **kwargs):
self.logger = get_logger()
self.init_model(*args, **kwargs)
def init_model(self, *args, **kwargs):
self._model = load(join(<model_path>))
self.model = load(join(<model path>))
async def predict(self, data):
hpd = pd.Series({.......},
dtype='int32').values.reshape(1, -1)
start_time = datetime.datetime.now()
pred = self.model.predict_proba(self._model.transform(hpd))[0]
return : float(max(pred))
**After**:
CPU(s): 24
On-line CPU(s) list: 0-23
Thread(s) per core: 2
Core(s) per socket: 12
Socket(s): 1
Time taken by the predict call below was 30 to 40 milliseconds:
cls.model.predict_proba(self._model.transform(hpd))
I tried changing Ray's num_cpus, but on the new server it is taking more time. Even though there are more cores now, why is the predict_proba call taking longer?