TL;DR: this is working as intended (WAI); the slow first run is due to worker start-up time.
I can reproduce this on an already-running Ray cluster, so it’s not due to Ray cluster start-up.
I tracked the worker ID for each task: all of the workers are created during the first run, which takes time. Subsequent runs are scheduled onto those existing workers, so they don’t pay the start-up cost.
import ray
import time
import numpy as np
# Initialize Ray before any remote tasks are submitted.
ray.init()
# Elapsed time of each benchmark iteration, in seconds (one entry per iteration).
timer = []
@ray.remote
def ray_function(matrix):
    """Do a small amount of array work and report which worker ran the task.

    Args:
        matrix: Object exposing a ``.std()`` method; the caller in this
            script passes a NumPy array.

    Returns:
        The ID of the Ray worker that executed this task, as reported by
        ``ray.get_runtime_context().get_worker_id()``.
    """
    # The result is unused; the call appears to exist only to give the task
    # some work to do.
    matrix.std()
    return ray.get_runtime_context().get_worker_id()
# 100 x 100 x 100 array of random floats, passed as the argument to every task.
large_matrix = np.random.rand(100, 100, 100)
from collections import defaultdict
# Maps worker ID -> list of iteration indices in which that worker ran a task
# (one entry per task, so an index repeats if the worker ran several tasks).
d = defaultdict(list)
# Run the same batch of 15 tasks 10 times, timing each batch and recording
# which worker executed each task.
# NOTE(review): the pasted snippet had lost its indentation; the nesting below
# is reconstructed from the posted output (10 timings, 15 tasks per iteration).
for i in range(10):
    # perf_counter() is a monotonic clock meant for measuring intervals, so
    # the measurement is not affected by system clock adjustments.
    start_time = time.perf_counter()
    # Submit the whole batch before blocking on any result.
    refs = [ray_function.remote(large_matrix) for _ in range(15)]
    # Wait for every task in the batch; each result is a worker ID.
    worker_ids = ray.get(refs)
    for worker_id in worker_ids:
        d[worker_id].append(i)
    timer.append(time.perf_counter() - start_time)
print(timer)
print(d)
Result (elapsed seconds for each of the 10 iterations):
[1.2360892295837402, 0.025461196899414062, 0.031101465225219727, 0.03042149543762207, 0.030280590057373047, 0.030264854431152344, 0.032823801040649414, 0.030443429946899414, 0.024106264114379883, 0.022119998931884766]
and (worker ID → iteration indices in which that worker ran a task):
defaultdict(<class 'list'>, {'dda54e6b5ade17dc14b3bb06e7f680971b4ae77a6d9f950b1613fa16': [0, 1, 1, 8, 8, 9, 9, 9], 'a8136969e2ae2aa6dd3b97b390e91284efd8804e0213f864e842243f': [0, 1, 7, 8, 8, 8, 9, 9], '4182eb5dcc94293f58e2a4d29b9e68075a53ca163c937420845d8a77': [0], '987a780e101441942325baa39228fe82b0df4e17cb2a360c1d99c182': [0], '96215c2a684c37837dac00d6533422f1efa5aa2eb62cbd207fdb6cf4': [0, 1, 1, 2, 2, 2, 3, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9], '91163886c5f0672046c0c7f24e9869c9a6b3f1beb713f52d8f5b8d2f': [0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9], '40b46d4a5124eb5b024eac06ed46976083fa9ae3ca747ed0c537f1b5': [0, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9], '9be59e947deca6d27850f1ac1bd9b0ad3577c09bc80796e5a95b78dc': [0], '9e74c44ea0e1d9155b750093025387b16cf66563d76fb8187d72d649': [0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 7, 8, 8, 9, 9], 'a1f0e8baf7da47c5207176290ef3ebc558f022a308f65f6e48e094cb': [0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 9, 9], '8af06c9591e5447836891d525459726f63dfeb83f9665354b0ba870b': [0, 1], '77f58325becce44998976e9297dea3e22dd2496137af87db6179f287': [0], '372ea291fb2a57445d827d613df8004a4a7bf1137bc1383d5265d13b': [0, 1], 'c49a2b45f2506ad0ea6d2219c101ec1bfb0b539c589234946f0437b1': [0], 'e398dc04db8456d64fa4914b9304fe2dc56fcc414b5f3870515724aa': [0, 1]})