Hey @Ruiyang_Wang, the following is a repro script that resulted in the above error:
import numpy as np
import ray
import os
import time
import random
def ray_put(arr: np.ndarray):
    """Place ``arr`` in the Ray object store and return the reference from ``ray.put``.

    Prints ``arr.nbytes`` first, then a progress message before and after the put.
    Note that ``nbytes`` covers only the array's own element buffer; for a
    ``dtype=object`` array that is the element pointers, not the objects they
    refer to.
    """
    print(f"Size in bytes: {arr.nbytes}")

    print("Putting in object store...")
    object_ref = ray.put(arr)
    print("Placed in object store!")

    return object_ref
def main() -> None:
    """Build three large object-dtype arrays and put each into the Ray object store.

    If Ray is not yet initialized, it is started here: with ``address="auto"``
    and the password from the ``redis_password`` environment variable when that
    variable is set, otherwise with a plain ``ray.init``. Ray is always shut
    down on the way out, including when one of the puts raises.
    """
    if not ray.is_initialized():
        if "redis_password" in os.environ:
            ray.init(
                address="auto",
                _redis_password=os.environ["redis_password"],
                include_dashboard=True,
            )
        else:
            ray.init(include_dashboard=True)

    try:
        print(f"Ray Available Resources: {ray.available_resources()}")

        # Each outer array holds 150,000,000 separate NumPy arrays of 50-100
        # integers. Presumably this per-row array count is what drives the
        # "Total buffer metadata size" error raised by ray.put -- confirm.
        arrs = [
            np.array(
                [
                    np.random.choice(150_000_000, random.randint(50, 100))
                    for _ in range(150_000_000)
                ],
                dtype=object,
            )
            for _ in range(3)
        ]

        ray_put(arrs[0])  # Error occurs here
        ray_put(arrs[1])
        ray_put(arrs[2])
    finally:
        # Release the Ray connection even if a put fails.
        ray.shutdown()
# Run the repro only when this file is executed as a script.
if __name__ == "__main__":
    main()
Traceback:
Traceback (most recent call last):
File "/users/PAS2065/adityatv/repro.py", line 39, in <module>
File "/users/PAS2065/adityatv/repro.py", line 30, in main
arrs = [
File "/users/PAS2065/adityatv/repro.py", line 11, in ray_put
print("Putting in object store...")
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 18, in auto_init_wrapper
return fn(*args, **kwargs)
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/worker.py", line 2612, in put
object_ref = worker.put_object(value, owner_address=serialize_owner_address)
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/worker.py", line 693, in put_object
serialized_value = self.get_serialization_context().serialize(value)
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/serialization.py", line 466, in serialize
return self._serialize_to_msgpack(value)
File "/users/PAS2065/adityatv/miniconda3/envs/myenv/lib/python3.9/site-packages/ray/_private/serialization.py", line 450, in _serialize_to_msgpack
return MessagePackSerializedObject(
File "python/ray/includes/serialization.pxi", line 463, in ray._raylet.MessagePackSerializedObject.__init__
File "python/ray/includes/serialization.pxi", line 437, in ray._raylet.Pickle5SerializedObject.total_bytes.__get__
File "python/ray/includes/serialization.pxi", line 337, in ray._raylet.Pickle5Writer.get_total_bytes
ValueError: Total buffer metadata size is bigger than 2147483647. Consider reduce the number of buffers (number of numpy arrays, etc).
I ran this (and my original scripts) on a Slurm node with 1024 GB allocated for object store memory (total memory is 3000+ GB). Thanks in advance!