RecursionError: maximum recursion depth exceeded

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

Here is the error while running ray, version 2.9.2.

---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
Cell In[38], line 2
      1 #getprices()
----> 2 pricetasks = getprices.remote()

File ~/anaconda3/lib/python3.10/site-packages/ray/remote_function.py:139, in RemoteFunction.__init__.<locals>._remote_proxy(*args, **kwargs)
    137 @wraps(function)
    138 def _remote_proxy(*args, **kwargs):
--> 139     return self._remote(args=args, kwargs=kwargs, **self._default_options)

File ~/anaconda3/lib/python3.10/site-packages/ray/_private/auto_init_hook.py:22, in wrap_auto_init.<locals>.auto_init_wrapper(*args, **kwargs)
     19 @wraps(fn)
     20 def auto_init_wrapper(*args, **kwargs):
     21     auto_init_ray()
---> 22     return fn(*args, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/ray/util/tracing/tracing_helper.py:310, in _tracing_task_invocation.<locals>._invocation_remote_span(self, args, kwargs, *_args, **_kwargs)
    308     if kwargs is not None:
    309         assert "_ray_trace_ctx" not in kwargs
--> 310     return method(self, args, kwargs, *_args, **_kwargs)
    312 assert "_ray_trace_ctx" not in kwargs
    313 tracer = _opentelemetry.trace.get_tracer(__name__)

File ~/anaconda3/lib/python3.10/site-packages/ray/remote_function.py:268, in RemoteFunction._remote(self, args, kwargs, **task_options)
    266 task_options.pop("max_calls", None)
    267 if client_mode_should_convert():
--> 268     return client_mode_convert_function(self, args, kwargs, **task_options)
    270 worker = ray._private.worker.global_worker
    271 worker.check_connected()

File ~/anaconda3/lib/python3.10/site-packages/ray/_private/client_mode_hook.py:164, in client_mode_convert_function(func_cls, in_args, in_kwargs, **kwargs)
    162     setattr(func_cls, RAY_CLIENT_MODE_ATTR, key)
    163 client_func = ray._get_converted(key)
--> 164 return client_func._remote(in_args, in_kwargs, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/common.py:300, in ClientRemoteFunc._remote(self, args, kwargs, **option_args)
    298 if kwargs is None:
    299     kwargs = {}
--> 300 return self.options(**option_args).remote(*args, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/common.py:583, in OptionWrapper.remote(self, *args, **kwargs)
    581 def remote(self, *args, **kwargs):
    582     self._remote_stub._signature.bind(*args, **kwargs)
--> 583     return return_refs(ray.call_remote(self, *args, **kwargs))

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/api.py:100, in _ClientAPI.call_remote(self, instance, *args, **kwargs)
     86 def call_remote(self, instance: "ClientStub", *args, **kwargs) -> List[Future]:
     87     """call_remote is called by stub objects to execute them remotely.
     88 
     89     This is used by stub objects in situations where they're called
   (...)
     98         kwargs: opaque keyword arguments
     99     """
--> 100     return self.worker.call_remote(instance, *args, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/worker.py:556, in Worker.call_remote(self, instance, *args, **kwargs)
    555 def call_remote(self, instance, *args, **kwargs) -> List[Future]:
--> 556     task = instance._prepare_client_task()
    557     # data is serialized tuple of (args, kwargs)
    558     task.data = dumps_from_client((args, kwargs), self._client_id)

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/common.py:589, in OptionWrapper._prepare_client_task(self)
    588 def _prepare_client_task(self):
--> 589     task = self._remote_stub._prepare_client_task()
    590     set_task_options(task, self._options)
    591     return task

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/common.py:326, in ClientRemoteFunc._prepare_client_task(self)
    325 def _prepare_client_task(self) -> ray_client_pb2.ClientTask:
--> 326     self._ensure_ref()
    327     task = ray_client_pb2.ClientTask()
    328     task.type = ray_client_pb2.ClientTask.FUNCTION

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/common.py:321, in ClientRemoteFunc._ensure_ref(self)
    318 # Check pickled size before sending it to server, which is more
    319 # efficient and can be done synchronously inside remote() call.
    320 check_oversized_function(data, self._name, "remote function", None)
--> 321 self._ref = ray.worker._put_pickled(
    322     data, client_ref_id=self._client_side_ref.id
    323 )

File ~/anaconda3/lib/python3.10/site-packages/ray/util/client/worker.py:510, in Worker._put_pickled(self, data, client_ref_id, owner)
    508 if not resp.valid:
    509     try:
--> 510         raise cloudpickle.loads(resp.error)
    511     except (pickle.UnpicklingError, TypeError):
    512         logger.exception("Failed to deserialize {}".format(resp.error))

RecursionError: maximum recursion depth exceeded

This wasn’t a problem prior to the upgrade. Nothing else has changed. Running on Ubuntu Focal.

Here is the function that called it:

@ray.remote
def getprices():
    """Submit one ``stockpricehistory`` task per in-scope asset and wait for them all.

    Every row yielded by ``assets_in_scope().iterrows()`` becomes a remote
    ``stockpricehistory`` call that receives the row's ``symbol``,
    ``startdate`` and ``enddate`` values. The resulting object refs are then
    drained with ``ray.wait`` in batches, so completion is observed in finish
    order rather than submission order.

    The task results are never fetched here; this function only blocks until
    every submitted ref has been reported ready, then returns ``None``.
    """
    pending_refs = []
    for _, asset in assets_in_scope().iterrows():
        pending_refs.append(
            stockpricehistory.remote(
                asset['symbol'], asset['startdate'], asset['enddate']
            )
        )

    # Batch size is the cluster's 'CPU' resource entry, truncated to an int.
    batch_size = int(ray.cluster_resources()['CPU'])

    while pending_refs:
        # Never ask ray.wait for more refs than are still outstanding.
        wait_for = min(batch_size, len(pending_refs))
        # The ready refs are discarded; only the still-pending remainder
        # is carried into the next iteration.
        _, pending_refs = ray.wait(pending_refs, num_returns=wait_for)

    return None

Hi @Justin_Coffi,

Could you provide a runnable repro that I can run on my side for debugging?

Sure. The database isn’t publicly accessible so it might take a bit. Also wanted to add that when I remove Ray, it works as expected.

Once I removed this code:

 try:
     from cupyx.fallback_mode import numpy as np
 except ImportError:
     import numpy as np

and switched to a plain `import numpy as np`, the problem stopped for me.