Sharing Classes with large data structures for multiprocessing

I have a class that uses a LARGE list, dict and NumPy array, and I want to use it in a multiprocess environment (Ray).

I want one instance of the class to own the data, and every other process to create a sham object with the same API that simply calls the real deal.

How do I do that?

PS> The shams (which are themselves Actors … sham actors :wink: ) probably have to intercept every attribute/method access and build a remote call from it.


Here is what I have so far … it works for methods, but not for attributes or `Actor.__getattr__`-type access:

@ray.remote
class RemoteBlah(Blah):
    """Blah wrapped by ``ray.remote``; adds no members of its own."""

class ProxyBlah:
    """Local stand-in that forwards method calls to a named Ray actor.

    Any public name that is not found on the proxy itself is turned into a
    callable which submits the same-named actor method through ``.remote(...)``
    and hands back whatever that submission returns, without waiting on it.
    """

    def __init__(self, actor_name):
        """Fetch the handle registered under *actor_name* and keep it."""
        self.actor_name = actor_name
        self.actor = ray.get_actor(actor_name)

    def __getattr__(self, attr):
        """Return a callable that forwards *attr* to the actor.

        Python only calls this when normal attribute lookup has failed.

        Raises:
            AttributeError: for dunder names and for the proxy's own
                ``actor`` / ``actor_name`` when they are missing from the
                instance. Protocol probes such as ``__deepcopy__`` or
                ``__setstate__`` (issued by copy/pickle, possibly before
                ``__init__`` has run) must see "not defined" rather than a
                forwarding callable.
        """
        is_dunder = attr.startswith("__") and attr.endswith("__")
        if is_dunder or attr in ("actor", "actor_name"):
            raise AttributeError(attr)

        def wrapped_method(*args, **kwargs):
            print("The method {} is executing.".format(attr))
            result = getattr(self.actor, attr).remote(*args, **kwargs)
            print("The result was {}.".format(result))
            return result

        return wrapped_method

hi @vsraptor,

I think you can create the following actor as your data store class and wrap your proxy class around it, like this:

import ray
import argparse

@ray.remote
class DataStore:
    """Actor that keeps named values in one dictionary."""

    def __init__(self):
        # name -> stored data
        self.storage = dict()

    def store(self, name, data):
        """Save *data* under *name*, replacing any earlier value."""
        self.storage.update({name: data})

    def load(self, name):
        """Return the value saved under *name*, or None when there is none."""
        try:
            return self.storage[name]
        except KeyError:
            return None


# Initialise Ray with address="auto", then create the single DataStore actor
# whose handle every MyProxy method below goes through.
ray.init(address="auto")
store_actor = DataStore.remote()

class MyProxy:
    """Thin local facade over the module-level ``store_actor``.

    Both methods work on the single key ``"name"`` in the actor's storage and
    wait for the remote call to finish before returning.
    """

    def set_field(self, value):
        """Store *value* under ``"name"`` and wait until the actor has done so."""
        pending = store_actor.store.remote("name", value)
        ray.get(pending)

    def get_field(self):
        """Return what the actor currently holds under ``"name"``."""
        pending = store_actor.load.remote("name")
        return ray.get(pending)

# Round-trip a value through the actor-backed proxy and print what comes back.
proxy = MyProxy()
proxy.set_field(123)
print(proxy.get_field())

Is this something you are looking for?

1 Like

I was looking for a `__getattr__`/`__getitem__` solution … and came up with this:


class ProxyBlah:
    """Local proxy exposing a named Ray actor through attribute and item syntax.

    Names listed in ``method_list`` are forwarded as remote method calls whose
    result is resolved with ``ray.get``. Every other name is treated as a key
    and fetched through the actor's ``get`` method. Item assignment is
    forwarded to the actor's ``set`` method.
    """

    # Names of all callable attributes of Blah, computed once when this class
    # is created.
    method_list = [func for func in dir(Blah) if callable(getattr(Blah, func))]

    def __init__(self, actor_name):
        """Fetch the handle registered under *actor_name* and keep it."""
        self.actor_name = actor_name
        self.actor = ray.get_actor(actor_name)

    def __getattr__(self, attr):
        """Resolve a name that normal lookup did not find via :meth:`getattr`.

        Raises:
            AttributeError: for dunder names and for the proxy's own
                ``actor`` / ``actor_name`` when they are missing from the
                instance. Without this guard a missing ``actor`` (for example
                while the proxy is being unpickled or copied) recurses without
                bound, because resolving any name reads ``self.actor`` again.
                Protocol probes such as ``__deepcopy__`` would also be sent to
                the actor as key lookups.
        """
        is_dunder = attr.startswith("__") and attr.endswith("__")
        if is_dunder or attr in ("actor", "actor_name"):
            raise AttributeError(attr)
        return self.getattr(attr)

    def __getitem__(self, attr):
        """Return the actor's value for key *attr* (see :meth:`getitem`)."""
        return self.getitem(attr)

    def __setitem__(self, key, value):
        """Submit ``set(key, value)`` to the actor.

        The submission's result is not passed to ``ray.get``, so this does not
        wait for the call and does not surface errors raised by it.
        """
        self.actor.set.remote(key, value)

    def getattr(self, attr):
        """Return a forwarding callable for a Blah method, else the keyed value.

        For a name in ``method_list`` the result is a callable that submits
        the same-named actor method and returns ``ray.get`` of that
        submission. For any other name the result is ``self.getitem(attr)``.
        """
        # Not a method of Blah: treat the name as a key, as __getitem__ does.
        if attr not in ProxyBlah.method_list:
            return self.getitem(attr)

        def remote_call(*args, **kwargs):
            print("The method {} is executing.".format(attr))
            result = getattr(self.actor, attr).remote(*args, **kwargs)
            print("The result was {}.".format(result))
            return ray.get(result)

        return remote_call

    def getitem(self, key):
        """Return ``ray.get`` of the actor's ``get(key)`` call."""
        return ray.get(self.actor.get.remote(key))