Hi all,
I was wondering if anyone has experience running Ray on a Databricks setup.
I was inspired by the following post: RayDemo - Databricks
But I tend to get errors in one of the final cells:
@ray.remote
def linear_pred(x, y, i):
    """Fit an ElasticNet on ``(x, y)`` and predict the target at ``i + 1``.

    Declared as a Ray remote function because the caller submits it with
    ``linear_pred.remote(...)``; a plain function has no ``.remote``
    attribute.

    Args:
        x: Feature matrix passed to ``ElasticNet.fit``.
        y: Target values passed to ``ElasticNet.fit``.
        i: The fitted model is evaluated at the single feature value
            ``i + 1``.

    Returns:
        The first (and only) element of the prediction array.
    """
    model = linear_model.ElasticNet().fit(x, y)
    # predict() is given a 2-D array: one sample with one feature.
    prediction = model.predict(np.array([[i + 1]]))
    return prediction[0]
@pandas_udf(ArrayType(LongType()))
def ray_udf(s):
    """Submit one ``linear_pred`` Ray task per growing prefix of ``s``.

    For every prefix length ``end`` the task receives the column vector
    ``[[0], [1], ..., [end - 1]]`` as features, the first ``end`` values of
    ``s`` as targets, and ``end - 1`` as the index argument. All tasks are
    submitted before any result is awaited.

    Returns:
        A ``pd.Series`` holding the task results in submission order.
    """
    values = list(s)
    futures = [
        linear_pred.remote(
            np.asarray([[n] for n in range(end)]),
            np.asarray(values[:end]),
            end - 1,
        )
        for end in range(1, len(values) + 1)
    ]
    # Resolve every pending task in a single call.
    results = ray.get(futures)
    return pd.Series(results)
# Project the two input columns plus the UDF output, aliased as "preds".
res = df.select(
    "county_name",
    "deaths",
    ray_udf("deaths").alias("preds"),
)
display(res)
Usually the error mentions something about protobuf:
> (raylet) Traceback (most recent call last):
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/workers/default_worker.py", line 8, in <module>
> (raylet) import ray
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/__init__.py", line 91, in <module>
> (raylet) import ray._raylet # noqa: E402
> (raylet) File "python/ray/_raylet.pyx", line 110, in init ray._raylet
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/exceptions.py", line 5, in <module>
> (raylet) from ray.core.generated.common_pb2 import RayException, Language, PYTHON
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/core/generated/common_pb2.py", line 15, in <module>
> (raylet) from . import runtime_env_common_pb2 as src_dot_ray_dot_protobuf_dot_runtime__env__common__pb2
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/core/generated/runtime_env_common_pb2.py", line 21, in <module>
> (raylet) create_key=_descriptor._internal_create_key,
> (raylet) AttributeError: module 'google.protobuf.descriptor' has no attribute '_internal_create_key'
> (raylet) Traceback (most recent call last):
> (raylet) File "/databricks/python/lib/python3.8/site-packages/ray/workers/default_worker.py", line 8, in <module>
Many thanks,
George