When running a workflow on a cluster, I get the following: RuntimeError: No storage URI has been configured for the cluster. Specify a storage URI via ray.init(storage=<uri>)
or ray start --head --storage=<uri>
# Connect to the already-running cluster.
# NOTE: Ray Workflows needs a storage URI, but when connecting to an
# existing cluster it must NOT be passed here (doing so raises
# "ValueError: When connecting to an existing cluster, storage must not
# be provided"). Configure it on the head node instead:
#     ray start --head --storage=<uri>
ray.init(
    address="auto",
    runtime_env={
        "working_dir": "../",
        # Fixed: "pyarrow " had a trailing space, which is not a valid
        # pip requirement specifier and can break runtime_env setup.
        "pip": ["graphviz", "pydot", "pyarrow"],
    },
)
# Define Ray remote functions.
@ray.remote
def read_data(num: int):
    """Produce the workflow's input: the integers 0 through num - 1."""
    return list(range(num))
@ray.remote
def preprocessing(data: List[float]) -> List[float]:
    """Square every element of the input list, preserving order."""
    squared = []
    for value in data:
        squared.append(value ** 2)
    return squared
@ray.remote
def aggregate(data: List[float]) -> float:
    """Reduce the preprocessed values to a single total (left-to-right sum)."""
    total = 0
    for value in data:
        total = total + value
    return total
# Build the DAG:
# data -> preprocessed_data -> aggregate
# .bind() is lazy — nothing executes until workflow.run is called.
data = read_data.bind(10)
preprocessed_data = preprocessing.bind(data)
output = aggregate.bind(preprocessed_data)
# Render the DAG to an image (requires graphviz/pydot from the runtime_env).
ray.dag.vis_utils.plot(output, "output.jpg")
print("running workflow")
# Execute the workflow and print the result.
# NOTE(review): `workflow` is presumably `from ray import workflow` — not
# shown in this snippet. workflow.run requires the cluster to have a
# storage URI configured (e.g. `ray start --head --storage=<uri>`);
# otherwise it raises the RuntimeError quoted above.
print(workflow.run(output))
Uncommenting the storage argument instead results in: "ValueError: When connecting to an existing cluster, storage must not be provided".
The original RuntimeError is raised at the workflow.run call.
ray, version 2.8.0
py3.10
Any idea?