Hi, I’m trying to run xgboost_ray on a Ray cluster deployed on Kubernetes (K8s). The connection to the cluster works properly, but when I try to run xgboost_ray I get "Exception iterating responses: No module named 'xgboost_ray'".
I’m using the Ray Kubernetes Operator, and the Docker image I used to deploy Ray on the cluster is "rayproject/ray-ml".
Here are my code and the error. Thanks in advance for your help.
import ray
import ray.util
import os
import time
from xgboost_ray import train, RayDMatrix, RayParams
import pandas as pd
# Attach this process to the Ray cluster through the Ray client endpoint.
ray.util.connect("127.0.0.1:50051")

# Column layout: the label first, then features "feature-01" .. "feature-28".
colnames = ["label"]
colnames += ["feature-%02d" % i for i in range(1, 29)]
df = pd.read_csv("HIGGS.csv.gz", names=colnames)
dtrain = RayDMatrix(df, label="label")

config = {"tree_method": "hist", "eval_metric": ["logloss", "error"]}

# Filled in by train() with the per-round metrics of every evaluation set.
evals_result = {}

# Time only the distributed training call itself.
start = time.time()
bst = train(
    config,
    dtrain,
    num_boost_round=100,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
    ray_params=RayParams(max_actor_restarts=1),
)
taken = time.time() - start
print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

bst.save_model("higgs.xgb")

# Report the "error" metric recorded for the last boosting round.
final_error = evals_result["train"]["error"][-1]
print("Final training error: {:.4f}".format(final_error))
Got Error from data channel – shutting down: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.UNKNOWN
details = “Exception iterating responses: No module named ‘xgboost_ray’”
debug_error_string = "{"created":"@1618494050.585477971","description":"Error received from peer ipv4:127.0.0.1:50051","file":"src/core/lib/surface/call.cc","file_line":1067,"grpc_message":"Exception iterating responses: No module named 'xgboost_ray'","grpc_status":2}"
Exception in thread Thread-8:
Traceback (most recent call last):
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/threading.py”, line 926, in _bootstrap_inner
self.run()
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/threading.py”, line 870, in run
self._target(*self._args, **self._kwargs)
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/site-packages/ray/util/client/dataclient.py”, line 76, in _data_main
raise e
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/site-packages/ray/util/client/dataclient.py”, line 61, in _data_main
for response in resp_stream:
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/site-packages/grpc/_channel.py”, line 426, in next
return self._next()
File “/home/vmuser/miniconda3/envs/anyscale/lib/python3.7/site-packages/grpc/_channel.py”, line 826, in _next
raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.UNKNOWN
details = “Exception iterating responses: No module named ‘xgboost_ray’”
debug_error_string = “{“created”:”@1618494050.585477971",“description”:“Error received from peer ipv4:127.0.0.1:50051”,“file”:“src/core/lib/surface/call.cc”,“file_line”:1067,“grpc_message”:“Exception iterating responses: No module named ‘xgboost_ray’”,“grpc_status”:2}"