Error when executing multiple tasks that call the same function with different arguments

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

My code:

import math
import ray
import time
import pandas as pd
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2


@ray.remote(num_cpus=1, max_restarts=5)
class Actor(object):
    """Ray actor that holds a train/test split and scores KNN models on it.

    Each instance reads the HIKARI-2021 CSV itself, so every actor keeps its
    own copy of the split in memory.
    """

    def __init__(self):
        """Load the CSV, keep the first eighth of its rows, and split 80/20."""
        frame = pd.read_csv("Dataset - HIKARI-2021.csv", delimiter=',')
        frame = frame.drop(columns=["uid", "originh", "responh"])

        # Every remaining column except the last two is used as a feature.
        feature_columns = list(frame.columns)[:-2]
        features = frame[feature_columns]
        labels = frame["traffic_category"]

        # Only the first eighth of the rows is kept.
        features = features[:len(features) // 8]
        labels = labels[:len(labels) // 8]

        split = train_test_split(features, labels, test_size=0.2, random_state=1)
        self.X_train, self.X_test, self.Y_train, self.Y_test = split

    def compute(self, start, end):
        """Score a KNN classifier for each feature count k in ``start..end``.

        For every k (never going above 10) the k best features are selected
        with a chi-squared test, a ``KNeighborsClassifier`` is fitted on the
        training part, and its accuracy on the test part is recorded.

        Args:
            start: First k to evaluate (an integer, as passed by the driver).
            end: Last k to evaluate, inclusive; values above 10 are ignored.

        Returns:
            Always ``1``. The collected ``[k, accuracy]`` pairs are not
            returned.
        """
        print(f"{start!s} - {end!s}")

        scores = []
        for k in range(start, min(end, 10) + 1):
            selector = SelectKBest(score_func=chi2, k=k)
            train_selected = selector.fit_transform(self.X_train, self.Y_train)
            test_selected = selector.transform(self.X_test)

            knn = KNeighborsClassifier(n_jobs=1)
            knn.fit(train_selected, self.Y_train)
            predicted = knn.predict(test_selected)

            scores.append([k, metrics.accuracy_score(self.Y_test, predicted)])

        # Appending `scores` to "results.txt" (one item per line) and returning
        # them through an instance attribute are both currently disabled, so
        # the method only signals completion.
        return 1
    

# One actor per worker; Actor is declared with num_cpus=1, so each actor
# reserves a CPU.
# NOTE(review): presumably the machine exposes at least `cpus` CPUs to Ray;
# otherwise some actors may never be scheduled - confirm.
cpus = 4
actors = [Actor.remote() for _ in range(cpus)]

# The k values 1..10 are cut into `cpus` contiguous ranges of `batch` values.
# The last range may run past 10; compute() stops at 10 on its own.
batch = math.ceil(10 / cpus)

futures = [
    actor.compute.remote(i * batch + 1, (i + 1) * batch)
    for i, actor in enumerate(actors)
]
res = ray.get(futures)
print(res)

I am using Ray 1.12.1

Hi @Rui_Fernandes, thanks for your interest in Ray. What’s the error you encountered when running the above code?