How severe does this issue affect your experience of using Ray?
- None: Just asking a question out of curiosity
Description:
Hello, I am running a ray.tune task (Ray 2.6.1) with Ray initialized using num_cpus=3; the example script is shown below.
When I execute the program and view the Ray dashboard, I can see that the Overview
page does indeed show 3 CPU cores in use (see picture 1).
But in the Cluster
page, each worker's CPU usage is > 100%. In theory, there are 3 CPUs in total, so every job should stay within 100% (see picture 2).
I would like to understand why the worker CPU usage exceeds num_cpus.
import os
import ray
from catboost import CatBoostClassifier
from ray import tune
from ray.air import RunConfig
from ray.tune.search.hyperopt import HyperOptSearch
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
def train_func(config):
    """Train a CatBoost classifier on the breast-cancer dataset and score it.

    Args:
        config: Mapping of keyword arguments forwarded to
            ``CatBoostClassifier``. If it does not contain ``thread_count``,
            a value of 1 is used (see the note in the body).

    Returns:
        A dict ``{"f1": <F1 score on the held-out validation split>}``.
    """
    # Load the sklearn dataset as a DataFrame and attach the label column.
    data = load_breast_cancer(as_frame=True)
    train = data.data
    train["class"] = data.target

    # Fixed seed so every trial sees the same train/validation split.
    train_data, valid_data = train_test_split(train, random_state=2023, test_size=0.2)
    features = train_data.columns.tolist()
    features.remove("class")
    x_train, y_train = train_data[features], train_data["class"]
    x_valid, y_valid = valid_data[features], valid_data["class"]

    # Ray's ``num_cpus`` is a logical scheduling quantity; it does not cap the
    # threads a trial may start. CatBoost's default ``thread_count`` uses all
    # available cores, so several concurrent trials oversubscribe the machine
    # and each worker shows > 100% CPU. Default to one thread per trial
    # (Tune's default allocation is 1 CPU per trial); an explicit
    # ``thread_count`` in ``config`` still takes precedence.
    params = {"thread_count": 1, **config}

    # catboost
    algo = CatBoostClassifier(**params)
    algo.fit(x_train, y_train)
    y_pred = algo.predict(x_valid)
    f1 = f1_score(y_valid, y_pred)
    return {"f1": f1}
if __name__ == "__main__":
    # Script entry point: start a local Ray instance, then run a
    # HyperOpt-driven Tune search over CatBoost hyperparameters.
    # The environment variable is set before Ray/Tune start, as originally.
    os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "3"
    ray.init(num_cpus=3, include_dashboard=True, dashboard_host="10.3.163.116")

    # Hyperparameter search space sampled by the search algorithm.
    search_space = dict(
        learning_rate=tune.uniform(5e-3, 0.29),
        depth=tune.randint(5, 8),
        l2_leaf_reg=tune.randint(1, 5),
    )
    searcher = HyperOptSearch(search_space, metric="f1", mode="max")

    # Build the Tuner pieces separately so each setting is easy to locate.
    wrapped_trainable = tune.with_parameters(train_func)
    tuning_options = tune.TuneConfig(
        metric="f1",
        mode="max",
        search_alg=searcher,
        num_samples=100,
        time_budget_s=600,
        max_concurrent_trials=3,
    )
    run_options = RunConfig(verbose=2)

    tuner = tune.Tuner(
        trainable=wrapped_trainable,
        tune_config=tuning_options,
        run_config=run_options,
    )
    result = tuner.fit()
Overview(picture1)
Cluster(picture2)
Top(picture3)