Hi @kai, thanks for the help!
My params file is:
import logging
from ray import tune

# --- Run configuration -------------------------------------------------
FILENAME = 'df_train_initial'  # name of the input training dataframe
RAY_TUNER = True               # use Ray Tune (TuneSearchCV) for hyperparameter search
NB_TRIALS = 3                  # number of tuning trials per search
MLFLOW = False                 # MLflow experiment tracking disabled
TEST = 0.1                     # test split fraction
VAL = 0.2                      # validation split fraction
NFOLDS = 3                     # number of StratifiedKFold splits

# Module-level logger for the classification pipeline.
logger = logging.getLogger('Classification_logger')

DEBUG = False
# Fix the seed only in debug mode so debug runs are reproducible;
# otherwise SEED is None and sklearn/xgboost draw a fresh seed each run.
if DEBUG:
    SEED = 123
else:
    SEED = None

EARLY_STOP = 10                       # xgboost early_stopping_rounds
SEARCH_OPTIMIZATION = 'bohb'          # Ray Tune search algorithm name
EVAL_METRIC = ["merror", "mlogloss"]  # multi-class evaluation metrics
USE_GPU = True

# Hyperparameter search space for the XGBoost classifier, expressed with
# Ray Tune sampling primitives (consumed by TuneSearchCV).
xgb_params = {
    'n_estimators': tune.randint(10, 80),
    'reg_alpha': tune.loguniform(0.1, 100),
    'booster': tune.choice(['gbtree', 'gblinear']),
    'colsample_bylevel': tune.uniform(0.05, 0.5),
    'colsample_bytree': tune.uniform(0.05, 0.5),
    'learning_rate': tune.uniform(0.001, 0.4),
    'reg_lambda': tune.loguniform(0.1, 100),
    'subsample': tune.uniform(0.2, 0.7),
    "max_depth": tune.randint(1, 10),
    "min_child_weight": tune.choice([1, 2, 3]),
    "eta": tune.loguniform(1e-4, 1e-1),
}
In the code where I define the xgb model, I have something like this:
…
# Initialise Ray exactly once before any TuneSearchCV run.
if RAY_TUNER:
    import ray

    # Guard against double-initialisation (e.g. notebook re-runs / re-imports).
    if not ray.is_initialized():
        ray.init(num_cpus=4)
# Train an XGBoost multi-class classifier with stratified K-fold CV,
# per-fold SMOTE/undersampling rebalancing, and (when RAY_TUNER is set)
# a Ray Tune hyperparameter search via TuneSearchCV.
# NOTE(review): indentation was lost in this paste and several names
# (`over`, `under`, `nb_classes`, `NB_CPUS`) come from elided code —
# verify against the full file.
def xgb_model(x_train,
y_train,
x_val,
y_val,
features,
classes):
# Stratified folds preserve class proportions; SEED is None outside
# DEBUG, so fold assignment differs between runs.
kfold = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)
fold = 1
# One subplot row per fold, one column per eval metric (learning curves).
fig, ax = plt.subplots(NFOLDS, len(EVAL_METRIC), sharex=True)
fig.tight_layout()
for i, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
# Positional 2-D indexing — assumes x_train/y_train are NumPy arrays,
# not DataFrames; TODO confirm.
x_train_, y_train_ = x_train[train_idx, :], y_train[train_idx]
x_val_, y_val_ = x_train[val_idx, :], y_train[val_idx]
# I am applying SMOTE here
...
...
# Oversample minority classes (strategy `over`), then undersample the
# majority (strategy `under`); both strategies are set in elided code.
oversample = BorderlineSMOTE(sampling_strategy=over)
undersample = RandomUnderSampler(sampling_strategy=under)
steps = [('o', oversample), ('u', undersample)]
pipeline = Pipeline(steps=steps)
# Resampling is fit on the fold's training part only, so the fold's
# validation split (x_val_, y_val_) stays untouched — no SMOTE leakage.
X_balanced, y_balanced = pipeline.fit_resample(x_train_, y_train_)
if RAY_TUNER:
# NOTE(review): `nb_classes` is defined in elided code — presumably
# len(classes); confirm.
xgb_class = xgb.XGBClassifier(objective ='multi:softprob',
num_class=nb_classes,
use_label_encoder=False,
seed=SEED,
enable_categorical=False)
# Wrap the estimator in a Ray Tune CV search over `xgb_params`;
# early_stopping=True here stops unpromising *trials*, distinct from
# xgboost's early_stopping_rounds passed to fit() below.
model = TuneSearchCV(
xgb_class,
param_distributions=xgb_params,
n_trials=NB_TRIALS,
max_iters=15,
search_optimization=SEARCH_OPTIMIZATION,
early_stopping=True,
scoring='f1_micro',
n_jobs=NB_CPUS,
name='Ray tune',
verbose=0,
local_dir='./ray_results',
use_gpu=USE_GPU,
)
...
# Train the model; fit kwargs (eval_metric, early_stopping_rounds,
# eval_set) are forwarded to the underlying XGBClassifier.fit.
# NOTE(review): the first eval_set entry is the balanced training set
# itself — presumably for plotting train-vs-val curves; confirm intent.
history = model.fit(X_balanced,
y_balanced,
eval_metric=EVAL_METRIC,
early_stopping_rounds=EARLY_STOP,
eval_set=[(X_balanced, y_balanced), (x_val_, y_val_)])
if RAY_TUNER:
# TuneSearchCV refits the best configuration on the full input.
best_model = history.best_estimator_
...