Thanks for the prompt reply.
Modifying the code snippet from here, the following is what I want to achieve. Essentially, I want to use the trained LightGBM model to get the test predictions, then compute a custom error myself and report that metric to Tune. How can I achieve this in the current scenario, given that `train`
automatically reports its metrics to Tune?
def train_breast_cancer(config, ray_params):
    """Tune trainable: fit a distributed LightGBM model, compute a custom
    post-processed error on the held-out split, and report it to Tune.

    Args:
        config: LightGBM parameter dict sampled by Tune for this trial.
        ray_params: ``RayParams`` controlling the distributed training actors.
    """
    # Load dataset.
    data, labels = datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set.
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.25)
    train_set = RayDMatrix(train_x, train_y)
    test_set = RayDMatrix(test_x, test_y)

    evals_result = {}
    bst = train(
        params=config,
        dtrain=train_set,
        valid_sets=[train_set, test_set],
        valid_names=["train", "eval"],
        evals_result=evals_result,
        ray_params=ray_params,
        verbose_eval=True,
        num_boost_round=100)

    print('-' * 10, 'Saving model', '-' * 10)
    # BUG FIX: the trial accessors must be *called*; the original printed
    # the bound function objects, not the trial name/id.
    print(tune.get_trial_name(), tune.get_trial_id())
    model_path = "tuned.lgbm"
    bst.booster_.save_model(model_path)
    print("Final validation error: {:.4f}".format(
        evals_result["eval"]["binary_error"][-1]))

    #####################################################################################
    # Predict
    #####################################################################################
    # BUG FIX: the lightgbm_ray prediction helper is lowercase `predict`;
    # the capitalized `Predict` would raise NameError.
    bst = lgb.Booster(model_file=model_path)  # lgb is standard lightgbm module
    pred_ray = predict(bst, test_set, ray_params=RayParams(num_actors=NUM_ACTORS))

    #####################################################################################
    # Calculate custom loss, that does custom operations on `pred_ray`
    #####################################################################################
    # NOTE(review): `some_metadata` is not defined anywhere in this snippet —
    # it must be supplied (e.g. via tune.with_parameters) before this runs.
    custom_error = my_custom_error_that_does_some_postprocessing(
        pred_ray, test_y, some_metadata)
    evals_result['eval-custom_error'] = custom_error

    #####################################################################################
    # Report this custom error to tune, and do HPO based on this
    #####################################################################################
    # tune.run() below selects trials on metric="eval-custom_error".
    tune.report(**evals_result)
def main(cpus_per_actor, num_actors, num_samples):
    """Run a Ray Tune hyperparameter search over the LightGBM trainer,
    save the best model, and print its parameters and accuracy.

    Args:
        cpus_per_actor: CPUs to allocate to each training actor.
        num_actors: Number of distributed training actors per trial.
        num_samples: Number of Tune trials to sample.
    """
    # Set LightGBM config: fixed binary objective/metrics, tunable
    # learning rate, row subsampling and tree depth.
    config = {
        "objective": "binary",
        "metric": ["binary_logloss", "binary_error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9),
    }
    ray_params = RayParams(
        max_actor_restarts=1,
        gpus_per_actor=0,
        cpus_per_actor=cpus_per_actor,
        num_actors=num_actors)

    print('-' * 10, 'Running Ray tune', '-' * 10)
    # Minimize the custom metric reported by each trial via tune.report().
    analysis = tune.run(
        tune.with_parameters(train_breast_cancer, ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=num_samples,
        metric="eval-custom_error",
        mode="min",
        local_dir="./tune_results")

    # Load the best model checkpoint from the winning trial's log dir.
    best_bst = lightgbm_ray.tune.load_model(
        os.path.join(analysis.best_logdir, "tuned.lgbm"))
    best_bst.save_model("best_model.lgbm")

    # BUG FIX: the trial reports tune.report(**evals_result) whose keys are
    # "train"/"eval" (nested dicts) and "eval-custom_error" — there is no
    # flat "eval-binary_error" key, so the original lookup would KeyError.
    # Read the last binary_error entry from the nested "eval" results.
    accuracy = 1. - analysis.best_result["eval"]["binary_error"][-1]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")