Tune.sample_from Parameters Not Appearing in TensorBoard Logs

  • Medium: It contributes to significant difficulty to complete my task, but I can work around it.

I am using Ray Tune for hyperparameter optimization and visualizing the results in TensorBoard. However, I noticed that the hyperparameters sampled with tune.sample_from do not appear in TensorBoard, while the other parameters do. For example, in the code below only feature_selection_choice, scaler_choice, use_svm, and use_ridge show up in TensorBoard.
I would really appreciate any help.
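
To make this easier to reproduce, here is a stripped-down sketch that shows the same behavior for me (assuming Ray 2.x with the default TensorBoard logging; trainable and the parameter names are just for illustration):

import numpy as np
from ray import train, tune

def trainable(config):
    # Report a metric that depends on both parameters
    train.report({"score": config["a"] + config["b"]})

tuner = tune.Tuner(
    trainable,
    param_space={
        # "a" shows up under HPARAMS in TensorBoard
        "a": tune.choice([1, 2, 3]),
        # "b" is resolved for every trial but never shows up in TensorBoard
        "b": tune.sample_from(lambda spec: np.random.randint(0, 10)),
    },
)
tuner.fit()

My full script: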

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.feature_selection import RFE, SelectKBest, SelectPercentile, f_classif, mutual_info_classif
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.metrics import fbeta_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.svm import SVC
from skrebate import ReliefF
from ray import air, train, tune
from ray.tune import TuneConfig
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.search.optuna import OptunaSearch

n_samples = 1000
n_features = 24
X = np.random.rand(n_samples, n_features - 1)
y = np.random.randint(0, 2, n_samples)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
columns_to_scale = list(range(n_features - 10))

def train_model(config):

    # Map the sampled scaler name to a scaler instance
    scaler_choice = config.get('scaler_choice')
    scaler = {
        'StandardScaler': StandardScaler(),
        'MinMaxScaler': MinMaxScaler(),
        'RobustScaler': RobustScaler()
    }.get(scaler_choice)

    # Build the feature selector matching the sampled choice; the per-method
    # parameters below are the ones that come from tune.sample_from
    feature_selection_choice = config.get('feature_selection_choice')
    feature_selector = None
    if feature_selection_choice == 'pca':
        feature_selector = PCA(n_components=config['pca_n_components'])
    elif feature_selection_choice == 'selectpercentile':
        alg = config.get('alg')
        score_func = {'f_classif': f_classif, 'mutual_info_classif': mutual_info_classif}.get(alg)
        feature_selector = SelectPercentile(score_func=score_func, percentile=config['percentile'])
    elif feature_selection_choice == 'K_best':
        alg_k = config.get('alg_k')
        score_func_k = {'f_classif': f_classif, 'mutual_info_classif': mutual_info_classif}.get(alg_k)
        feature_selector = SelectKBest(score_func=score_func_k, k=config['K_Numbers'])
    elif feature_selection_choice == 'relief':
        feature_selector = ReliefF(n_neighbors=config['n_neighbors'], n_features_to_select=config['n_features_to_select'])
    elif feature_selection_choice == 'rfe':
        estimator_type = config.get('estimator_type')
        estimator = {'svm': SVC(kernel="linear"), 'logistic': LogisticRegression(), 'random_forest': RandomForestClassifier()}.get(estimator_type)
        feature_selector = RFE(estimator, n_features_to_select=config['n_features_to_select_rfe'])

    preprocessor = ColumnTransformer(
        transformers=[
            ('scale', scaler, columns_to_scale)
        ],
        remainder='passthrough'
    )

    models = []

    if config.get('use_svm'):
        models.append(('svm', SVC(
            C=config['svm_C'],
            kernel=config['svm_kernel'],
            degree=config['svm_degree'],
            gamma=config['svm_gamma'],
            coef0=config['svm_coef0'],
            shrinking=config['svm_shrinking'],
            probability=True,
            tol=config['svm_tol'],
            class_weight='balanced',
            decision_function_shape=config['svm_decision_function_shape']
        )))

    if config.get('use_ridge'):
        models.append(('ridge', RidgeClassifier(
            alpha=config['ridge_alpha'],
            tol=config['ridge_tol'],
            class_weight='balanced',
            fit_intercept=config['ridge_fit_intercept'],
        )))

    if models:
        ensemble = VotingClassifier(estimators=models, voting='hard')

        full_pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('feature_selector', feature_selector),
            ('classifier', ensemble)
        ])

        full_pipeline.fit(X_train, y_train)
        y_pred = full_pipeline.predict(X_test)

        score = fbeta_score(y_test, y_pred, beta=2, pos_label=1)
        train.report({"f2": score})
    else:
        train.report({"f2": -float('inf')})

search_space = {
    # Plain search-space parameters: these DO show up in TensorBoard
    'scaler_choice': tune.choice(['StandardScaler', 'MinMaxScaler', 'RobustScaler']),
    'feature_selection_choice': tune.choice(['selectpercentile', 'pca', 'K_best', 'relief', 'rfe']),

    # Conditional parameters via tune.sample_from: these are the ones missing from TensorBoard
    'pca_n_components': tune.sample_from(lambda config: int(np.random.uniform(1, X_train.shape[1])) if config.get('feature_selection_choice') == 'pca' else None),
    'percentile': tune.sample_from(lambda config: int(np.random.uniform(1, 100)) if config.get('feature_selection_choice') == 'selectpercentile' else None),
    'alg': tune.sample_from(lambda config: np.random.choice(['f_classif', 'mutual_info_classif']) if config.get('feature_selection_choice') == 'selectpercentile' else None),
    'alg_k': tune.sample_from(lambda config: np.random.choice(['f_classif', 'mutual_info_classif']) if config.get('feature_selection_choice') == 'K_best' else None),
    'K_Numbers': tune.sample_from(lambda config: int(np.random.uniform(1, X_train.shape[1])) if config.get('feature_selection_choice') == 'K_best' else None),
    'n_neighbors': tune.sample_from(lambda config: int(np.random.uniform(1, 21)) if config.get('feature_selection_choice') == 'relief' else None),
    'n_features_to_select': tune.sample_from(lambda config: int(np.random.uniform(1, X_train.shape[1])) if config.get('feature_selection_choice') == 'relief' else None),
    'estimator_type': tune.sample_from(lambda config: np.random.choice(['svm', 'logistic', 'random_forest']) if config.get('feature_selection_choice') == 'rfe' else None),
    'n_features_to_select_rfe': tune.sample_from(lambda config: int(np.random.uniform(1, X_train.shape[1])) if config.get('feature_selection_choice') == 'rfe' else None),

    'use_svm': tune.choice([True, False]),
    'use_ridge': tune.choice([True, False]),

    'ridge_alpha': tune.sample_from(lambda config: np.random.uniform(1e-10, 1000) if config.get('use_ridge') else None),
    'ridge_tol': tune.sample_from(lambda config: np.random.uniform(1e-19, 1e-1) if config.get('use_ridge') else None),
    'ridge_fit_intercept': tune.sample_from(lambda config: np.random.choice([True, False]) if config.get('use_ridge') else None),

    'svm_C': tune.sample_from(lambda config: np.random.uniform(1e-7, 10) if config.get('use_svm') else None),
    'svm_kernel': tune.sample_from(lambda config: np.random.choice(['linear', 'poly', 'rbf', 'sigmoid']) if config.get('use_svm') else None),
    'svm_degree': tune.sample_from(lambda config: np.random.randint(0, 10) if config.get('use_svm') else None),
    'svm_gamma': tune.sample_from(lambda config: np.random.choice(['scale', 'auto']) if config.get('use_svm') else None),
    'svm_coef0': tune.sample_from(lambda config: np.random.uniform(-10, 10) if config.get('use_svm') else None),
    'svm_shrinking': tune.sample_from(lambda config: np.random.choice([True, False]) if config.get('use_svm') else None),
    'svm_tol': tune.sample_from(lambda config: np.random.uniform(1e-9, 1e-1) if config.get('use_svm') else None),
    'svm_decision_function_shape': tune.sample_from(lambda config: np.random.choice(['ovo', 'ovr']) if config.get('use_svm') else None),
}

tuner = tune.Tuner(
    train_model,
    param_space=search_space,
    tune_config=TuneConfig(
        metric="f2",
        mode="max",
        search_alg=OptunaSearch(),
        scheduler=AsyncHyperBandScheduler(),
        num_samples=500,
    ),
    # Raw string so the backslashes in the Windows path are not treated as escapes
    run_config=air.RunConfig(local_dir=r"C:\Work\AI\ray", log_to_file=("my_stdout.log", "my_stderr.log"))
)

results = tuner.fit()
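
Since I marked this as something I can work around: for now I log the resolved values myself from inside the trainable, so at least the numeric ones appear as scalar curves in TensorBoard. A minimal sketch of that idea (report_with_resolved_params is a hypothetical helper of mine, not a Tune API; it assumes Ray 2.x, where train.report is called inside the trainable):

from ray import train

def report_with_resolved_params(config, score):
    # The tune.sample_from values are already resolved in `config` by the
    # time the trainable runs, so report the numeric ones next to the metric.
    numeric = {k: v for k, v in config.items() if isinstance(v, (int, float))}
    train.report({"f2": score, **numeric})

String-valued parameters such as svm_kernel still do not show up this way, since TensorBoard scalars require numeric values, so I would still like to know how to get sample_from parameters into the HPARAMS view properly.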