How to deal with TuneError: ('Trials did not complete',...)

I have the following code, which runs into a TuneError.

I tried changing num_samples=6, time_budget_s=600 to larger values, say num_samples=6, time_budget_s=1800, or num_samples=600, time_budget_s=1800, but the same error occurred.

What causes it? How do you fix it?

import time
import ray
from ray import tune
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fastai.tabular import * 
from scipy.stats import loguniform

# target column and the categorical feature columns
dep_var = 'salary'
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]

# fetch the sample dataset and read it into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# hold out the final 2000 rows as the validation split
valid_idx = range(len(df)-2000, len(df))

def train_clf_config2(config, path=path, df=df, dep_var=dep_var, valid_idx=valid_idx, cat_names=cat_names,return_learner=False, estimate_lr=False):
    """Train a tabular classifier from a Tune ``config`` and report validation metrics.

    This function is inherited from train_clf_config() with 'config' added.

    Args:
        config: Hyperparameter dict. The keys read here are ``'layers'``,
            ``'ps'``, ``'emb_drop'`` and ``'n_epochs'``.
        path: Working path handed to ``TabularDataBunch.from_df``.
        df: DataFrame holding both the training and the validation rows.
        dep_var: Name of the target column.
        valid_idx: Row indices used as the validation split.
        cat_names: Names of the categorical feature columns.
        return_learner: When true, return ``(learn, valid_metrics)``.
        estimate_lr: When true, estimate the learning rate with
            ``find_appropriate_lr``; otherwise use the pre-defined ``1e-2``.

    Returns:
        ``(learn, valid_metrics)`` if ``return_learner`` is true, else ``None``.
    """
    # data preprocessing
    procs = [FillMissing, Categorify, Normalize]

    # prep data for tabular_learner()
    data = TabularDataBunch.from_df(path, df, dep_var=dep_var, valid_idx=valid_idx, procs=procs, cat_names=cat_names)

    precision = Precision()
    recall = Recall()

    print('================ training classifier================')

    # choose hyperparameters: layers, ps, emb_drop
    # `layers` was used below without ever being read from the config.
    layers = config['layers']
    ps = config['ps']
    emb_drop = config['emb_drop']

    # define a tabular classifier
    # NOTE(review): `f1` is not defined in this snippet -- presumably an F1
    # metric object created elsewhere; confirm before running.
    learn = tabular_learner(data, layers=layers, ps=ps, emb_drop=emb_drop, emb_szs={'native-country': 10}, metrics=[accuracy, precision, recall, f1])

    if estimate_lr:
        # auto find learning rate
        # NOTE(review): find_appropriate_lr is not defined in this snippet.
        lr = find_appropriate_lr(model=learn, plot=True)
        print(f'clf uses estimated lr={lr}')
    else:
        lr = 1e-2
        print(f'clf uses pre-defined lr={lr}')

    # train n_epoch
    n_epochs = config['n_epochs']
    # `lr` is a learning rate, so it goes to max_lr; passing it as `moms`
    # would set the momentum schedule and leave the learning rate at default.
    learn.fit_one_cycle(n_epochs, max_lr=slice(lr*0.01, lr))

    # build validation performance metrics from the last epoch
    # (index 0 would be the first epoch, regardless of n_epochs)
    metric_names = ['accuracy', 'precision', 'recall', 'f1']
    valid_metrics = dict(zip(metric_names, [x.item() for x in learn.recorder.metrics[-1]]))

    # send metrics to tune; without this the scheduler never sees 'f1'
    tune.report(**valid_metrics)

    if return_learner:
        return learn, valid_metrics

################# tuning fastai classifier ################### 

# create HyperBand scheduler to have more efficient training
from ray.tune.schedulers import HyperBandScheduler

# Create HyperBand scheduler and maximize f1
hyperband = HyperBandScheduler(metric="f1", mode="max")

# tune hyperparameters defined by conditional search space:
# "layers" and "ps" each get n_layers-1 entries, so they depend on the
# value sampled for "n_layers" in the same trial.
# Sampled numpy scalars are converted to plain int/float.
analysis = tune.run(
    train_clf_config2,
    config={
        "n_layers": tune.sample_from(lambda _: int(np.random.choice([2, 3, 4, 5, 6]))),
        "layers": tune.sample_from(lambda spec: [int(np.random.choice([100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200])) for _ in range(spec.config.n_layers - 1)]),
        "ps": tune.sample_from(lambda spec: [float(loguniform.rvs(1e-4, 1e0)) for _ in range(spec.config.n_layers - 1)]),
        "n_epochs": tune.lograndint(lower=1, upper=100),
        "emb_drop": tune.loguniform(lower=1e-4, upper=8e-1),
    },
    num_samples=6,
    # metric/mode are already set on the scheduler, so they are not repeated here
    scheduler=hyperband,
    time_budget_s=600,  # time budget in seconds
)

The error message is:

TuneError: ('Trials did not complete', [train_clf_config2_ca838_00000, train_clf_config2_ca838_00001, train_clf_config2_ca838_00002, train_clf_config2_ca838_00003, train_clf_config2_ca838_00004, train_clf_config2_ca838_00005])

Hi, are there any other outputs? What does the result table look like?

If errors occur, you’ll usually find more information in an error.txt file in the trial directory (take a look at ~/ray_results - the link to this file is usually printed in the output).

