Finding the best hidden layer size using PBT

Hi all,

I want to find the best hidden layer size for my model using Ray Tune.

import argparse

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, concatenate, Bidirectional, Dropout, Dense,
)
# CuDNNGRU lives under compat.v1 in TF 2.x
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from sklearn.utils import shuffle

from ray import air, tune

# load_data_train, load_data_test and Att_q are my own helpers

class BroadModel(tune.Trainable):
    def build_model(self):
        window_size = 200
        # the magnetometer data is unused here
        self.x_gyro, self.x_acc, _, _, q = load_data_train()
        self.Att_quat = Att_q(q)

        self.x_gyro, self.x_acc, self.Att_quat = shuffle(
            self.x_gyro, self.x_acc, self.Att_quat
        )
        x1 = Input((window_size, 3), name="x1")
        x2 = Input((window_size, 3), name="x2")
        convA1 = Conv1D(20, 11, padding="same", activation="relu")(x1)
        convA2 = Conv1D(20, 11, padding="same", activation="relu")(convA1)
        poolA = MaxPooling1D(3)(convA2)
        convB1 = Conv1D(20, 11, padding="same", activation="relu")(x2)
        convB2 = Conv1D(20, 11, padding="same", activation="relu")(convB1)
        poolB = MaxPooling1D(3)(convB2)
        AB = concatenate([poolA, poolB])
        lstm1 = Bidirectional(CuDNNGRU(10, return_sequences=True))(AB)
        drop1 = Dropout(0.25)(lstm1)
        lstm2 = Bidirectional(CuDNNGRU(10))(drop1)
        drop2 = Dropout(self.config.get("dropout", 0.3))(lstm2)
        y1_pred = Dense(4, kernel_regularizer="l2")(drop2)
        return Model(inputs=[x1, x2], outputs=[y1_pred])
    def setup(self, config):
        model = self.build_model()
        # compile once, here, so that `lr` comes from the Tune config;
        # the targets are continuous quaternions, so the loss is MSE,
        # not a classification loss
        model.compile(
            optimizer=Adam(learning_rate=self.config.get("lr", 1e-3)),
            loss="mse",
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )
        self.model = model
    def step(self):
        # the magnetometer data is unused here as well
        self.x_gyro_t, self.sx_acc_t, _, _, q_t = load_data_test()
        self.Att_quat_t = Att_q(q_t)
        # train for one Tune iteration
        self.model.fit(
            [self.x_gyro, self.x_acc],
            self.Att_quat,
            batch_size=self.config.get("batch_size", 32),
            epochs=self.config.get("epochs", 1),
            validation_data=([self.x_gyro_t, self.x_acc_t], self.Att_quat_t),
            verbose=1,
        )
        # evaluate() returns [loss, rmse] with the compile() above
        _, rmse = self.model.evaluate(
            [self.x_gyro_t, self.x_acc_t], self.Att_quat_t, verbose=0
        )
        return {"rmse": rmse}

    def save_checkpoint(self, checkpoint_dir):
        file_path = checkpoint_dir + "/model"
        self.model.save(file_path)
        return file_path

    def load_checkpoint(self, path):
        # See https://stackoverflow.com/a/42763323
        del self.model
        self.model = load_model(path)

if __name__ == "__main__":
    import ray
    from ray.tune.schedulers import PopulationBasedTraining

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    parser.add_argument(
        "--server-address",
        type=str,
        default=None,
        required=False,
        help="The address of server to connect to if using Ray Client.",
    )
    args, _ = parser.parse_known_args()

    if args.smoke_test:
        ray.init(num_gpus=1)
        # report how many GPUs Ray actually sees
        print(ray.cluster_resources().get("GPU", 0))
    elif args.server_address:
        ray.init(f"ray://{args.server_address}")
    pbt = PopulationBasedTraining(
        perturbation_interval=2,
        hyperparam_mutations={
            # resample dropout uniformly and lr on a log scale
            "dropout": lambda: np.random.uniform(0, 1),
            "lr": lambda: 10 ** np.random.randint(-10, 0),
        },
    )

    tuner = tune.Tuner(
        BroadModel,
        run_config=air.RunConfig(
            name="pbt_babi_memnn",
            stop={"training_iteration": 4 if args.smoke_test else 100},
        ),
        tune_config=tune.TuneConfig(
            scheduler=pbt,
            metric="rmse",
            mode="min",  # RMSE should be minimized
            num_samples=2,
        ),
        param_space={
            "finish_fast": args.smoke_test,
            "batch_size": 32,
            "epochs": 1,
            "dropout": 0.3,
            "lr": 0.01,
        },
    )
    # tune.run(resources_per_trial={'gpu': 1}, tuner)
    tuner.fit()
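What I ultimately want is for the GRU width itself to be searchable. My plan (not in the script above; hidden_size is a key I would add) is to read the width from the Tune config in build_model and put it into param_space, roughly:

    # inside build_model(): take the width from the config
    hidden = self.config.get("hidden_size", 10)
    lstm1 = Bidirectional(CuDNNGRU(hidden, return_sequences=True))(AB)
    drop1 = Dropout(0.25)(lstm1)
    lstm2 = Bidirectional(CuDNNGRU(hidden))(drop1)

    # in the main block: search over it
    param_space = {
        "hidden_size": tune.grid_search([5, 10, 20, 40]),
        "batch_size": 32,
        "epochs": 1,
        "dropout": 0.3,
        "lr": 0.01,
    }

I am not sure PBT should mutate hidden_size itself, since exploit copies checkpoints between trials and the cloned weights fix the architecture, so I would let PBT perturb only dropout and lr.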

When I run the script above, I get these two errors:
Tune detects GPUs, but no trials are using GPUs. To enable trials to use GPUs, set tune.run(resources_per_trial={'gpu': 1}...) which allows Tune to expose 1 GPU to each trial. You can also override Trainable.default_resource_request if using the Trainable API.
2022-08-30 04:14:49,732 WARNING trial_runner.py:1575 -- You are trying to access _search_alg interface of TrialRunner in TrialScheduler, which is being restricted. If you believe it is reasonable for your scheduler to access this TrialRunner API, please reach out to Ray team on GitHub. A more strict API access pattern would be enforced starting 1.12.0.
(BroadModel pid=17115) 2022-08-30 04:14:54.026559: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected

and

validation_data=([self.x_gyro_t, self.x_acc_t], self.Att_quat_t),
AttributeError: 'BroadModel' object has no attribute 'x_acc_t'

What is my mistake?

I think your first issue was resolved over Slack?
Can you give it another run?
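
For anyone who finds this later: with the Tuner API, per-trial resources are no longer passed through tune.run(resources_per_trial=...); you wrap the trainable instead. A minimal sketch, assuming Ray 2.x:

    # give every trial one GPU
    trainable = tune.with_resources(BroadModel, {"gpu": 1})
    tuner = tune.Tuner(
        trainable,
        run_config=air.RunConfig(name="pbt_babi_memnn"),
        tune_config=tune.TuneConfig(
            scheduler=pbt, metric="rmse", mode="min", num_samples=2
        ),
    )
    tuner.fit()

(Overriding Trainable.default_resource_request is the other option the warning mentions.) That should also make the cuInit / CUDA_ERROR_NO_DEVICE message go away, since the trial actor will then actually be scheduled on a GPU.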

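For the second error, it looks like a one-letter typo rather than anything Tune-related: in step() the test data is unpacked into self.sx_acc_t, but fit() and evaluate() read self.x_acc_t:

    # current
    self.x_gyro_t, self.sx_acc_t, _, _, q_t = load_data_test()
    # should be
    self.x_gyro_t, self.x_acc_t, _, _, q_t = load_data_test()

With that renamed, the AttributeError should go away.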