@philmax @mannyv I have solved the reproducibility problem with the following helper function:
```python
def set_reproducibility(seed=None):
    if seed is None:
        seed = 42
    tf.random.set_seed(seed)
    tf.keras.utils.set_random_seed(seed)
    tf.config.experimental.enable_op_determinism()  # tested with tensorflow==2.9.1
    np.random.seed(seed)
    random.seed(seed)
```
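A small caveat for anyone on an older TensorFlow: `tf.keras.utils.set_random_seed` was added in TF 2.7 (and it already seeds Python's `random`, NumPy, and TF in one call, so the three explicit calls above are belt-and-braces), and `enable_op_determinism` arrived in TF 2.8. A minimal guarded sketch, if you prefer a warning over a crash on older versions (the `set_reproducibility_safe` name is just for illustration):

```python
def set_reproducibility_safe(seed=42):
    # Seeds Python's random, NumPy and TensorFlow in one call (TF >= 2.7).
    tf.keras.utils.set_random_seed(seed)
    try:
        # Makes TF ops run deterministically (TF >= 2.8); may slow training down.
        tf.config.experimental.enable_op_determinism()
    except AttributeError:
        print("enable_op_determinism() not available; runs may not be bit-for-bit identical")
```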
This function must be called in two places:
- the main body
- the trainable function

The full example code is below:
```python
import random

import numpy as np
import ray
import tensorflow as tf
from ray import tune
from ray.tune.integration.keras import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler
from tensorflow.keras.datasets import mnist


def set_reproducibility(seed=None):
    if seed is None:
        seed = 42
    tf.random.set_seed(seed)
    tf.keras.utils.set_random_seed(seed)
    tf.config.experimental.enable_op_determinism()
    np.random.seed(seed)
    random.seed(seed)

def train_mnist(config):
    if config["reproducibility_active"]:
        set_reproducibility()

    batch_size = config["batch"]
    num_classes = 10
    epochs = 200

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # define the model
    inputs = tf.keras.layers.Input(shape=(28, 28))
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.LayerNormalization()(x)
    for _ in range(config["layers"]):
        x = tf.keras.layers.Dense(units=config["hidden"], activation=config["activation"])(x)
        x = tf.keras.layers.Dropout(config["dropout"])(x)
    outputs = tf.keras.layers.Dense(units=num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=config["learning_rate"]),
        metrics=["accuracy"])

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_test, y_test),
        callbacks=[TuneReportCallback({
            # other reportable values: 'loss', 'accuracy', 'val_loss', 'val_accuracy'
            "mean_accuracy": "val_accuracy"
        })])
if __name__ == "__main__":
print('Is cuda available for container:', tf.config.list_physical_devices('GPU'))
ray.init()
config = {
"reproducibility_active": True,
"learning_rate": tune.choice([1e-5, 1e-4, 1e-3, 1e-2]),
"hidden": tune.choice([16, 32, 64, 128]),
"dropout": tune.choice([0.01, 0.02, 0.05, 0.1, 0.2]), # tune.uniform(0.01, 0.2)
"activation": tune.choice(["relu", "elu"]),
"layers": tune.choice([1, 2, 3]),
"batch": tune.choice([4, 8, 16, 32, 64, 128]),
}
if config["reproducibility_active"]:
set_reproducibillity()
sched_asha = ASHAScheduler(time_attr="training_iteration",
max_t=100,
grace_period=10,
# mode='max', #find maximum, do not define here if you define in tune.run
reduction_factor=3,
# brackets=1
)
analysis = tune.run(
train_mnist,
name="exp",
scheduler=sched_asha,
# Checkpoint settings
keep_checkpoints_num=3,
checkpoint_freq=3,
checkpoint_at_end=True,
# Optimalization
metric="mean_accuracy",
mode="max",
stop={ # trial is finished if this value is reached
"mean_accuracy": 0.96,
"training_iteration": 10,
'time_this_iter_s': 50,
# 'timesteps_total': 1000,
# 'episodes_total': 1000,
# 'time_total_s': 1000,
},
time_budget_s=200, # Global time budget in seconds after which all trials are stopped.
num_samples=10, # number of tested configurations from hyperspace
reuse_actors=True,
local_dir='../ray_results', # default value is ~/ray_results
resources_per_trial={
"cpu": 1,
"gpu": 0
},
config=config,
verbose=3, # values 0 to 3
)
print("Best hyperparameters found were: ", analysis.best_config)