How severely does this issue affect your experience of using Ray?
- Medium: It contributes significant difficulty to completing my task, but I can work around it.
Hi everyone,
I've set up Ray Tune this way:
CONFIG

```python
from ray import tune

config = {
    # loguniform expects (lower, upper), so the smaller bound goes first
    "lr": tune.loguniform(1e-5, 1e-2),
    "weight_decay": tune.loguniform(1e-5, 1e-2),
    "batch_size": tune.grid_search([16, 32]),
    "epochs": tune.grid_search([10, 25]),
    "hidden_layer_dim": tune.grid_search([8, 10]),
}
```
TRAIN FUNCTION

```python
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from ray import tune


def setting_model(config, df_train, df_val):
    model = Featrues_model(
        len(list_input_variables),
        config["hidden_layer_dim"],
        6,
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"]
    )
    BATCH_SIZE = config["batch_size"]

    for epoch in range(config["epochs"]):
        train_epoch_loss = 0
        train_epoch_acc = 0
        step = 0
        for i in tqdm(range(0, df_train.shape[0], BATCH_SIZE)):
            batch_X = torch.Tensor(
                np.array(df_train[list_input_variables].iloc[i:i + BATCH_SIZE])
            )
            batch_Y = torch.Tensor(
                [int(y) for y in np.array(df_train[list_output_variables].iloc[i:i + BATCH_SIZE])]
            ).type(torch.int64)

            optimizer.zero_grad()
            outputs = model(batch_X)
            train_loss = criterion(outputs, batch_Y)
            train_acc = multi_acc(outputs, batch_Y)
            train_loss.backward()
            optimizer.step()

            train_epoch_loss += train_loss.item()
            train_epoch_acc += train_acc.item()
            step += 1

        # print statistics (average over batches; CrossEntropyLoss already
        # averages within each batch)
        print(f"Epochs: {epoch}")
        print(f"Train Loss: {train_epoch_loss / step}")
        print(f"Train Acc: {train_epoch_acc / step}")
        print("\n")

    # Validation loss
    with torch.no_grad():
        X_val = torch.Tensor(np.array(df_val[list_input_variables]))
        Y_val = torch.Tensor(
            [int(y) for y in np.array(df_val[list_output_variables])]
        ).type(torch.int64)

        outputs = model(X_val)
        _, predicted = torch.max(outputs.data, 1)
        total = Y_val.size(0)
        correct = (predicted == Y_val).sum().item()
        loss = criterion(outputs, Y_val)

        # report the validation metrics for this trial to Tune
        tune.report(loss=loss.numpy(), accuracy=correct / total)
        print(f"Validation Loss: {loss.numpy()}")
        print(f"Validation Acc: {correct / total:.3f}")

    print("Finished Training")
```
RUN

```python
from functools import partial

result = tune.run(
    partial(setting_model, df_train=df_train, df_val=df_val),
    config=config,
    fail_fast="raise",
)
```
GET BEST TRIAL

```python
best_trial = result.get_best_trial()
```
And the last line of code raises this error:

```
ValueError: No `metric` has been passed and `default_metric` has not been set. Please specify the `metric` parameter.
```
The problem is that I don't understand how and where to define this metric. I report it with tune.report inside setting_model, but that doesn't seem to be enough.
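
Reading the error, my guess is that the metric also has to be declared on the analysis side, either once on tune.run or directly on get_best_trial. Is something like this (untested sketch, using the "loss" key I pass to tune.report) the intended way?

```python
# Option 1: declare metric/mode once on tune.run, so analysis methods
# like get_best_trial() can fall back to them as defaults
result = tune.run(
    partial(setting_model, df_train=df_train, df_val=df_val),
    config=config,
    metric="loss",   # must match the keyword used in tune.report(...)
    mode="min",
    fail_fast="raise",
)
best_trial = result.get_best_trial()

# Option 2: pass them explicitly when querying the results
best_trial = result.get_best_trial(metric="loss", mode="min", scope="last")
```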