Hello, I have a function train_optimization that takes an argument dict_params, a dictionary holding multiple parameters.
Here is a snippet of the code:

```python
def train_optimization(dict_params):
    model = ConvNet(20, 1).double()
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
        if torch.cuda.device_count() > 1:
            # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
            model = nn.DataParallel(model)
    model.to(device)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"], capturable=True)

    # loading the dataset
    if dict_params.get("preprocess") == 'yes':
        print("You chose to preprocess the edf files...")
        dict_params['high_pass_freq'] = 1
        dict_params['notch_freq'] = 60
        dict_params['resample_freq'] = 250
    train_set = TUH_Dataset(dict_params)
    # train_loader = DataLoader(train_set, shuffle=True, batch_size=args.batch_size,
    #                           drop_last=True, num_workers=4, worker_init_fn=seed_worker)
    # set to test mode
    # dict_params.update(mode='test')
    # test_set = TUH_Dataset(dict_params)
    # test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False,
    #                          drop_last=False, num_workers=4, worker_init_fn=seed_worker)

    train_set_whole, labels_train = train_set_all(dict_params.get("dataset_path"))
    train_indices, valid_indices, _, _ = train_test_split(
        range(len(train_set)), labels_train, stratify=labels_train,
        test_size=0.3, random_state=4)
    train_split = Subset(train_set, train_indices)
    valid_split = Subset(train_set, valid_indices)
    train_split_loader = DataLoader(train_split, shuffle=True, batch_size=config["batch_size"],
                                    drop_last=True, num_workers=4, worker_init_fn=seed_worker)
    valid_loader = DataLoader(valid_split, batch_size=config["batch_size"], shuffle=False,
                              drop_last=False, num_workers=4, worker_init_fn=seed_worker)

    for epoch in range(0, 30):  # loop over the dataset multiple times
        print(f"Epoch: {epoch + 1}")
        # model.train()
        loss_all = 0
        correct = 0
        total = 0
        val_loss = 0
        val_total = 0
        val_correct = 0

        for i, data in enumerate(tqdm(train_split_loader)):
            # get the inputs; data is a list of [inputs, labels, edf_names]
            inputs, labels, edf_names = data
            inputs, labels = torch.as_tensor(inputs), torch.as_tensor(labels)
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = model(inputs)
            outputs = outputs.flatten()
            loss = criterion(outputs, labels)
            loss_all += loss.item() * inputs.size(0)
            loss.backward()
            optimizer.step()
            # apply a 0.5 threshold to get binary predictions (1 = positive/abnormal class)
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        train_avg_acc = 100 * correct / total
        loss_all = loss_all / len(train_split)

        # validation loss
        # model.eval()
        for i, data in enumerate(tqdm(valid_loader)):
            with torch.no_grad():
                inputs, labels, edf_names = data
                inputs, labels = torch.as_tensor(inputs), torch.as_tensor(labels)
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                outputs = outputs.flatten()
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                # apply a 0.5 threshold to get binary predictions (1 = positive/abnormal class)
                predicted = (outputs > 0.5).float()
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        valid_avg_acc = 100 * val_correct / val_total
        val_loss = val_loss / len(valid_split)
        # print(f"val_loss: {val_loss}")

        tune.report(mean_accuracy=valid_avg_acc)
```
TUH_Dataset is a custom class that must be constructed with dict_params as a dictionary; based on its entries it then performs some operations, for example deciding whether or not to standardize the data.
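For context, here is a stripped-down sketch of what I mean (illustrative only; the field names and the actual preprocessing are simplified):

```python
from torch.utils.data import Dataset

class TUH_Dataset(Dataset):
    # Simplified sketch: all options come in through a single dict.
    def __init__(self, dict_params):
        self.dataset_path = dict_params.get("dataset_path")
        self.t_max = dict_params.get("t_max")
        self.mode = dict_params.get("mode")  # 'train' or 'test'
        self.standardize = dict_params.get("standardize") == "yes"
        # ... build the list of edf files, apply preprocessing flags, etc.

    def __len__(self):
        ...  # number of recordings

    def __getitem__(self, idx):
        ...  # returns (inputs, label, edf_name)
```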
When I run tune.run like this:

```python
dataset_path = r'/home/ivaeftimska/EEG_TUH/EEG/v3.0.0/edf'
dict_params = {"dataset_path": dataset_path, "t_max": 60, "mode": "train",
               "preprocess": "yes", "standardize": "yes"}
config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([16, 32, 64, 128]),
}
scheduler = ASHAScheduler(
    metric="mean_accuracy",  # must match the metric name passed to tune.report
    mode="max",
    max_t=5,
    grace_period=1,
    reduction_factor=2,
)
analysis = tune.run(
    tune.with_parameters(train_optimization, dict_params=dict_params),
    resources_per_trial={"cpu": 8, "gpu": 2},
    config=config,
    num_samples=5,
    scheduler=scheduler,
    log_to_file=True,
)
print("Best config: ", analysis.get_best_config(metric="mean_accuracy", mode="max"))
# Get a dataframe for analyzing trial results.
df = analysis.dataframe()
```

I get the following error:

```
TypeError: train_optimization() got multiple values for argument 'dict_params'
```

Why is it not allowed to pass an argument that is a dictionary (or list) of parameters? I don't want to change the whole structure of the TUH_Dataset class.
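For what it's worth, I can reproduce the same TypeError with plain Python, without Ray at all, which makes me think Tune calls the trainable with the search-space config dict as its first positional argument, so it lands in dict_params and then collides with the dict_params keyword that tune.with_parameters supplies (this is my reading of the error, not something I have confirmed in the Tune source):

```python
# Plain-Python reproduction of the error. Tune effectively calls
# train_optimization(config, dict_params=dict_params): config binds
# positionally to the first parameter, which here is named dict_params,
# and the explicit keyword argument then collides with it.
def train_optimization(dict_params):
    pass

config = {"lr": 0.01, "batch_size": 32}
train_optimization(config, dict_params={"dataset_path": "..."})
# TypeError: train_optimization() got multiple values for argument 'dict_params'
```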