How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hello, I have a question. I am trying to run Ray Tune for basic HPO with PyTorch.
I am getting this error: `tune.with_parameters() only works with function trainables or classes that inherit from tune.Trainable(). Got type: <class 'NoneType'>`.
My training logic lives in a method of a class that inherits from another class (BaseLearner), and the script looks like this:
```python
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
from ray import tune

# BaseLearner, network, batch_size, num_workers, prog_bar, tensor2numpy,
# init_lr, init_weight_decay, init_milestones, init_lr_decay, weight_decay,
# milestones and lrate_decay are defined elsewhere in my project.


class myclass(BaseLearner):
    def __init__(self, args):
        self._network = network()

    def incremental_train(self, data_manager, config):
        train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes), source="train", mode="train"
        )
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source="test", mode="test")
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self.train(self.train_loader, self.test_loader, config)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def train(self, train_loader, test_loader, config):
        self._network.to(self._device)
        if self._cur_task == 0:
            optimizer = optim.SGD(self._network.parameters(), momentum=0.9, lr=init_lr, weight_decay=init_weight_decay)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=init_milestones, gamma=init_lr_decay)
            self.init_train(train_loader, test_loader, optimizer, scheduler)
        else:
            optimizer = optim.SGD(self._network.parameters(), lr=config["lr"], momentum=0.9, weight_decay=weight_decay)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones, gamma=lrate_decay)
            self.update_representation(train_loader, test_loader, optimizer, scheduler)

    def init_train(self, train_loader, test_loader, optimizer, scheduler):
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.0
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                targets = targets.type(torch.LongTensor)
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)["logits"]
                loss = F.cross_entropy(logits, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)
            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)

    def update_representation(self, train_loader, test_loader, optimizer, scheduler):
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.0
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                targets = targets.type(torch.LongTensor)
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)["logits"]
                # only the new classes contribute to the classification loss
                loss_clf = F.cross_entropy(logits[:, self._known_classes:], targets - self._known_classes)
                loss = loss_clf
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)
            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            # save a Tune checkpoint and report metrics for this epoch
            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save((self._network.state_dict(), optimizer.state_dict()), path)
            tune.report(loss=losses, accuracy=train_acc)
```
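For context, the error message mentions the two kinds of trainables Tune accepts. My understanding from the docs is that they look roughly like the sketch below (this is just how I read the API, not my actual code; the metric values are placeholders):

```python
# The two trainable shapes the error message refers to, as I understand them
# from the Tune docs (placeholder values, not my real training code).
def function_trainable(config):
    # ... train something using config ...
    tune.report(loss=0.0, accuracy=0.0)  # send metrics back to Tune


class ClassTrainable(tune.Trainable):
    def setup(self, config):
        self.config = config

    def step(self):
        # one training iteration; the returned dict is reported to Tune
        return {"loss": 0.0, "accuracy": 0.0}
```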
I use the tune.with_parameters() function and give it myclass together with the training method (incremental_train):
```python
result = tune.run(
    tune.with_parameters(myclass.incremental_train(data_manager, config)),
    resources_per_trial={"cpu": 12, "gpu": 1},
    config=config,
    num_samples=2,
    search_alg=hyperopt_search,
    scheduler=scheduler,
    keep_checkpoints_num=1,
    checkpoint_score_attr="loss",
)

best_trial = result.get_best_trial("accuracy", "max", "last")
print("Best trial config: {}".format(best_trial.config))
```
Any kind of help is appreciated, thank you so much!