`tune.with_parameters()` only works with function trainables or classes that inherit from `tune.Trainable()`. Got type: <class 'NoneType'>

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

Hello, I have a question. I am trying to run Ray Tune for basic HPO with PyTorch.
I am getting this error: `tune.with_parameters()` only works with function trainables or classes that inherit from `tune.Trainable()`. Got type: <class 'NoneType'>.
My trainable is a method of a class that inherits from another class (BaseLearner), and the script looks like this:

class myclass(BaseLearner):
    def __init__(self, args):
        self._network = network()

    def incremental_train(self, data_manager, config):
        train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes), source="train", mode="train"
        )
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source="test", mode="test")
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)

        self.train(self.train_loader, self.test_loader, config)

        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def train(self, train_loader, test_loader, config):
        self._network.to(self._device)

        if self._cur_task == 0:
            optimizer = optim.SGD(self._network.parameters(), momentum=0.9, lr=init_lr, weight_decay=init_weight_decay)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=init_milestones, gamma=init_lr_decay)
            self.init_train(train_loader, test_loader, optimizer, scheduler)
        else:
            optimizer = optim.SGD(self._network.parameters(), lr=config["lr"], momentum=0.9, weight_decay=weight_decay)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones, gamma=lrate_decay)
            self.update_representation(train_loader, test_loader, optimizer, scheduler)


    def init_train(self, train_loader, test_loader, optimizer, scheduler):
        # prog_bar is a progress bar over the training epochs, defined elsewhere
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.0
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                targets = targets.type(torch.LongTensor)
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)["logits"]
                loss = F.cross_entropy(logits, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()

                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)


    def update_representation(self, train_loader, test_loader, optimizer, scheduler):
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.0
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                targets = targets.type(torch.LongTensor)
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)["logits"]
                loss_clf = F.cross_entropy(
                    logits[:, self._known_classes:], targets - self._known_classes
                )
                loss = loss_clf
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()

                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)

            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save((self._network.state_dict(), optimizer.state_dict()), path)

            tune.report(loss=losses, accuracy=train_acc)
  

I use the tune.with_parameters() function and provide it with myclass's training method (incremental_train):

result = tune.run(
    tune.with_parameters(myclass.incremental_train(data_manager, config)),
    resources_per_trial={"cpu": 12, "gpu": 1},
    config=config,
    num_samples=2,
    search_alg=hyperopt_search,
    scheduler=scheduler,
    keep_checkpoints_num=1,
    checkpoint_score_attr="loss",
)

best_trial = result.get_best_trial("accuracy", "max", "last")
print("Best trial config: {}".format(best_trial.config))

Any kind of help is appreciated, thank you so much!

Hey @ElifCerenGok, it appears that you have two issues with your code.

First, as hinted by the error you are getting, tune.with_parameters expects a class or function as its first argument, with all extra parameters passed as kwargs. It looks like you are instead executing the method yourself, so what tune.with_parameters ends up receiving is its return value - None.
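
In other words, schematically (trainable_fn here stands for a function trainable like the one sketched further below):

# Executes the method immediately and hands its return value (None) to Tune:
tune.with_parameters(myclass.incremental_train(data_manager, config))

# Hands Tune the callable itself, with non-config arguments as kwargs:
tune.with_parameters(trainable_fn, data_manager=data_manager)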

Second, there are three types of objectives you can use with Tune (and by extension, with tune.with_parameters) - Ray AIR Trainers and two types of trainables - functions and Trainable subclasses. You can read more about those here - Training (tune.Trainable, session.report) — Ray 2.1.0

In your case, you should either have myclass inherit from both tune.Trainable and BaseLearner and then override the setup and step methods so that Tune can run your training (you'd not use tune.report in your code then), or create a function that initializes myclass inside of it and runs the training, using tune.report to pass metrics and checkpoints back to Tune.
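
A skeleton of the first route, just to illustrate the shape (the class name and method bodies are placeholders, not your training code):

from ray import tune

class MyTrainable(tune.Trainable, BaseLearner):
    def setup(self, config):
        # Called once per trial with the sampled config; build the
        # network, data loaders, and optimizer here.
        self.lr = config["lr"]

    def step(self):
        # One unit of training (e.g. one epoch); return a metrics dict
        # instead of calling tune.report.
        return {"loss": 0.0, "accuracy": 0.0}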

This guide may also help - How to use Tune with PyTorch — Ray 2.1.0
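
And a minimal sketch of the second route, the function trainable (assuming args is whatever your constructor needs; the kwarg names just mirror your snippet):

def trainable_fn(config, data_manager=None, args=None):
    # Each trial constructs its own learner; incremental_train calls
    # tune.report internally, so metrics flow back to Tune from there.
    learner = myclass(args)
    learner.incremental_train(data_manager, config)

result = tune.run(
    tune.with_parameters(trainable_fn, data_manager=data_manager, args=args),
    resources_per_trial={"cpu": 12, "gpu": 1},
    config=config,
    num_samples=2,
)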

Hi, thank you so much for your reply; it actually helped me get closer to a solution.
I followed your suggestion and, instead of providing a function API, I now provide a Trainable class API (train_and_tune), just like in this example: example

Right now my Ray Tune related training code looks like this:

class train_and_tune(myclass, tune.Trainable):
    def __init__(self, args, config):
        super().__init__(args)

    def setup(self, config):
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

        self.optimizer = optim.SGD(
            self._network.parameters(),
            lr=config.get("lr"),
            momentum=0.9,
            weight_decay=weight_decay,
        )

    def _update_representation(self):
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer=self.optimizer, milestones=milestones, gamma=lrate_decay
        )
        self._network.train()
        losses = 0.0
        correct, total = 0, 0
        for i, (_, inputs, targets) in enumerate(self.train_loader):
            targets = targets.type(torch.LongTensor)
            inputs, targets = inputs.to(self._device), targets.to(self._device)
            logits = self._network(inputs)["logits"]

            loss = F.cross_entropy(
                logits[:, self._known_classes:], targets - self._known_classes
            )
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            losses += loss.item()

            _, preds = torch.max(logits, dim=1)
            correct += preds.eq(targets.expand_as(preds)).cpu().sum()
            total += len(targets)

        scheduler.step()
        train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)

        return {"accuracy": train_acc}

if __name__ == '__main__':
    tuner = tune.Tuner(
        tune.with_resources(train_and_tune, resources={"cpu": 3, "gpu": 1}),
        run_config=air.RunConfig(
            stop={
                "accuracy": 0.95,
                "training_iteration": 20,
            },
            checkpoint_config=air.CheckpointConfig(
                checkpoint_at_end=True, checkpoint_frequency=3
            ),
        ),
        tune_config=tune.TuneConfig(
            metric="accuracy",
            mode="max",
            scheduler=scheduler,
            num_samples=20,
        ),
        param_space={
            "lr": tune.uniform(0.001, 0.1),
        },
    )
    results = tuner.fit()

    print("Best config is:", results.get_best_result().config)

However, I get a very confusing error that I couldn't track down:

2022-12-11 16:20:43,745 ERROR ray_trial_executor.py:580 -- Trial train_and_tune_61124_00000: Unexpected error starting runner.
Traceback (most recent call last):
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\tune\execution\ray_trial_executor.py", line 573, in start_trial
    return self._start_trial(trial)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\tune\execution\ray_trial_executor.py", line 473, in _start_trial
    runner = self._setup_remote_runner(trial)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\tune\execution\ray_trial_executor.py", line 414, in _setup_remote_runner
    return full_actor_class.remote(**kwargs)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\actor.py", line 637, in remote
    return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 387, in _invocation_actor_class_remote_span
    return method(self, args, kwargs, *_args, **_kwargs)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\actor.py", line 884, in _remote
    creation_args = signature.flatten_args(function_signature, args, kwargs)
  File "C:\Users\20212002\Anaconda3\envs\daddy\lib\site-packages\ray\_private\signature.py", line 114, in flatten_args
    raise TypeError(str(exc)) from None
TypeError: missing a required argument: 'args'

Thank you so much!

I believe it is because the __init__ method has to have the same signature as in Trainable. In other words, the args argument shouldn't be in the signature.
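
For example, a minimal sketch of the fix (passing args through the trial config is an assumption on my part; any mechanism that avoids changing __init__ would work):

class train_and_tune(myclass, tune.Trainable):
    # No custom __init__: Ray instantiates the Trainable itself, so the
    # inherited signature must stay untouched.
    def setup(self, config):
        # Pull the constructor arguments out of the trial config instead
        # (the "args" key here is hypothetical).
        myclass.__init__(self, config["args"])
        self.optimizer = optim.SGD(
            self._network.parameters(),
            lr=config["lr"],
            momentum=0.9,
            weight_decay=weight_decay,
        )

and then add a constant "args": args entry to your param_space dict so it reaches setup.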