Restore policy in multiagent with Tune

Hey guys,

I am working in a two-agent environment where one policy was already trained and needs to be restored, and the other gets trained from scratch. These policies are instantiated inside the algorithm configuration.
After that, training is run with Tune. The issue is that, to call tuner.fit(), I first need to convert the algorithm configuration to a legacy dictionary via its to_dict() method. to_dict() tries to deepcopy the trained policy, which raises an error.

Here are the code highlights relevant to the issue.

Algorithm creation:

        # Pick the config factory selected by the CLI arguments and build the base
        # algorithm configuration from it.
        config_factory = config.algorithm.all_algorithm_configs[args.algorithm_config]
        alg_config = config_factory(args)

        # Settings handed to the "hrl" environment.
        env_settings = {
            "rescale_obs": {
                "do_normalize_box_obs": True,
                "reward_coeff": 1,  # rescale factor for all rewards
            },
        }

        # Apply the environment section first, then attach the callbacks; each
        # builder call returns the config that is carried forward.
        alg_config = alg_config.environment(
            env="hrl",
            env_config=env_settings,
            normalize_actions=True,
            clip_actions=True,
            disable_env_checking=True,
            env_task_fn=None,
        )
        # Only GradientLoggerCallback is active; CustomLoggerCallback and
        # ActivateMaskingCallback are currently disabled.
        alg_config = alg_config.callbacks(
            make_multi_callbacks([GradientLoggerCallback])
        )

Trained policy restoration

# Load the checkpoint given on the command line and take the entry stored under
# DEFAULT_POLICY_ID as the already-trained level-1 policy.
restored_policies = Policy.from_checkpoint(args.move_policy)
lv1_policy = restored_policies[DEFAULT_POLICY_ID]

# One slot per hierarchy level; only the first slot is pre-filled with the
# restored policy, the remaining ones stay None.
policies_list = [None for _ in range(args.hrl_n_of_levels)]
policies_list[0] = lv1_policy

Multiagent setting

    # Policies handed to the algorithm; the restored level-1 policy is passed in
    # through policies_list.
    policy_specs = get_policies(
        args.hrl_n_of_levels, policies_list, obs_spaces, action_spaces
    )
    # Every policy id except the restored 'lv1_policy' is marked for training.
    trainable_ids = [
        policy_id
        for policy_id in get_policies(args.hrl_n_of_levels, None, obs_spaces, action_spaces)
        if policy_id != 'lv1_policy'
    ]
    alg_config.multi_agent(
        policies=policy_specs,
        policy_mapping_fn=policy_mapping_fn,
        policies_to_train=trainable_ids,
    )

Training

    # The AlgorithmConfig is converted to a legacy dict with to_dict() before being
    # passed to Tune as param_space. Per the traceback below, to_dict() runs
    # copy.deepcopy(vars(self)), and that deep copy is where the torch RuntimeError
    # is raised (presumably when it reaches the restored policy's tensors).
    tuner = ray.tune.Tuner(PPO, param_space=alg_config.to_dict(), run_config=run_config, tune_config=tune_config)
    results = tuner.fit()

Here is the error I receive:

Traceback (most recent call last):
File "...", line 100, in tuner_run
    tuner = ray.tune.Tuner(PPO, param_space=alg_config.to_dict(), run_config=run_config, tune_config=tune_config)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm_config.py", line 477, in to_dict
    config = copy.deepcopy(vars(self))
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 205, in _deepcopy_list
    append(deepcopy(a, memo))
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 296, in _reconstruct
    value = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 296, in _reconstruct
    value = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 153, in deepcopy
    y = copier(memo)
  File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/site-packages/torch/_tensor.py", line 102, in __deepcopy__
    raise RuntimeError(
RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment

Is there a workaround to this issue?