Hey guys,
I am working in a two-agent environment where one policy was already trained and needs to be restored, and the other gets trained from scratch. These policies are instantiated inside the algorithm configuration.
After that, the training is executed with Tune. The issue is that, before I can call tuner.fit(), I first need to convert the algorithm configuration to a legacy dictionary through the to_dict() method. That method tries to deepcopy the whole configuration, including the restored (trained) policy, and the deepcopy fails with an error.
Here are the code highlights relevant to the issue.
Algorithm creation:
# Build the base algorithm config: index all_algorithm_configs by the name given
# in args.algorithm_config and call the resulting object with the parsed args.
alg_config = config.algorithm.all_algorithm_configs[args.algorithm_config](args)
# Chain the environment and callback settings and rebind the result to alg_config.
alg_config = (
alg_config.environment(
# presumably "hrl" is an environment name registered elsewhere -- not shown here
env="hrl",
env_config={
"rescale_obs": {
"do_normalize_box_obs": True,
"reward_coeff": 1, # rescale factor for all rewards
},
},
normalize_actions=True,
clip_actions=True,
disable_env_checking=True,
env_task_fn=None,
)
.callbacks(
# Only GradientLoggerCallback is active; the other two callbacks are commented out.
make_multi_callbacks([GradientLoggerCallback]) #, CustomLoggerCallback, ActivateMaskingCallback])
)
)
Trained policy restoration:
# Restore the already-trained policy: load the checkpoint referenced by
# args.move_policy and pick the entry stored under DEFAULT_POLICY_ID.
# NOTE(review): this is a live policy object; the traceback ends in torch's
# Tensor.__deepcopy__, so this object is presumably what to_dict() fails to copy.
lv1_policy = Policy.from_checkpoint(args.move_policy)[DEFAULT_POLICY_ID]
# One slot per hierarchy level; only slot 0 gets the restored policy, the rest stay None.
policies_list = [None] * args.hrl_n_of_levels
policies_list[0] = lv1_policy
Multi-agent setting:
# Register the policies, the agent-to-policy mapping and the trainable subset.
# NOTE(review): the return value is not rebound to alg_config (unlike the
# .environment()/.callbacks() chain) -- presumably multi_agent() updates the
# config in place; confirm.
alg_config.multi_agent(
# get_policies is a project helper (not shown); here it receives policies_list,
# i.e. the list whose first entry is the restored policy object.
policies=get_policies(args.hrl_n_of_levels, policies_list, obs_spaces, action_spaces),
policy_mapping_fn=policy_mapping_fn,
# Train everything get_policies yields except 'lv1_policy' (the restored one).
# Presumably iterating the helper's result yields policy ids -- confirm.
policies_to_train=[policy for policy in get_policies(args.hrl_n_of_levels, None, obs_spaces, action_spaces) if policy != 'lv1_policy']
)
Training:
# alg_config.to_dict() is evaluated here, while the Tuner is being constructed;
# the traceback points at this line, so the failure happens before fit() runs.
tuner = ray.tune.Tuner(PPO, param_space=alg_config.to_dict(), run_config=run_config, tune_config=tune_config)
# Launch the Tune run and collect its results.
results = tuner.fit()
Here is the error I receive:
Traceback (most recent call last):
File "...", line 100, in tuner_run
tuner = ray.tune.Tuner(PPO, param_space=alg_config.to_dict(), run_config=run_config, tune_config=tune_config)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm_config.py", line 477, in to_dict
config = copy.deepcopy(vars(self))
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
state = deepcopy(state, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
state = deepcopy(state, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 205, in _deepcopy_list
append(deepcopy(a, memo))
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
state = deepcopy(state, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 296, in _reconstruct
value = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
state = deepcopy(state, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 296, in _reconstruct
value = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 172, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 270, in _reconstruct
state = deepcopy(state, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 146, in deepcopy
y = copier(x, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/copy.py", line 153, in deepcopy
y = copier(memo)
File "/home/av/anaconda3/envs/environment_fixed/lib/python3.8/site-packages/torch/_tensor.py", line 102, in __deepcopy__
raise RuntimeError(
RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment
Is there a workaround for this issue?