How do I change the default optimizer for a policy?

How severe does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

I am trying to train DQN with CartPole-v1 (code is below) and would like to:

  1. Change the standard optimizer which is used from Adam to AdamW.
  2. Update the target network every 1000 steps.

If I am right, DQNConfig() loads the default config. How would I go about making those changes to the default DQN config?

# Restrict this process to GPUs 2 and 3 only.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

import ray
from ray import air, tune
from ray.rllib.algorithms.dqn import DQNConfig

if __name__ == '__main__':
    # Build the DQN config. Notes on the two questions in the post:
    #   1. Target-network sync cadence IS a plain config field:
    #      `target_network_update_freq` (in env timesteps), set below.
    #   2. Swapping Adam for AdamW is NOT a plain config switch — it requires
    #      overriding the policy/learner optimizer (e.g. subclassing the policy
    #      and overriding its `optimizer()` method) — TODO confirm against the
    #      installed RLlib version.
    rllib_config = DQNConfig()\
        .framework(framework=tune.grid_search(['torch', 'tf2']), eager_tracing=True)\
        .debugging(seed=0)\
        .environment(env='CartPole-v1')\
        .resources(num_gpus=0.2)\
        .exploration(exploration_config={
            # BUGFIX: the dict was passed positionally, which binds to the
            # `explore` flag instead of `exploration_config`.
            'type': 'EpsilonGreedy',
            'initial_epsilon': 0.9,
            'final_epsilon': 0.05,
            # BUGFIX: key was misspelled 'epsilone_timesteps', silently leaving
            # the default epsilon decay schedule in place.
            'epsilon_timesteps': 100000,
        })\
        .rollouts(rollout_fragment_length='auto')\
        .training(
            gamma=0.99,
            lr=1e-4,
            train_batch_size=128,
            model={'fcnet_hiddens': [128, 128], 'fcnet_activation': 'relu'},
            optimizer={'adam_epsilon': 1e-7},
            num_atoms=1,
            noisy=False,
            dueling=False,
            double_q=False,
            n_step=1,
            # Sync the target network every 1000 timesteps (question 2 in the post).
            target_network_update_freq=1000,
            training_intensity=None,
            replay_buffer_config={'type': 'MultiAgentReplayBuffer', 'capacity': 10000, 'replay_sequence_length': 1},
            td_error_loss_fn='huber',
        )\
        .reporting(metrics_num_episodes_for_smoothing=500)

    # Stop once CartPole-v1 is solved (mean reward 475); checkpoint at the end.
    air_config = air.RunConfig(
        name='results',
        stop={'episode_reward_mean': 475},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True
        ),
        local_dir='rllib/dqn/cartpole',
        log_to_file=True,
    )

    tuner = tune.Tuner(
        'DQN',
        param_space=rllib_config,
        run_config=air_config,
    )

    ray.init()
    tuner.fit()
    ray.shutdown()