How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I am trying to train a DQN on CartPole-v1 (full script below) and would like to:
- change the default optimizer from Adam to AdamW (by which I mean `torch.optim.AdamW`; see the snippet at the very end), and
- update the target network every 1000 steps (see my untested guess in the sketch below).

If I understand correctly, `DQNConfig()` loads the default config. How would I go about making these two changes to it?
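My current guess for the target-network interval is `target_network_update_freq` inside `.training()`, but I'm not sure whether it counts single env steps or whether it's the supported knob at all. A minimal, untested sketch of what I mean:

```python
from ray.rllib.algorithms.dqn import DQNConfig

# Untested guess: I believe target_network_update_freq is the number of
# steps between target-network syncs, so 1000 should give the behavior
# I'm after. Please correct me if this is the wrong knob.
config = DQNConfig().training(target_network_update_freq=1000)
```

For the optimizer I couldn't find any config key that selects the optimizer class, so I assume it needs a custom policy or a similar override, but I don't know the intended way.

Here is the full script I'm running: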
```python
# Set visible GPUs for this run.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
import ray
from ray import air, tune
from ray.rllib.algorithms.dqn import DQNConfig
if __name__ == '__main__':
    rllib_config = (
        DQNConfig()
        .framework(framework=tune.grid_search(['torch', 'tf2']), eager_tracing=True)
        .debugging(seed=0)
        .environment(env='CartPole-v1')
        .resources(num_gpus=0.2)
        .exploration(
            exploration_config={
                'type': 'EpsilonGreedy',
                'initial_epsilon': 0.9,
                'final_epsilon': 0.05,
                'epsilon_timesteps': 100000,
            }
        )
        .rollouts(rollout_fragment_length='auto')
        .training(
            gamma=0.99,
            lr=1e-4,
            train_batch_size=128,
            model={'fcnet_hiddens': [128, 128], 'fcnet_activation': 'relu'},
            optimizer={'adam_epsilon': 1e-7},
            num_atoms=1,
            noisy=False,
            dueling=False,
            double_q=False,
            n_step=1,
            training_intensity=None,
            replay_buffer_config={
                'type': 'MultiAgentReplayBuffer',
                'capacity': 10000,
                'replay_sequence_length': 1,
            },
            td_error_loss_fn='huber',
        )
        .reporting(metrics_num_episodes_for_smoothing=500)
    )
    air_config = air.RunConfig(
        name='results',
        stop={'episode_reward_mean': 475},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True,
        ),
        local_dir='rllib/dqn/cartpole',
        log_to_file=True,
    )
    tuner = tune.Tuner(
        'DQN',
        param_space=rllib_config,
        run_config=air_config,
    )
    ray.init()
    tuner.fit()
    ray.shutdown()
```
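To be explicit about the first point: by "AdamW" I just mean PyTorch's built-in `torch.optim.AdamW`, i.e. something like the standalone snippet below (the `Linear` module is only a stand-in, not part of my setup):

```python
import torch

# Stand-in module, only to show the optimizer I'd like RLlib to build
# instead of the default Adam.
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, eps=1e-7)
```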