Trouble reproducing results with DQN

How severely does this issue affect your experience of using Ray?

  • Low: It annoys or frustrates me for a moment.

Hello there,
I am having trouble reproducing the DQN result for Breakout. I used essentially the same parameters as those specified in the YAML file.

The only exceptions are that I changed the replay buffer capacity to 180,000 and enabled observation compression.

The score peaks at around 11, and the average is around 2.
The following code is the config I used:

# Customize the replay buffer settings.
# dict.update() mutates the dict in place and returns None, so keep a
# reference to the dict itself rather than binding the result of update().
replay_config = config.replay_buffer_config
replay_config.update({
    "capacity": 180000,
    "type": "MultiAgentReplayBuffer",
    # Prioritized-replay options are left commented out (not set here):
    # "prioritized_replay_alpha": 0.6,
    # "prioritized_replay_beta": 0.4,
    # "prioritized_replay_eps": 1e-6,
})
# Print the resulting buffer settings so they can be checked in the run log.
print(replay_config)
# Training hyperparameters, collected in one mapping and passed to
# config.training() as keyword arguments.
# gamma, model and td_error_loss_fn are not passed here.
training_kwargs = {
    "lr": 0.0000625,
    "train_batch_size": 32,
    "dueling": False,
    "double_q": True,
    "target_network_update_freq": 8000,
    "hiddens": [512],
    "n_step": 1,
    "replay_buffer_config": replay_config,
    "num_steps_sampled_before_learning_starts": 20000,
    "adam_epsilon": 0.00015,
    # Settings the author labels "DISABLE RAINBOW".
    "noisy": False,
    "num_atoms": 1,
}
config = config.training(**training_kwargs)

# Environment, deep-learning framework and rollout settings, applied as one
# chained expression; each call's result feeds the next.
# NOTE(review): ALE "v5" environments reportedly enable sticky actions by
# default (repeat_action_probability=0.25) — confirm this matches the
# reference setup being reproduced.
config = (
    config
    .environment(
        env="ALE/Breakout-v5",
        # frameskip set to 1 (the author marks this as "disabled").
        env_config={"frameskip": 1},
    )
    .framework(framework="tf")
    .rollouts(
        # Worker-count, local-env and preprocessor options are not passed here.
        rollout_fragment_length=4,
        compress_observations=True,
    )
)

# Adjust the exploration settings in place on the config's own mapping, then
# print the result for inspection.
explore_config = config.exploration_config
explore_config.update({
    "final_epsilon": 0.01,
    "epsilon_timesteps": 200000,
})
print(explore_config)

# Apply the exploration settings and request one GPU.
# (Checkpointing with export_native_model_files is not enabled here.)
config = (
    config
    .exploration(exploration_config=explore_config)
    .resources(num_gpus=1)
)

# Run-level settings: the experiment name and the stop criterion (stop once
# agent_timesteps_total reaches 1e7).
# No checkpoint_config is passed; for the available options see
# https://docs.ray.io/en/master/ray-air/package-ref.html#ray.air.config.CheckpointConfig
run_config = air.RunConfig(
    stop={"agent_timesteps_total": 1e7},
    name="Ablation-Breakout-Removed-test",
)

# Build the tuner for the "DQN" trainable using the config assembled above,
# converted to a plain dict, then launch the run.
tuner = tune.Tuner(
    "DQN",
    run_config=run_config,
    param_space=config.to_dict(),
)

results = tuner.fit()