How to improve PPO training results

My PPO training results are not as good as expected: episode_reward_max is close to -14, but episode_reward_mean is only about -20. Why are the two so different, and how can I improve the performance?

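To see how wide the spread of per-episode rewards really is (not just the mean vs. the max), the per-iteration metrics can be pulled out of the ResultGrid that the training run below returns. A minimal sketch, assuming results is the object returned by Tuner.fit() in the code that follows:

# Assumption: results is the tune.ResultGrid returned by Tuner.fit() below.
best = results.get_best_result(metric="episode_reward_mean", mode="max")
df = best.metrics_dataframe  # pandas DataFrame of all reported metrics, one row per iteration
# Compare the spread of episode rewards over the last few training iterations.
print(df[["training_iteration", "episode_reward_min",
          "episode_reward_mean", "episode_reward_max"]].tail())
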
import os

from ray import train, tune
from ray.rllib.algorithms.algorithm import Algorithm
from ray.tune.registry import get_trainable_cls

# env_name is assumed to be an environment id registered with RLlib/Tune elsewhere.

if __name__ == "__main__":
    algorithm = "PPO"
    code_file_path = os.path.dirname(os.path.realpath(__file__))
    save_path = os.path.join(code_file_path, "tmp", env_name)
    user_checkpoint_dir = os.path.join(save_path, "rllib_best_checkpoint")
    ############################################## Define algorithm parameters ##############################################
    config = (
        get_trainable_cls(algorithm)
        .get_default_config()
        .environment(env_name)
        .framework("torch")
        .rollouts(num_rollout_workers=9)
        .training(
            model={
                "fcnet_hiddens": [512, 512],
                "fcnet_activation": "tanh",
            },
        )
    )
    if algorithm == "PPO":
        config.training(
            lr_schedule=None,
            # lr_schedule=[[0, 0.0001], [1000000, 0.00005], [2000000, 0.00001], [3000000, 0.000005]],  # tune.grid_search([1e-4, 2e-4]),
            lr=5e-5,
            # tune.grid_search([1e-4, 2e-4]),
            train_batch_size=4000,

            # PPO-specific settings:
            use_critic=True,
            use_gae=True,
            # lambda_=1.0,
            lambda_=0.95,
            # lambda_=tune.grid_search([0.95, 0.9]),
            use_kl_loss=True,
            kl_coeff=0.2,
            kl_target=0.01,
            sgd_minibatch_size=128,
            num_sgd_iter=30,
            shuffle_sequences=True,
            vf_loss_coeff=1.0,
            entropy_coeff=0.01,
            # entropy_coeff=tune.grid_search([0.02, 0.01]),
            entropy_coeff_schedule=None,
            # clip_param=0.3,
            clip_param=0.5,
            # clip_param=tune.grid_search([0.3, 1.0]),
            vf_clip_param=10.0,
            grad_clip=None,
        )
    ############################################## Train with Tune ##############################################
    results = tune.Tuner(
        algorithm,
        param_space=config,
        run_config=train.RunConfig(
            stop={
                "training_iteration": 100,
                # 'timesteps_total': 50000,
            },
            verbose=2,
            checkpoint_config=train.CheckpointConfig(
                num_to_keep=5,
                checkpoint_frequency=5,
                checkpoint_score_attribute="episode_reward_max",
                checkpoint_score_order='max',
                checkpoint_at_end=True
            ),
            storage_path=save_path,
        ),
    ).fit()
    # Save the best checkpoint from the run
    ckpt = results.get_best_result(metric="episode_reward_mean", mode="max").checkpoint
    algo = Algorithm.from_checkpoint(ckpt)
    algo.save(user_checkpoint_dir)
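
For reference, a minimal sketch of how the saved checkpoint could be restored and rolled out for one evaluation episode. It assumes env_name is also a registered Gymnasium environment id; a custom env would need its own constructor in place of gym.make:

import gymnasium as gym
from ray.rllib.algorithms.algorithm import Algorithm

# Restore the trained algorithm from the checkpoint saved above.
algo = Algorithm.from_checkpoint(user_checkpoint_dir)

# Roll out one episode with deterministic (non-exploring) actions.
env = gym.make(env_name)  # assumption: env_name is a Gymnasium-registered id
obs, _ = env.reset()
done, total_reward = False, 0.0
while not done:
    action = algo.compute_single_action(obs, explore=False)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
    total_reward += reward
print("episode reward:", total_reward)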