PPO does not seem to be learning anything

I'm trying to use PPO to train an agent to play Super Mario Bros, but it does not seem to learn well. Can hyperparameter tuning help, or am I doing something wrong?

class Mario(gym.Env):
    """Gym environment wrapping ``SuperMarioBros-1-1-v0`` with a shaped reward.

    The wrapped emulator is restricted to five joypad actions, every frame is
    resized to 84x84x3 ``uint8``, and the emulator's own reward is replaced by
    one computed from the ``info`` dict (see ``_process_reward``).
    """

    # Values the reward tracker starts every episode from; they must describe
    # the state right after a reset so the first step's deltas are meaningful.
    _START_X_POS: int = 40
    _START_LIVES: int = 2

    def __init__(self, env_config) -> None:
        """Build the wrapped environment and declare the spaces.

        Args:
            env_config: Accepted so the class can be used as an env creator
                target; it is not read.
        """
        super().__init__()

        env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0")
        self._env = JoypadSpace(env, [["right"], ["right", "A"], ["left"], ["left", "A"], ["A"]])

        self.action_space: Discrete = Discrete(n=5)
        self.observation_space: Box = Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)

        # Built once instead of on every frame.
        self._resize = T.Resize((84, 84))

        # Position / lives seen on the previous step; used for reward deltas.
        self._prev_x_pos: int = self._START_X_POS
        self._prev_life: int = self._START_LIVES

        self.reset()

    def _observation(self, obs: np.ndarray) -> np.ndarray:
        """Resize a raw HxWxC frame to the declared 84x84xC observation.

        Args:
            obs: Frame returned by the wrapped environment.

        Returns:
            The resized frame as a NumPy array in HxWxC layout.
        """
        # ``copy`` gives torch a fresh C-contiguous buffer to wrap.
        frame: torch.Tensor = torch.from_numpy(obs.copy())
        # Resize works on the last two dimensions, so go to CxHxW and back.
        frame = torch.permute(frame, (2, 0, 1))
        frame = self._resize(frame)
        frame = torch.permute(frame, (1, 2, 0))
        return frame.numpy()

    @staticmethod
    def _save_frame(obs: np.ndarray) -> None:
        """Debug aid: write a frame to Google Drive under a unique file name.

        Args:
            obs: Frame to save; assumed to be RGB as produced by the emulator
                (TODO confirm), hence the conversion to OpenCV's BGR order.
        """
        unique_name: str = md5(str(datetime.datetime.now()).encode()).hexdigest()
        bgr = cv2.cvtColor(obs, cv2.COLOR_RGB2BGR)
        cv2.imwrite(fr'/content/drive/My Drive/Images/{unique_name}.png', bgr)

    def _process_reward(self, info: dict) -> float:
        """Compute the shaped reward for one step from the ``info`` dict.

        The reward is the sum of:
          * horizontal progress since the previous step,
          * 100 x the change in remaining lives,
          * ``info["time"]`` once, when the flag is reached,
          * a speed term ``(x_pos - 40) / time * 10``.

        Args:
            info: Info dict of the wrapped environment; the keys ``x_pos``,
                ``life``, ``flag_get`` and ``time`` are read.

        Returns:
            The reward for this step.
        """
        # The values may be NumPy unsigned scalars; plain ints keep the
        # subtractions below from wrapping around instead of going negative.
        x_pos: int = int(info["x_pos"])
        life: int = int(info["life"])
        time_value: int = int(info["time"])

        progress: int = x_pos - self._prev_x_pos
        life_delta: int = (life - self._prev_life) * 100
        completion: int = time_value if info["flag_get"] else 0

        # Guard the division: the term is dropped when the clock reads 0.
        # NOTE(review): if ``time`` is the time *left* on the clock, this term
        # grows as the clock runs down and is paid on every step — confirm
        # that is the intended shaping.
        speed: float = (x_pos - 40) / time_value * 10 if time_value > 0 else 0.0

        self._prev_x_pos = x_pos
        self._prev_life = life

        return float(progress + life_delta + completion + speed)

    def step(self, action) -> tuple:
        """Advance the emulator by one action.

        Args:
            action: Index into the five joypad actions.

        Returns:
            ``(observation, reward, done, info)`` where the observation is the
            resized frame and the reward is the shaped one; the emulator's own
            reward is discarded.
        """
        obs, _, done, info = self._env.step(action=action)

        # Debug aid: uncomment to dump raw frames.
        # self._save_frame(obs=obs)
        new_obs: np.ndarray = self._observation(obs=obs)
        new_reward: float = self._process_reward(info=info)

        return new_obs, new_reward, done, info

    def reset(self) -> np.ndarray:
        """Start a new episode and return its first (resized) observation."""
        # Re-arm the reward tracker; otherwise the first step of the new
        # episode is compared against the last step of the previous one.
        self._prev_x_pos = self._START_X_POS
        self._prev_life = self._START_LIVES

        init_obs: np.ndarray = self._env.reset()
        return self._observation(init_obs)
# PPO configuration for the "mario" environment.
mario_config = {
    "env": "mario",
    "framework": "torch",
    "env_config": {},
    "num_gpus": 1,
    "num_workers": 5,  # number of rollout workers sampling in parallel
    "lr": 0.0001,
    "use_gpu": True,

    # Steps each worker collects per sample fragment (this is not the SGD
    # minibatch size).
    "rollout_fragment_length": 20,
    # Total steps per training batch, assembled from rollout fragments.
    "train_batch_size": 400,
    "num_cpus_per_worker": 0,
    "num_gpus_per_worker": 0.2,
    "gamma": 0.99,
    "batch_mode": "truncate_episodes",  # alternative: "complete_episodes"
    # Original note: shuffle batches with this minimum length.
    # NOTE(review): in RLlib this looks like an offline-input setting —
    # confirm it has any effect for on-policy PPO sampling.
    "shuffle_buffer_size": 10,
    "recreate_failed_workers": True,
    "horizon": 8000,

    # NOTE(review): this points inside a checkpoint directory — confirm that
    # is where sampled experiences are meant to be written.
    "output": "/content/drive/MyDrive/RL/Mario/results/PPO/checkpoint_001000/",
}

# Forward the per-environment config handed to the creator, not the whole
# algorithm config dict.
register_env("mario", lambda config: Mario(config))
algo = ppo.PPO(config=mario_config, env="mario")

NUM_ITERATIONS = 1000
CHECKPOINT_DIR = "/content/drive/MyDrive/RL/Mario/results/PPO/"

for i in range(NUM_ITERATIONS):
    train_results = algo.train()
    # Checkpoint every 100 iterations and on the last one, so no extra save
    # is needed after the loop.
    if i % 100 == 0 or i == NUM_ITERATIONS - 1:
        checkpoint = algo.save(CHECKPOINT_DIR)

algo.stop()

Results:

'sampler_results': {'custom_metrics': {},
                     'episode_len_mean': 160.0,
                     'episode_media': {},
                     'episode_reward_max': 478.24483703328093,
                     'episode_reward_mean': 478.24483703328093,
                     'episode_reward_min': 478.24483703328093,
                     'episodes_this_iter': 1,
                     'hist_stats': {'episode_lengths': [160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160,
                                                        160],
                                    'episode_reward': [478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093,
                                                       478.24483703328093]},
                     'num_faulty_episodes': 0,
                     'policy_reward_max': {},
                     'policy_reward_mean': {},
                     'policy_reward_min': {},
                     'sampler_perf': {'mean_action_processing_ms': 0.1310760700854791,
                                      'mean_env_render_ms': 0.0,
                                      'mean_env_wait_ms': 6.316789056530501,
                                      'mean_inference_ms': 5.924251481449408,
                                      'mean_raw_obs_processing_ms': 0.9874959776300141}},