Hi @Lars_Simon_Zehnder @Douae_Ahmadoun,
I'm also facing a similar issue. I am trying to train Pong in a PettingZoo environment using DQN. Below is my implementation:
```python
import os

import numpy as np
from pettingzoo.atari import pong_v3
from supersuit import (
    color_reduction_v0,
    dtype_v0,
    frame_skip_v0,
    frame_stack_v1,
    normalize_obs_v0,
    resize_v1,
)

import ray
from gymnasium.spaces import Box, Discrete
from ray import tune
from ray.rllib.algorithms.dqn import DQNConfig
from ray.rllib.algorithms.dqn.dqn_torch_model import DQNTorchModel
from ray.rllib.env import PettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.torch_utils import FLOAT_MAX
from ray.tune.registry import register_env

torch, nn = try_import_torch()


if __name__ == "__main__":
    ray.init()

    alg_name = "DQN"

    # Map each PettingZoo agent to the policy of the same name.
    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        return agent_id

    # Function that outputs the (wrapped) environment you wish to register.
    def env_creator():
        env = pong_v3.env()
        env = frame_stack_v1(color_reduction_v0(frame_skip_v0(env, 4), "full"), 4)
        env = dtype_v0(env, np.dtype(np.float32))
        env = normalize_obs_v0(env, env_min=0, env_max=1)
        env = resize_v1(env, x_size=84, y_size=84)
        return env

    env_name = "pong_v3"
    register_env(env_name, lambda config: PettingZooEnv(env_creator()))

    test_env = PettingZooEnv(env_creator())
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    config = (
        DQNConfig()
        .environment(
            env=env_name,
            env_config={
                "max_episode_steps": 108000,
                "obs_type": "grayscale",
                # The authors actually use an action repetition of 4.
                "repeat_action_probability": 0.25,
            },
            clip_rewards=True,
        )
        .training(
            # Note, the paper also uses an Adam epsilon of 0.00015.
            lr=0.0000625,
            n_step=3,
            gamma=0.99,
            tau=1.0,
            train_batch_size=32,
            target_network_update_freq=32000,
            replay_buffer_config={
                "type": "PrioritizedEpisodeReplayBuffer",
                "capacity": 1000000,
                "alpha": 0.5,
                # Note, the paper used a linear schedule for beta.
                "beta": 0.4,
            },
            # Note, these are frames.
            num_steps_sampled_before_learning_starts=80000,
            noisy=True,
            num_atoms=51,
            v_min=-10.0,
            v_max=10.0,
            double_q=True,
            dueling=True,
            # learner_connector=_make_learner_connector,
        )
        .reporting(
            metrics_num_episodes_for_smoothing=10,
            min_sample_timesteps_per_iteration=1000,
        )
        .evaluation(
            evaluation_duration="auto",
            evaluation_interval=1,
            evaluation_num_env_runners=1,
            evaluation_parallel_to_training=True,
            evaluation_config={
                "explore": False,
            },
        )
        .multi_agent(
            policies={
                "first_0": (None, obs_space, act_space, {}),
                "second_0": (None, obs_space, act_space, {}),
            },
            policy_mapping_fn=policy_mapping_fn,
        )
        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
        .debugging(log_level="DEBUG")  # TODO: change to ERROR to match the pistonball example.
        .framework(framework="torch")
    )
```
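The script above only builds the config and never launches training. What I run after the config is roughly the following; it is a minimal sketch (the iteration count is an arbitrary placeholder, and the result keys for the episode return differ between RLlib versions), not taken verbatim from the referenced examples:

```python
# Minimal sketch: build the algorithm from the config above and run a few
# training iterations. 100 iterations is a placeholder value.
algo = config.build()

for i in range(100):
    result = algo.train()
    # "episode_reward_mean" may live under a different key (e.g. under
    # "env_runners") depending on the RLlib version, hence .get().
    print(f"iter={result['training_iteration']} "
          f"episode_reward_mean={result.get('episode_reward_mean')}")

checkpoint = algo.save()  # persist a checkpoint
print(f"Checkpoint saved at {checkpoint}")

algo.stop()
ray.shutdown()
```

The referenced benchmark script drives the same config through `ray.tune.Tuner(...)` with stopping criteria instead of a manual loop; either way of launching it gives me the same issue.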
P.S. I am using these two examples as references for the code:
- ray/rllib/tuned_examples/dqn/benchmark_dqn_atari.py at master · ray-project/ray · GitHub
- RLlib: DQN for Simple Poker - PettingZoo Documentation