I’m attempting to tune a large set of hyperparameters, and am receiving a TyperError for the replay buffer config:
… ray\rllib\utils\replay_buffers\multi_agent_prioritized_replay_buffer.py", line 129, in __init __
shard_capacity = capacity // num_shards
TypeError: unsupported operand type(s) for //: ‘Integer’ and ‘int’
Any help is greatly appreciated
here is my script:
config:DQNConfig = DQNConfig()
explore_config = {
"type": "EpsilonGreedy",
"initial_epsilon": tune.uniform(0.1, 1.0),
"final_epsilon": tune.uniform(0.01, 0.1),
"epsilon_timesteps": tune.randint(1000, 10000),
}
replay_config = {
"type": "MultiAgentPrioritizedReplayBuffer",
"capacity": tune.qrandint(1000, 5000, 1000),
"prioritized_replay_alpha": tune.uniform(0.2, 0.8),
"prioritized_replay_beta": tune.uniform(0.2, 0.8),
"prioritized_replay_eps": tune.uniform(1e-7, 1e-5),
"replay_sequence_length": 1,
"worker_side_prioritization": False,
}
config=config.training(
dueling=False,
double_q=False,
hiddens=tune.choice([[128, 128, 128], [256, 256, 256], [512, 512, 512]]),
lr=tune.loguniform(1e-4, 1e-2),
gamma=tune.uniform(0.9, 0.99),
train_batch_size=tune.randint(100, 2500),
replay_buffer_config=replay_config
)
config=config.exploration(exploration_config=explore_config)
config=config.environment(env=Env, env_config=algo.setup.ENV_CONFIG)
def train_dqn(config):
model = DQNTrainer(config)
avg_reward = model.train()
return avg_reward
analysis = tune.run(
train_dqn,
config=config,
metric="mean_reward",
mode="max",
num_samples=10000,
scheduler=HyperBandForBOHB()
)
best_config = analysis.get_best_config(metric="mean_reward")
best_checkpoint = analysis.get_best_checkpoint(metric="mean_reward")
trainer = RLTrainer(best_config.config)
trainer.restore(best_checkpoint)
checkpoint_file = trainer.save()
print("model saved to", checkpoint_file)