Severity: High (it blocks me from completing my task).
Hi everyone -
I am currently building a multi-agent RL setup using RLlib with PPO - see the script below. The script generally runs with both Tune and the plain `train()` loop (the Tune variant is sketched at the end of this post), but in either case the number of trained env steps and agent steps remains zero throughout the process.
I was wondering whether anyone has faced a similar issue?
    'counters':
    {'num_env_steps_sampled': 3584,
     'num_env_steps_trained': 0,
     'num_agent_steps_sampled': 10752,
     'num_agent_steps_trained': 0},
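The counters above come straight from the result dict returned by `algo.train()`. A minimal sketch of how I read them each iteration (assuming `counters` stays a top-level key of the result, as in my paste above):

```python
result = algo.train()
# "counters" is a top-level key of the train() result in my output;
# adjust the key path if your RLlib version nests it differently.
for key in ("num_env_steps_trained", "num_agent_steps_trained"):
    print(key, "=", result["counters"][key])
```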
Current main script:
    import os
    import sys
    import platform

    if platform.system() != "Linux":
        if "SUMO_HOME" in os.environ:
            # we need to import python modules from the $SUMO_HOME/tools directory
            tools = os.path.join(os.environ["SUMO_HOME"], "tools")
            sys.path.append(tools)
        else:
            sys.exit("please declare environment variable 'SUMO_HOME'")
        import traci
    else:
        # no second, unconditional "import traci" after this point:
        # it would shadow the libsumo alias on Linux
        import libsumo as traci

    import numpy as np
    import pandas as pd
    import ray
    from ray import tune
    # from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.algorithms.ppo import (
        PPOConfig,
        PPOTF1Policy,
        PPOTF2Policy,
        PPOTorchPolicy,
    )
    from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv, PettingZooEnv
    from ray.tune.registry import register_env
    from ray.tune.logger import pretty_print
    from stable_baselines3.common.evaluation import evaluate_policy
    from stable_baselines3 import PPO
    import ma_environment.custom_envs as custom_env
    import supersuit as ss
    def env_creator(args):
        env = custom_env.MA_grid_new(
            net_file="…",
            route_file="…",
            use_gui=False,
            num_seconds=30000,
            begin_time=19800,
            time_to_teleport=300,
            reward_fn="combined_emission",
            sumo_warnings=False,
        )
        return env
    if __name__ == "__main__":
        ray.init()

        env_name = "MA_grid_new"
        register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

        env = ParallelPettingZooEnv(env_creator({}))

        # get obs and action space
        obs_space = env.observation_space
        act_space = env.action_space
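        # Sketch only, not part of my original script: these spaces would
        # typically be consumed by a .multi_agent() section on the config
        # below, e.g. one shared policy for all agents:
        #
        #   .multi_agent(
        #       policies={"shared_policy": (None, obs_space, act_space, {})},
        #       policy_mapping_fn=lambda agent_id, *args, **kwargs: "shared_policy",
        #   )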
        config = (
            PPOConfig()
            .environment(env=env_name, disable_env_checking=True)
            .rollouts(num_rollout_workers=3, rollout_fragment_length="auto")
            .training(
                train_batch_size=512,
                lr=2e-5,
                gamma=0.95,
                lambda_=0.9,
                use_gae=True,
                clip_param=0.4,
                grad_clip=None,
                entropy_coeff=0.1,
                vf_loss_coeff=0.25,
                sgd_minibatch_size=64,
                num_sgd_iter=10,
            )
            .debugging(log_level="ERROR")
            .framework(framework="torch")
            .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
            .evaluation(evaluation_num_workers=1)
        )
        algo = config.build()

        for i in range(10):
            print("Training iteration:", i)
            print(algo.train())

        algo.evaluate()
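For completeness, the Tune variant I mentioned is essentially the following (a minimal sketch, assuming the Ray 2.x `Tuner`/`RunConfig` API that matches the config builder used above; the stop criterion is illustrative, not my exact setting):

```python
from ray import air, tune

# Run the same PPO config through Tune instead of the manual train() loop.
tuner = tune.Tuner(
    "PPO",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop={"training_iteration": 10}),
)
results = tuner.fit()
```

Both paths show the same behavior: sampled steps grow, trained steps stay at zero.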