I have created a custom environment class that subclasses RLlib's `VectorEnv`, using the CartPole-v1 env as the base, as shown below:
class MockVectorEnv(VectorEnv):
    """A toy ``VectorEnv`` that vectorizes ``CartPole-v1``.

    Each vector slot is backed by its own independent gym env instance.

    NOTE: the original version shared a single ``gym.make`` env across all
    slots, so ``vector_step`` advanced the *same* episode ``num_envs`` times
    per call and reported the intermediate states as if they were parallel
    envs, and ``reset_at``/``vector_reset`` clobbered the shared state for
    every slot. One env per slot fixes that.
    """

    def __init__(self, num_envs):
        # One independent env per vector slot.
        self.envs = [gym.make("CartPole-v1") for _ in range(num_envs)]
        super().__init__(
            # All sub-envs are identical, so spaces come from the first one.
            observation_space=self.envs[0].observation_space,
            action_space=self.envs[0].action_space,
            num_envs=num_envs,
        )

    def vector_reset(self) -> List[EnvObsType]:
        """Reset every sub-env and return the list of initial observations."""
        return [env.reset() for env in self.envs]

    def reset_at(self, index: Optional[int] = None) -> EnvObsType:
        """Reset only the sub-env at ``index`` (defaults to slot 0)."""
        return self.envs[index if index is not None else 0].reset()

    def vector_step(
        self, actions
    ) -> Tuple[List[EnvObsType], List[float], List[bool], List[EnvInfoDict]]:
        """Step each sub-env with its own action; return batched results.

        ``actions`` must have length ``num_envs`` (one action per slot).
        """
        obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
        for env, action in zip(self.envs, actions):
            obs, rew, done, info = env.step(action)
            obs_batch.append(obs)
            rew_batch.append(rew)
            done_batch.append(done)
            info_batch.append(info)
        return obs_batch, rew_batch, done_batch, info_batch

    def get_sub_environments(self) -> List[EnvType]:
        """Return the underlying per-slot gym envs (a shallow copy)."""
        return list(self.envs)
But when I use this env with `num_envs_per_worker: 5` and `remote_worker_envs: True`, the envs do not run in parallel, as shown below. When I looked at the Ray dashboard, only one worker (i.e., only one CPU) was being used.
if __name__ == "__main__":
    # Start Ray locally (dashboard args left commented for reference:
    # dashboard_host="0.0.0.0", dashboard_port=8265).
    ray.init()

    # Register the custom vector env under a name the trainer config can use.
    # NOTE(review): registered with num_envs=1 while the config below asks
    # for num_envs_per_worker=5 — confirm this mismatch is intentional.
    tune.register_env("custom_vec_env", lambda env_ctx: MockVectorEnv(1))

    ppo_config = {
        "env": 'custom_vec_env',
        "use_gae": True,
        "num_workers": 1,
        "num_envs_per_worker": 5,
        "rollout_fragment_length": 100,
        "sgd_minibatch_size": 128,
        "train_batch_size": 1000,
        "lr": 0.001,
        "gamma": 0.95,
        "entropy_coeff": 0.02,
        "num_sgd_iter": 10,
        "remote_worker_envs": True,
    }

    # Train PPO until the timestep budget is exhausted, tracking the best
    # mean episode reward.
    analysis = tune.run(
        "PPO",
        local_dir="./results/tb_logs_1/",
        stop={"timesteps_total": 129000},
        metric="episode_reward_mean",
        mode="max",
        config=ppo_config,
    )
This was not the case when I used the CartPole-v1 env directly, without the custom VectorEnv. Why am I not able to run the envs in parallel using a custom VectorEnv? Am I doing anything wrong? Please help me with this.