I’m trying to set up a training pipeline using Ray remote envs, where I create 2 envs per CPU. After checking the Ray dashboard, each remote env actor is taking up 600 MB, which is relatively high given how simple the dummy env in the example below is.
import gym
from gym import spaces
import numpy as np
import ray
from ray import tune
from ray.tune.registry import register_env
class CustomEnv(gym.Env):
    """Minimal dummy environment: 1-byte random observations, zero reward.

    Note: ``gym.Env`` (and RLlib) expects ``observation_space`` and
    ``action_space`` to be *attributes*, not zero-argument methods. The
    original defined them as methods, which breaks the Env API contract.
    The spaces are also created once here instead of being re-allocated
    on every ``reset``/``step`` call.
    """

    def __init__(self, config):
        # Build each space a single time and expose them as attributes,
        # per the gym.Env API. `config` is accepted for RLlib's env_config
        # plumbing but intentionally unused.
        self.observation_space = spaces.Box(low=0, high=255, shape=(1,), dtype=np.uint8)
        self.action_space = spaces.Discrete(1)

    def reset(self):
        # Return a random observation sampled from the cached space.
        return self.observation_space.sample()

    def step(self, action):
        # Random observation, no reward, never terminates, no extra info.
        obs = self.observation_space.sample()
        reward = 0  # no reward
        return obs, reward, False, {}

    def _get_spaces(self):
        # Convenience accessor returning (observation_space, action_space).
        return self.observation_space, self.action_space
def env_factory(env_config):
    """Factory used by ``register_env``: build a CustomEnv from an env_config dict."""
    env = CustomEnv(env_config)
    return env
def main():
    """Register the custom env, launch Ray, and train PPO via Tune."""
    # Make the env available to RLlib under the name "custom_env".
    register_env("custom_env", env_factory)

    # RLlib/PPO training configuration. Note: with num_envs_per_worker=2
    # and remote_worker_envs=True, each env runs in its own Ray actor.
    config = {
        "env": "custom_env",
        "num_workers": 1,
        "framework": "tf",
        "model": {"fcnet_hiddens": [256, 256]},
        "gamma": 0.99,
        "lr": 0.0005,
        "train_batch_size": 2000,
        "rollout_fragment_length": 100,
        "batch_mode": "truncate_episodes",
        "num_envs_per_worker": 2,
        "remote_worker_envs": True,
        "num_gpus": 0,
    }

    # Start the (local) Ray cluster.
    ray.init()

    # Run PPO until the stop criterion is met; keep a final checkpoint.
    analysis = tune.run(
        "PPO",
        config=config,
        stop={"episode_reward_mean": 200},
        checkpoint_at_end=True,
    )

    # Report the training results.
    print(analysis.dataframe())


if __name__ == '__main__':
    main()
The memory taken by each remote env actor is shown on the Ray dashboard.
Is there any way to reduce this unusually high memory usage, or an explanation for it?
thanks,