I am trying to run a multi-agent reinforcement learning environment, but I seem to be able to use only 1 CPU even when num_workers
is set to something higher. In the example below, Ray reports that it is using 12 CPUs (the number of CPUs on my machine), but htop reveals that only 1 CPU is actually being used. I have confirmed that training with single-agent environments (e.g., CartPole) uses all the CPUs.
Can you help me understand what I am doing wrong? Thanks.
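For reference, this is how I am checking the CPU count Ray detects (a minimal sketch; the tune status output during training reports the same number):

    import ray

    ray.init()
    # Prints something like {'CPU': 12.0, ...} on my machine.
    print(ray.cluster_resources())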
import numpy as np
from gym.spaces import Discrete, Box
from ray.rllib.env.multi_agent_env import MultiAgentEnv
import ray.tune


# Trivial two-player env: constant observations and rewards;
# episodes end after 100 steps.
class SimpleMultiAgentEnv(MultiAgentEnv):
    def __init__(self):
        self.action_space = Discrete(1)
        self.observation_space = Box(low=np.array([-np.inf]),
                                     high=np.array([np.inf]))

    def step(self, action_dict: dict) -> tuple:
        obs = {'player1': np.array([0.]), 'player2': np.array([1.])}
        rew = {'player1': 0., 'player2': 0.}
        self.step_ct += 1
        done = {"__all__": self.step_ct > 100}
        return obs, rew, done, {}

    def reset(self) -> dict:
        self.step_ct = 0
        return {'player1': np.array([0.]), 'player2': np.array([1.])}


def main() -> None:
    ray.tune.registry.register_env(
        "SimpleMultiAgentEnv", lambda _: SimpleMultiAgentEnv())
    env = SimpleMultiAgentEnv()
    ray.tune.run(
        run_or_experiment='PPO',
        stop={'timesteps_total': 100000},
        config={
            'env': 'SimpleMultiAgentEnv',
            'num_gpus': 0,
            'num_workers': 11,
            'framework': 'torch',
            'multiagent': {
                'policies_to_train': ['learned'],
                'policies': {
                    'learned':
                        (None, env.observation_space, env.action_space, {})
                },
                # Map every agent to the single shared policy.
                'policy_mapping_fn': lambda _: 'learned'
            },
        })


if __name__ == '__main__':
    main()
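For comparison, this is the kind of single-agent run that does saturate all 12 CPUs for me (a minimal sketch; everything except num_workers is illustrative):

    import ray.tune

    # Same PPO settings, but with a built-in single-agent gym env.
    ray.tune.run(
        run_or_experiment='PPO',
        stop={'timesteps_total': 100000},
        config={
            'env': 'CartPole-v0',
            'num_gpus': 0,
            'num_workers': 11,
            'framework': 'torch',
        })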