Multi-agent training only using one CPU

I am trying to run a multi-agent reinforcement learning environment, but only one CPU is used even when num_workers is set higher. In the example below, Ray reports that it is using 12 CPUs (the number of CPUs on my machine), but htop shows that only one CPU is actually busy. I have confirmed that training with single-agent environments (e.g., CartPole) uses all the CPUs.

Can you help me understand what I am doing wrong? Thanks.

import numpy as np
from gym.spaces import Discrete, Box
from ray.rllib.env.multi_agent_env import MultiAgentEnv
import ray.tune

class SimpleMultiAgentEnv(MultiAgentEnv):
    def __init__(self):
        self.action_space = Discrete(1)
        # The high bound was cut off in the original post; an unbounded 1-d Box is assumed.
        self.observation_space = Box(low=np.array([-np.inf]),
                                     high=np.array([np.inf]))
    def step(self, action_dict: dict) -> tuple:
        obs = {'player1': np.array([0.]), 'player2': np.array([1.])}

        rew = {'player1': 0., 'player2': 0.}
        self.step_ct += 1
        done = {"__all__": self.step_ct > 100}
        return obs, rew, done, {}

    def reset(self) -> dict:
        self.step_ct = 0
        return {'player1': np.array([0.]), 'player2': np.array([1.])}

def main() -> None:
    ray.tune.register_env(
        "SimpleMultiAgentEnv", lambda _: SimpleMultiAgentEnv())

    env = SimpleMultiAgentEnv()
    ray.tune.run(
        'PPO',  # the algorithm name was cut off in the original post; PPO assumed
        stop={'timesteps_total': 100000},
        config={
            'env': 'SimpleMultiAgentEnv',
            'num_gpus': 0,
            'num_workers': 11,
            'framework': 'torch',
            'multiagent': {
                'policies_to_train': ['learned'],
                'policies': {
                    'learned':
                        (None, env.observation_space, env.action_space, {}),
                },
                'policy_mapping_fn': lambda _: 'learned',
            },
        })
if __name__ == '__main__':
    main()
Can you simply try: ray.init(num_cpus=12)?
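For context, here is a minimal sketch of where that call would go, assuming the rest of the script above stays the same. ray.init(num_cpus=12) starts the local Ray instance with an explicit CPU budget before tune.run() schedules the rollout workers:

import ray
import ray.tune

def main() -> None:
    # Initialize Ray explicitly with 12 CPUs before launching Tune,
    # so its resource accounting sees all cores when placing the workers.
    ray.init(num_cpus=12)

    ray.tune.register_env(
        "SimpleMultiAgentEnv", lambda _: SimpleMultiAgentEnv())
    # ... rest of main() unchanged from the script above ...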