QMIX is not running when using GPU

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

QMIX: self._loss is None (qmix_policy.py)

When num_gpus > 0 is set, the following error occurs:
2022-09-06 06:54:29,618 ERROR trial_runner.py:980 -- Trial QMIX_grouped_twostep_b920e_00000: Error processing event.
ray.exceptions.RayTaskError(ValueError): ray::QMix.train() (pid=73360, ip=172.17.0.11, repr=QMix)
TypeError: 'NoneType' object is not callable

The above exception was the direct cause of the following exception:

ray::QMix.train() (pid=73360, ip=172.17.0.11, repr=QMix)
  File "/opt/conda/lib/python3.7/site-packages/ray/tune/trainable/trainable.py", line 347, in train
    result = self.step()
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 661, in step
    results, train_iter_ctx = self._run_one_training_iteration()
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 2382, in _run_one_training_iteration
    recreate=self.config["recreate_failed_workers"],
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 2190, in try_recover_from_step_attempt
    raise error
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/algorithms/qmix/qmix.py", line 274, in training_step
    train_results = multi_gpu_train_one_step(self, train_batch)
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/execution/train_ops.py", line 177, in multi_gpu_train_one_step
    permutation[batch_index] * per_device_batch_size, buffer_index=0
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 591, in learn_on_loaded_batch
    tower_outputs = self._multi_gpu_parallel_grad_calc(device_batches)
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 1170, in _multi_gpu_parallel_grad_calc
    raise output[0] from output[1]
ValueError: 'NoneType' object is not callable
traceback: Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 1064, in _worker
    self._loss(self, model, self.dist_class, sample_batch)
TypeError: 'NoneType' object is not callable
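
From the traceback, the multi-GPU training path ends up calling self._loss on the QMIX torch policy, and that attribute is None. Below is a minimal sketch of the call chain I believe is failing (this is not the actual RLlib source; names follow torch_policy.py in Ray 2.0.0, and it assumes QMIX's torch policy computes its loss inside its own learn_on_batch() and therefore never registers a standalone loss function):

# Illustrative sketch only, not the real RLlib source.
# When num_gpus > 0, training goes through multi_gpu_train_one_step(),
# which eventually calls the policy's standalone loss function.
class TorchPolicySketch:
    def __init__(self, loss_fn=None):
        # QMIX's torch policy does not pass a loss function here,
        # so self._loss stays None.
        self._loss = loss_fn

    def _worker(self, model, dist_class, sample_batch):
        # Corresponds to torch_policy.py line 1064 in the traceback above:
        # calling None raises "TypeError: 'NoneType' object is not callable".
        return self._loss(self, model, dist_class, sample_batch)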

Hi @ilovecomet

Could you please provide a reproduction script?

Cheers

import argparse
from gym.spaces import Dict, Discrete, Tuple, MultiDiscrete
import logging
import os

import ray
from ray import air, tune
from ray.tune import register_env
from ray.rllib.algorithms.qmix import QMixConfig
from ray.rllib.env.multi_agent_env import ENV_STATE
from ray.rllib.examples.env.two_step_game import TwoStepGame
# from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.test_utils import check_learning_achieved

logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument(
    "--run", type=str, default="QMIX", help="The RLlib-registered algorithm to use."
)
parser.add_argument(
    "--framework",
    choices=["tf", "tf2", "tfe", "torch"],
    default="torch",
    help="The DL framework specifier.",
)
parser.add_argument("--num-cpus", type=int, default=10)
parser.add_argument(
    "--mixer",
    type=str,
    default="vdn",
    choices=["qmix", "vdn", "none"],
    help="The mixer model to use.",
)
parser.add_argument(
    "--as-test",
    action="store_true",
    help="Whether this script should be run as a test: --stop-reward must "
    "be achieved within --stop-timesteps AND --stop-iters.",
)
parser.add_argument(
    "--stop-iters", type=int, default=200, help="Number of iterations to train."
)
parser.add_argument(
    "--stop-timesteps", type=int, default=70000, help="Number of timesteps to train."
)
parser.add_argument(
    "--stop-reward", type=float, default=8.0, help="Reward at which we stop training."
)
parser.add_argument(
    "--local-mode",
    action="store_true",
    help="Init Ray in local mode for easier debugging.",
)

if __name__ == "__main__":
    args = parser.parse_args()
    # os.environ['CUDA_VISIBLE_DEVICES'] = '2,3,4,5'
    os.environ["RLLIB_NUM_GPUS"] = "1"
    ray.init(
        local_mode=args.local_mode,
        num_cpus=args.num_cpus or None,
        # num_gpus=1,
        # address="auto",
        # _node_ip_address="172.17.0.3",
        dashboard_host="0.0.0.0",
    )

    grouping = {
        "group_1": [0, 1],
    }
    obs_space = Tuple(
        [
            Dict(
                {
                    "obs": MultiDiscrete([2, 2, 2, 3]),
                    ENV_STATE: MultiDiscrete([2, 2, 2]),
                }
            ),
            Dict(
                {
                    "obs": MultiDiscrete([2, 2, 2, 3]),
                    ENV_STATE: MultiDiscrete([2, 2, 2]),
                }
            ),
        ]
    )
    act_space = Tuple(
        [
            TwoStepGame.action_space,
            TwoStepGame.action_space,
        ]
    )
    register_env(
        "grouped_twostep",
        lambda config: TwoStepGame(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space
        ),
    )

    if args.run == "QMIX":
        config = (
            QMixConfig()
            .debugging(
                log_level="WARN",
                # fake_sampler=True,
            )
            .training(mixer=args.mixer, train_batch_size=32)
            .rollouts(
                num_rollout_workers=2,
                num_envs_per_worker=1,
                remote_worker_envs=False,
                rollout_fragment_length=4,
            )
            .exploration(
                exploration_config={
                    "final_epsilon": 0.0,
                }
            )
            .environment(
                env="grouped_twostep",
                env_config={
                    "separate_state_space": True,
                    "one_hot_state_encoding": True,
                },
            )
            .resources(
                num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")),
                num_cpus_per_worker=1,
            )
        )
        config = config.to_dict()
    else:
        config = {
            "env": TwoStepGame,
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "framework": args.framework,
        }

    stop = {
        "episode_reward_mean": args.stop_reward,
        "timesteps_total": args.stop_timesteps,
        "training_iteration": args.stop_iters,
    }

    results = tune.Tuner(
        args.run,
        run_config=air.RunConfig(stop=stop, verbose=2),
        param_space=config,
    ).fit()

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

    ray.shutdown()

If num_gpus is set to 0, it works fine; if num_gpus > 0, the error above occurs. The reproduction code is in the post above, and the Ray version is 2.0.0.
I found that the difference is in torch_policy.py, when device_type is cpu.
Thanks.
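
A possible workaround I am thinking about (untested, and assuming the standard RLlib "simple_optimizer" setting behaves for QMIX as it does for other algorithms) would be to force the simple training path so that training_step() does not go through multi_gpu_train_one_step():

from ray.rllib.algorithms.qmix import QMixConfig

# Untested workaround sketch: force RLlib's "simple optimizer" path so that
# training does not call multi_gpu_train_one_step(). Whether this actually
# avoids the error above has not been verified.
config = (
    QMixConfig()
    .training(mixer="qmix", train_batch_size=32)
    .resources(num_gpus=1)
)
config_dict = config.to_dict()
config_dict["simple_optimizer"] = True  # fall back to the single-tower train path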

I can reproduce this and have [opened an issue](https://github.com/ray-project/ray/issues/28428). Let's move the discussion there. Thanks for reporting this!
