Compatible NumPy version with Ray 2.43.0

Hi, I am trying to run code with Ray and I am getting an error related to NumPy. I have tried different NumPy versions but I keep getting stuck. Here are the errors with the versions I have tried.

pip show ray
Name: ray
Version: 2.43.0

pip install numpy==1.23.3   (also tried 1.26.4)
pip install numpy==2.0.0

File "/home/ray/rllib/examples/centralized_criticmod.py", line 35, in <module>
    from ray import tune
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/tune/__init__.py", line 5, in <module>
    import pandas  # noqa: F401
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/__init__.py", line 22, in <module>
    from pandas.compat import is_numpy_dev as _is_numpy_dev  # pyright: ignore # noqa:F401
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/compat/__init__.py", line 25, in <module>
    from pandas.compat.numpy import (
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/compat/numpy/__init__.py", line 4, in <module>
    from pandas.util.version import Version
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/util/__init__.py", line 2, in <module>
    from pandas.util._decorators import (  # noqa:F401
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/util/_decorators.py", line 14, in <module>
    from pandas._libs.properties import cache_readonly
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/pandas/_libs/__init__.py", line 13, in <module>
    from pandas._libs.interval import Interval
File "pandas/_libs/interval.pyx", line 1, in init pandas._libs.interval
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/policy/torch_policy_v2.py", line 505, in compute_actions_from_input_dict
    return self._compute_action_helper(
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/utils/threading.py", line 32, in wrapper
    raise e
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
    return func(self, *a, **k)
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/policy/torch_policy_v2.py", line 1075, in _compute_action_helper
    dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/models/modelv2.py", line 256, in __call__
    res = self.forward(restored, state or [], seq_lens)
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/examples/_old_api_stack/models/centralized_critic_models.py", line 122, in forward
    model_out, _ = self.model(input_dict, state, seq_lens)
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/models/modelv2.py", line 256, in __call__
    res = self.forward(restored, state or [], seq_lens)
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/models/torch/fcnet.py", line 143, in forward
    obs = input_dict["obs_flat"].float()
AttributeError: 'numpy.ndarray' object has no attribute 'float'

pip install numpy==1.21.0, 1.21.1, 1.21.2, 1.21.6, 1.22.3 — all of these give the same error. For example:
np_version_forbids_neg_powint = LooseVersion(numpy.__version__) >= LooseVersion('1.12.0b1')
/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/evaluation/postprocessing.py:2: UserWarning: A NumPy version >=1.23.5 and <2.5.0 is required for this version of SciPy (detected version 1.21.1)
import scipy.signal
Traceback (most recent call last):
File "/home/ray/rllib/examples/centralized_criticmod.py", line 37, in <module>
    from ray.rllib.algorithms.ppo.ppo import PPO, PPOConfig
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/__init__.py", line 7, in <module>
    from ray.rllib.env.base_env import BaseEnv
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/env/__init__.py", line 7, in <module>
    from ray.rllib.env.policy_server_input import PolicyServerInput
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/env/policy_server_input.py", line 20, in <module>
    from ray.rllib.evaluation.metrics import RolloutMetrics
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/evaluation/__init__.py", line 7, in <module>
    from ray.rllib.evaluation.postprocessing import compute_advantages
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/ray/rllib/evaluation/postprocessing.py", line 2, in <module>
    import scipy.signal
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/scipy/signal/__init__.py", line 302, in <module>
    from ._spline_filters import *
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/scipy/signal/_spline_filters.py", line 11, in <module>
    from ._signaltools import lfilter, sosfilt, lfiltic
File "/home/miniconda3/envs/multirlmod/lib/python3.10/site-packages/scipy/signal/_signaltools.py", line 12, in <module>
    from numpy._typing import ArrayLike
ModuleNotFoundError: No module named 'numpy._typing'

The main error to resolve is this one: AttributeError: 'numpy.ndarray' object has no attribute 'float'

I have also tried installing the nightly build, but I am still getting the same error.

How can I resolve this?

Hi there!
It seems like NumPy has removed the float alias in recent versions. You can read a bit more in this Stack Overflow question:

But to repost:

  • NumPy 1.20 (release notes) deprecated numpy.float, numpy.int, and similar aliases, causing them to issue a deprecation warning
  • NumPy 1.24 (release notes) removed these aliases altogether, causing an error when they are used

Maybe you can try using astype() instead to convert it to float? numpy.ndarray.astype — NumPy v2.1 Manual
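For illustration, here is a minimal sketch (my own toy example, not code from RLlib) of the difference between the removed alias and the NumPy-native conversion:

import numpy as np

arr = np.asarray([1, 2, 3])

# np.float was deprecated in NumPy 1.20 and removed in 1.24:
# np.float(1.0)            # AttributeError on NumPy >= 1.24; use float() or np.float64()

# The NumPy way to get a float array is astype():
arr_f = arr.astype(np.float32)

# Note that .float() is a torch.Tensor method, not a NumPy one, so calling it on an
# ndarray raises: AttributeError: 'numpy.ndarray' object has no attribute 'float'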

Let me know if this helps with your error ^^

Hi @christina, I did try astype, but I am still getting the error. In fact, I tried float64 and other options as well, and the error persists. Can you please suggest other solutions if you come across any? I have also tried various Ray and NumPy version combinations, but I am not able to run the file "ray/rllib/examples/centralized_critic.py" with Ray version 2.43.0 and NumPy version 1.26.4.

Thanks for the reply.

Best wishes

Hmm, can you paste the code here if you can and let me know what line it is failing on exactly? I think that’ll help me a lot in figuring out what’s going on :slight_smile:

Also, there is a small chance that it's a binary-incompatibility issue. If all else fails, you could try creating a fresh conda or venv environment and reinstalling the pinned library versions, so there aren't any stale builds interfering with your code.
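Something like this, for example (the environment name and the exact pins are only an illustration based on the versions already mentioned in this thread; adjust them to what your project needs):

conda create -n multirl-fresh python=3.10
conda activate multirl-fresh
pip install "ray[rllib]==2.43.0" torch "numpy==1.26.4" "pandas>=2.0" scipy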

Sure. I am simply trying to run the file given in the ray/rllib/examples folder, centralized_critic.py. I started with TwoStepGame and have changed to other environments as well, but the error is the same. Here is the code.

# @OldAPIStack

# ***********************************************************************************
# IMPORTANT NOTE: This script uses the old API stack and will soon be replaced by
# ray.rllib.examples.multi_agent.pettingzoo_shared_value_function.py!
# ***********************************************************************************

"""An example of customizing PPO to leverage a centralized critic.

Here the model and policy are hard-coded to implement a centralized critic
for TwoStepGame, but you can adapt this for your own use cases.

Compared to simply running rllib/examples/two_step_game.py --run=PPO,
this centralized critic version reaches vf_explained_variance=1.0 more stably
since it takes into account the opponent actions as well as the policy's.
Note that this is also using two independent policies instead of weight-sharing
with one.

See also: centralized_critic_2.py for a simpler approach that instead
modifies the environment.
"""

import argparse

# import gym                       # unused; gymnasium is used instead
from gymnasium.spaces import Discrete
# from gym.spaces import Discrete  # would shadow the gymnasium Discrete above

import numpy as np
import os
import sys

import ray

from ray import tune
from ray.tune.result import TRAINING_ITERATION
from ray.rllib.algorithms.ppo.ppo import PPO, PPOConfig
from ray.rllib.algorithms.ppo.ppo_tf_policy import (
    PPOTF1Policy,
    PPOTF2Policy,
)
from ray.rllib.algorithms.ppo.ppo_torch_policy import PPOTorchPolicy
from ray.rllib.evaluation.postprocessing import compute_advantages, Postprocessing

sys.path.append('home/ryzen/ray/rllib')

from ray.rllib.examples.centralized_critic import CentralizedValueMixin  # no error up to examples

from ray.rllib.examples.envs.classes.multi_agent.two_step_game import TwoStepGame

from ray.rllib.examples.envs.classes.multi_agent.rock_paper_scissors import RockPaperScissors

from ray.rllib.examples._old_api_stack.models.centralized_critic_models import (
    CentralizedCriticModel,
    TorchCentralizedCriticModel,
)
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.metrics import (
    ENV_RUNNER_RESULTS,
    EPISODE_RETURN_MEAN,
    NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray.rllib.utils.numpy import convert_to_numpy
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.utils.tf_utils import explained_variance, make_tf_callable
from ray.rllib.utils.torch_utils import convert_to_torch_tensor

tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()

OPPONENT_OBS = "opponent_obs"
OPPONENT_ACTION = "opponent_action"

parser = argparse.ArgumentParser()
parser.add_argument(
    "--framework",
    choices=["tf", "tf2", "torch"],
    default="torch",
    help="The DL framework specifier.",
)
parser.add_argument(
    "--as-test",
    action="store_true",
    help="Whether this script should be run as a test: --stop-reward must "
    "be achieved within --stop-timesteps AND --stop-iters.",
)
parser.add_argument(
    "--stop-iters", type=int, default=100, help="Number of iterations to train."
)
parser.add_argument(
    "--stop-timesteps", type=int, default=100000, help="Number of timesteps to train."
)
parser.add_argument(
    "--stop-reward", type=float, default=7.99, help="Reward at which we stop training."
)

class CentralizedValueMixin:
    """Add method to evaluate the central value function from the model."""

    def __init__(self):
        if self.config["framework"] != "torch":
            self.compute_central_vf = make_tf_callable(self.get_session())(
                self.model.central_value_function
            )
        else:
            self.compute_central_vf = self.model.central_value_function


# Grabs the opponent obs/act and includes it in the experience train_batch,
# and computes GAE using the central vf predictions.

def centralized_critic_postprocessing(
    policy, sample_batch, other_agent_batches=None, episode=None
):
    pytorch = policy.config["framework"] == "torch"
    if (pytorch and hasattr(policy, "compute_central_vf")) or (
        not pytorch and policy.loss_initialized()
    ):
        assert other_agent_batches is not None
        [(_, _, opponent_batch)] = list(other_agent_batches.values())

        # also record the opponent obs and actions in the trajectory
        sample_batch[OPPONENT_OBS] = opponent_batch[SampleBatch.CUR_OBS]
        sample_batch[OPPONENT_ACTION] = opponent_batch[SampleBatch.ACTIONS]

        # overwrite default VF prediction with the central VF
        if args.framework == "torch":
            sample_batch[SampleBatch.VF_PREDS] = (
                policy.compute_central_vf(
                    convert_to_torch_tensor(
                        sample_batch[SampleBatch.CUR_OBS], policy.device
                    ),
                    convert_to_torch_tensor(sample_batch[OPPONENT_OBS], policy.device),
                    convert_to_torch_tensor(
                        sample_batch[OPPONENT_ACTION], policy.device
                    ),
                )
                .cpu()
                .detach()
                .numpy()
            )
        else:
            sample_batch[SampleBatch.VF_PREDS] = convert_to_numpy(
                policy.compute_central_vf(
                    sample_batch[SampleBatch.CUR_OBS],
                    sample_batch[OPPONENT_OBS],
                    sample_batch[OPPONENT_ACTION],
                )
            )
    else:
        # Policy hasn't been initialized yet, use zeros.
        sample_batch[OPPONENT_OBS] = np.zeros_like(sample_batch[SampleBatch.CUR_OBS])
        sample_batch[OPPONENT_ACTION] = np.zeros_like(sample_batch[SampleBatch.ACTIONS])
        sample_batch[SampleBatch.VF_PREDS] = np.zeros_like(
            sample_batch[SampleBatch.REWARDS], dtype=np.float32
        )

    completed = sample_batch[SampleBatch.TERMINATEDS][-1]
    if completed:
        last_r = 0.0
    else:
        last_r = sample_batch[SampleBatch.VF_PREDS][-1]

    train_batch = compute_advantages(
        sample_batch,
        last_r,
        policy.config["gamma"],
        policy.config["lambda"],
        use_gae=policy.config["use_gae"],
    )
    return train_batch

# Copied from PPO but optimizing the central value function.

def loss_with_central_critic(policy, base_policy, model, dist_class, train_batch):
    # Save original value function.
    vf_saved = model.value_function

    # Calculate loss with a custom value function.
    model.value_function = lambda: policy.model.central_value_function(
        train_batch[SampleBatch.CUR_OBS],
        train_batch[OPPONENT_OBS],
        train_batch[OPPONENT_ACTION],
    )
    policy._central_value_out = model.value_function()
    loss = base_policy.loss(model, dist_class, train_batch)

    # Restore original value function.
    model.value_function = vf_saved

    return loss

def central_vf_stats(policy, train_batch):
    # Report the explained variance of the central value function.
    return {
        "vf_explained_var": explained_variance(
            train_batch[Postprocessing.VALUE_TARGETS], policy._central_value_out
        )
    }

def get_ccppo_policy(base):
    class CCPPOTFPolicy(CentralizedValueMixin, base):
        def __init__(self, observation_space, action_space, config):
            base.__init__(self, observation_space, action_space, config)
            CentralizedValueMixin.__init__(self)

        @override(base)
        def loss(self, model, dist_class, train_batch):
            # Use super() to get to the base PPO policy.
            # This special loss function utilizes a shared
            # value function defined on self, and the loss function
            # defined on PPO policies.
            return loss_with_central_critic(
                self, super(), model, dist_class, train_batch
            )

        @override(base)
        def postprocess_trajectory(
            self, sample_batch, other_agent_batches=None, episode=None
        ):
            return centralized_critic_postprocessing(
                self, sample_batch, other_agent_batches, episode
            )

        @override(base)
        def stats_fn(self, train_batch: SampleBatch):
            stats = super().stats_fn(train_batch)
            stats.update(central_vf_stats(self, train_batch))
            return stats

    return CCPPOTFPolicy

CCPPOStaticGraphTFPolicy = get_ccppo_policy(PPOTF1Policy)
CCPPOEagerTFPolicy = get_ccppo_policy(PPOTF2Policy)

class CCPPOTorchPolicy(CentralizedValueMixin, PPOTorchPolicy):
    def __init__(self, observation_space, action_space, config):
        PPOTorchPolicy.__init__(self, observation_space, action_space, config)
        CentralizedValueMixin.__init__(self)

    @override(PPOTorchPolicy)
    def loss(self, model, dist_class, train_batch):
        return loss_with_central_critic(self, super(), model, dist_class, train_batch)

    @override(PPOTorchPolicy)
    def postprocess_trajectory(
        self, sample_batch, other_agent_batches=None, episode=None
    ):
        return centralized_critic_postprocessing(
            self, sample_batch, other_agent_batches, episode
        )

class CentralizedCritic(PPO):
    @classmethod
    @override(PPO)
    def get_default_policy_class(cls, config):
        if config["framework"] == "torch":
            return CCPPOTorchPolicy
        elif config["framework"] == "tf":
            return CCPPOStaticGraphTFPolicy
        else:
            return CCPPOEagerTFPolicy

if __name__ == "__main__":
    ray.init(local_mode=True)
    args = parser.parse_args()

    ModelCatalog.register_custom_model(
        "cc_model",
        TorchCentralizedCriticModel
        if args.framework == "torch"
        else CentralizedCriticModel,
    )

    config = (
        PPOConfig()
        .api_stack(
            enable_env_runner_and_connector_v2=False,
            enable_rl_module_and_learner=False,
        )
        .environment(RockPaperScissors)
        .framework(args.framework)
        .env_runners(batch_mode="complete_episodes", num_env_runners=0)
        .training(model={"custom_model": "cc_model"})
        .multi_agent(
            policies={
                "pol1": (
                    None,
                    Discrete(6),
                    RockPaperScissors.action_space,
                    # `framework` would also be ok here.
                    PPOConfig.overrides(framework_str=args.framework),
                ),
                "pol2": (
                    None,
                    Discrete(6),
                    RockPaperScissors.action_space,
                    # `framework` would also be ok here.
                    PPOConfig.overrides(framework_str=args.framework),
                ),
            },
            policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "pol1"
            if agent_id == 0
            else "pol2",
        )
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    )

    stop = {
        TRAINING_ITERATION: args.stop_iters,
        NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps,
        f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward,
    }

    tuner = tune.Tuner(
        CentralizedCritic,
        param_space=config.to_dict(),
        run_config=tune.RunConfig(stop=stop, verbose=1),
    )
    results = tuner.fit()

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

Thanks for your help!