Ray.rllib.agents.ppo missing

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

Hi all,

I have dabbled with some ML in the past on a 750 Ti, but that didn't cut it, so I put my projects on hold until I could upgrade.

I have a new system now, but my previous code no longer works, so I was hoping someone could tell me what I need to update to make it run again (and link any migration guides that would help).

My current code:

import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.env import PolicyServerInput

from ray.rllib.algorithms.ppo import PPOConfig

import numpy as np
import argparse
from gymnasium.spaces import MultiDiscrete, Box

ppo_config = PPOConfig()

parser = argparse.ArgumentParser(description='Optional app description')
parser.add_argument('-ip', type=str, help='IP of this device')

parser.add_argument('-checkpoint', type=str, help='location of checkpoint to restore from')

args = parser.parse_args()

def _input(ioctx):
    return PolicyServerInput(
        ioctx,
        args.ip,
        55556,
    )


x = 320
y = 240


# kl_coeff, ->
# vf_loss_coeff used to be 0.01??
# "entropy_coeff": 0.00005,
# "clip_param": 0.1,
ppo_config.gamma = 0.998  # default 0.99
ppo_config.lambda_ = 0.99  # default 1.0???
ppo_config.kl_target = 0.01  # used to use 0.02
ppo_config.rollout_fragment_length = 512
ppo_config.train_batch_size = 6400
ppo_config.sgd_minibatch_size = 256
ppo_config.num_sgd_iter = 2 # default 30???
ppo_config.lr = 3.5e-5  # 5e-5
ppo_config.model = {
    # Share layers for value function. If you set this to True, it's
    # important to tune vf_loss_coeff.
    "vf_share_layers": False,

    "use_lstm": True,
    "max_seq_len": 32,
    "lstm_cell_size": 128,
    "lstm_use_prev_action": True,

    # 'use_attention': True,
    # "max_seq_len": 128,
    # "attention_num_transformer_units": 1,
    # "attention_dim": 1024,
    # "attention_memory_inference": 128,
    # "attention_memory_training": 128,
    # "attention_num_heads": 8,
    # "attention_head_dim": 64,
    # "attention_position_wise_mlp_dim": 512,
    # "attention_use_n_prev_actions": 0,
    # "attention_use_n_prev_rewards": 0,
    # "attention_init_gru_gate_bias": 2.0,

    "conv_filters": [
        # [4, [3, 4], [1, 1]],
        # [16, [6, 8], [3, 3]],
        # [32, [6, 8], [3, 4]],
        # [64, [6, 6], 3],
        # [256, [9, 9], 1],

        # 480 x 640
        # [4, [7, 7], [3, 3]],
        # [16, [5, 5], [3, 3]],
        # [32, [5, 5], [2, 2]],
        # [64, [5, 5], [2, 2]],
        # [256, [5, 5], [3, 5]],

        # 240 X 320
        [16, [5, 5], 3],
        [32, [5, 5], 3],
        [64, [5, 5], 3],
        [128, [3, 3], 2],
        [256, [3, 3], 2],
        [512, [3, 3], 2],
    ],
    "conv_activation": "relu",
    "post_fcnet_hiddens": [512],
    "post_fcnet_activation": "relu"
}
ppo_config.batch_mode = "complete_episodes"
ppo_config.simple_optimizer = True
ppo_config.num_gpus = 1


ppo_config.rollouts(num_rollout_workers=0)

ppo_config.offline_data(input_=_input)

ppo_config.env = None
ppo_config.observation_space = Box(low=0, high=1, shape=(y, x, 1), dtype=np.float32)
ppo_config.action_space = MultiDiscrete(
    [
        2,  # W
        2,  # A
        2,  # S
        2,  # D
        2,  # Space
        2,  # H
        2,  # J
        2,  # K
        2  # L
    ]
)
ppo_config.env_config = {
    "sleep": True,
}
ppo_config.framework_str = 'tf'
ppo_config.log_sys_usage = False
ppo_config.compress_observations = True
ppo_config.shuffle_sequences = False
tempyy = ppo_config.to_dict()

ray.init(num_cpus=4, num_gpus=1, log_to_driver=False)
trainer = PPOTrainer

from ray import tune

name = "" + args.checkpoint
print(f"Starting: {name}")

tune.run(
    trainer,
    resume='AUTO',
    config=ppo_config.to_dict(),
    name=name,
    keep_checkpoints_num=None,
    checkpoint_score_attr="episode_reward_mean",
    max_failures=1,
    # restore="C:\\Users\\denys\\ray_results\\mediumbrawl-attention-256Att-128MLP-L2\\PPOTrainer_RandomEnv_1e882_00000_0_2022-06-02_15-13-44\\checkpoint_000028\\checkpoint-28",
    checkpoint_freq=5,
    checkpoint_at_end=True)

The error I’m getting:

Traceback (most recent call last):
  File "C:\Users\denys\Documents\GitHub\brawlhalla\policy_server.py", line 2, in <module>
    from ray.rllib.agents.ppo import PPOTrainer
ModuleNotFoundError: No module named 'ray.rllib.agents.ppo'

Hello, the agents folder has been deprecated in favor of algorithms in Ray 2.x.

# from ray.rllib.agents.ppo import PPOTrainer 
from ray.rllib.algorithms.ppo import PPO
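
For reference, here is a minimal sketch (assuming Ray 2.x) of how the renamed class fits together with the config-based API; the environment name and hyperparameters below are placeholders for illustration, not values from this thread:

# Sketch only, assuming Ray 2.x: PPOTrainer is now the PPO Algorithm class,
# and configuration is done through PPOConfig.
from ray.rllib.algorithms.ppo import PPO, PPOConfig

config = (
    PPOConfig()
    .environment(env="CartPole-v1")      # placeholder env for illustration
    .framework("tf")
    .rollouts(num_rollout_workers=0)
    .training(gamma=0.998, lr=3.5e-5, train_batch_size=6400)
)

algo = config.build()   # replaces PPOTrainer(config=...)
result = algo.train()   # runs one training iteration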

Does the API and the rest of my code stay the same?

That helped, but I'm getting another issue. I opened a new thread since it seems unrelated.
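
For completeness, a hedged sketch (assuming Ray 2.x) of how the trainer reference and the tune.run call from the original script would change under the rename; the surrounding arguments (resume, name, checkpointing) are shown as in the original and are illustrative rather than confirmed by the thread:

# Sketch only, assuming Ray 2.x: tune.run accepts the new PPO class
# (or the registered string "PPO") in place of the old PPOTrainer.
from ray import tune
from ray.rllib.algorithms.ppo import PPO

tune.run(
    PPO,                          # was: trainer = PPOTrainer
    resume='AUTO',
    config=ppo_config.to_dict(),  # ppo_config built with PPOConfig as above
    name=name,
    checkpoint_freq=5,
    checkpoint_at_end=True,
)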