How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi all,
I have dabbled with some ML in the past on a 750 Ti, but that didn't cut it, so I put my projects on hold until I could upgrade.
I have a new system now, but my previous code doesn't work anymore, so I was hoping someone could tell me what I need to update to make it work (and link any migration guides that would help).
My current code:
import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.env import PolicyServerInput
from ray.rllib.algorithms.ppo import PPOConfig
import numpy as np
import argparse
from gymnasium.spaces import MultiDiscrete, Box
ppo_config = PPOConfig()
parser = argparse.ArgumentParser(description='Optional app description')
parser.add_argument('-ip', type=str, help='IP of this device')
parser.add_argument('-checkpoint', type=str, help='location of checkpoint to restore from')
args = parser.parse_args()
def _input(ioctx):
    return PolicyServerInput(
        ioctx,
        args.ip,
        55556,
    )
x = 320
y = 240
# kl_coeff, ->
# vf_loss_coeff used to be 0.01??
# "entropy_coeff": 0.00005,
# "clip_param": 0.1,
ppo_config.gamma = 0.998 # default 0.99
ppo_config.lambda_ = 0.99 # default 1.0???
ppo_config.kl_target = 0.01 # used to use 0.02
ppo_config.rollout_fragment_length = 512
ppo_config.train_batch_size = 6400
ppo_config.sgd_minibatch_size = 256
ppo_config.num_sgd_iter = 2 # default 30???
ppo_config.lr = 3.5e-5 # 5e-5
ppo_config.model = {
# Share layers for value function. If you set this to True, it's
# important to tune vf_loss_coeff.
"vf_share_layers": False,
"use_lstm": True,
"max_seq_len": 32,
"lstm_cell_size": 128,
"lstm_use_prev_action": True,
# 'use_attention': True,
# "max_seq_len": 128,
# "attention_num_transformer_units": 1,
# "attention_dim": 1024,
# "attention_memory_inference": 128,
# "attention_memory_training": 128,
# "attention_num_heads": 8,
# "attention_head_dim": 64,
# "attention_position_wise_mlp_dim": 512,
# "attention_use_n_prev_actions": 0,
# "attention_use_n_prev_rewards": 0,
# "attention_init_gru_gate_bias": 2.0,
"conv_filters": [
# [4, [3, 4], [1, 1]],
# [16, [6, 8], [3, 3]],
# [32, [6, 8], [3, 4]],
# [64, [6, 6], 3],
# [256, [9, 9], 1],
# 480 x 640
# [4, [7, 7], [3, 3]],
# [16, [5, 5], [3, 3]],
# [32, [5, 5], [2, 2]],
# [64, [5, 5], [2, 2]],
# [256, [5, 5], [3, 5]],
# 240 X 320
[16, [5, 5], 3],
[32, [5, 5], 3],
[64, [5, 5], 3],
[128, [3, 3], 2],
[256, [3, 3], 2],
[512, [3, 3], 2],
],
"conv_activation": "relu",
"post_fcnet_hiddens": [512],
"post_fcnet_activation": "relu"
}
ppo_config.batch_mode = "complete_episodes"
ppo_config.simple_optimizer = True
ppo_config.num_gpus = 1
ppo_config.rollouts(num_rollout_workers=0)
ppo_config.offline_data(input_=_input)
ppo_config.env = None
ppo_config.observation_space = Box(low=0, high=1, shape=(y, x, 1), dtype=np.float32)
ppo_config.action_space = MultiDiscrete(
[
2, # W
2, # A
2, # S
2, # D
2, # Space
2, # H
2, # J
2, # K
2 # L
]
)
ppo_config.env_config = {
"sleep": True,
}
ppo_config.framework_str = 'tf'
ppo_config.log_sys_usage = False
ppo_config.compress_observations = True
ppo_config.shuffle_sequences = False
tempyy = ppo_config.to_dict()
ray.init(num_cpus=4, num_gpus=1, log_to_driver=False)
trainer = PPOTrainer
from ray import tune
name = "" + args.checkpoint
print(f"Starting: {name}")
tune.run(trainer,
resume='AUTO',
config=ppo_config.to_dict(), name=name, keep_checkpoints_num=None, checkpoint_score_attr="episode_reward_mean",
max_failures=1,
# restore="C:\\Users\\denys\\ray_results\\mediumbrawl-attention-256Att-128MLP-L2\\PPOTrainer_RandomEnv_1e882_00000_0_2022-06-02_15-13-44\\checkpoint_000028\\checkpoint-28",
checkpoint_freq=5, checkpoint_at_end=True)
The error I’m getting:
Traceback (most recent call last):
File "C:\Users\denys\Documents\GitHub\brawlhalla\policy_server.py", line 2, in <module>
from ray.rllib.agents.ppo import PPOTrainer
ModuleNotFoundError: No module named 'ray.rllib.agents.ppo'
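From skimming the current RLlib docs, my guess is that ray.rllib.agents was replaced by ray.rllib.algorithms in Ray 2.0 and that PPOTrainer was renamed to PPO, so the setup would become something like the sketch below. This is just my untested attempt against a current Ray 2.x install; the server address, port, and experiment name are placeholders, and I've dropped the model/conv_filters dict and the checkpoint-resume options for brevity. Is this the right direction, or is there a proper migration guide I should follow?

import ray
from ray import tune
# In Ray >= 2.0, ray.rllib.agents became ray.rllib.algorithms and
# PPOTrainer was renamed to PPO (Trainer classes are now "Algorithms").
from ray.rllib.algorithms.ppo import PPO, PPOConfig
from ray.rllib.env.policy_server_input import PolicyServerInput
from gymnasium.spaces import MultiDiscrete, Box
import numpy as np

# Same policy-server input as above; the bind address/port are placeholders
# and must match whatever the policy client connects to.
def _input(ioctx):
    return PolicyServerInput(ioctx, "0.0.0.0", 55556)

config = (
    PPOConfig()
    .framework("tf")
    .training(
        gamma=0.998,
        lambda_=0.99,
        kl_target=0.01,
        train_batch_size=6400,
        sgd_minibatch_size=256,
        num_sgd_iter=2,
        lr=3.5e-5,
        # the old "model" dict should still go here, e.g. model={"use_lstm": True, ...}
    )
    .rollouts(
        num_rollout_workers=0,
        rollout_fragment_length=512,
        batch_mode="complete_episodes",
    )
    .offline_data(input_=_input)
    .environment(
        env=None,
        observation_space=Box(low=0, high=1, shape=(240, 320, 1), dtype=np.float32),
        action_space=MultiDiscrete([2] * 9),  # W, A, S, D, Space, H, J, K, L
    )
    .resources(num_gpus=1)
)

ray.init(num_cpus=4, num_gpus=1, log_to_driver=False)
tune.run(
    PPO,  # the class formerly called PPOTrainer (the string "PPO" should also work)
    config=config.to_dict(),
    name="my-experiment",  # placeholder
    checkpoint_freq=5,
    checkpoint_at_end=True,
)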