How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi all,
I am trying to use the policy_server + policy_client setup, but I’m getting the following error:
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\env\policy_server_input.py", line 238, in do_POST
response = self.execute_command(parsed_input)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\env\policy_server_input.py", line 267, in execute_command
setup_child_rollout_worker()
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\env\policy_server_input.py", line 210, in setup_child_rollout_worker
) = _create_embedded_rollout_worker(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\env\policy_client.py", line 405, in _create_embedded_rollout_worker
rollout_worker = RolloutWorker(**kwargs)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 830, in __init__
self.sampler = SyncSampler(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 246, in __init__
self._env_runner_obj = EnvRunnerV2(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 236, in __init__
raise ValueError(
ValueError: Policies using the new Connector API do not support ExternalEnv.
Error code explanation: 500 - Server got itself in trouble.
The request that triggered it was: {'episode_id': None, 'command': <Commands.START_EPISODE: 'START_EPISODE'>, 'training_enabled': True}
Traceback (most recent call last):
File "C:\Users\denys\OneDrive\Documents\GitHub\brawlhalla\polivery_client.py", line 66, in <module>
episode_id = client.start_episode()
File "C:\Users\denys\AppData\Local\Programs\Python\Python310\lib\site-packages\ray\rllib\env\policy_client.py", line 105, in start_episode
return self._send(
File "C:\Users\denys\AppData\Local\Programs\Python\Python310\lib\site-packages\ray\rllib\env\policy_client.py", line 247, in _send
response.raise_for_status()
File "C:\Users\denys\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Traceback (most recent call last): for url: http://192.168.0.32:55556/
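For context, the client side boils down to the standard PolicyClient loop. Only the start_episode() call is confirmed by the traceback above; the server address is taken from the URL in the error, and the rest of this sketch is the usual PolicyClient pattern rather than my exact code:

from ray.rllib.env.policy_client import PolicyClient

# Address taken from the failing URL; inference_mode="remote" is an assumption
# (the server-side traceback shows the server building the rollout worker,
# which is the remote-inference path).
client = PolicyClient("http://192.168.0.32:55556", inference_mode="remote")
episode_id = client.start_episode(training_enabled=True)  # <- this call gets the 500 back
# The rest of the loop would be:
#   action = client.get_action(episode_id, obs)
#   client.log_returns(episode_id, reward)
#   client.end_episode(episode_id, obs)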
The policy server code:
import ray
from ray.rllib.env import PolicyServerInput
from ray.rllib.algorithms.ppo import PPOConfig
import numpy as np
import argparse
from gymnasium.spaces import MultiDiscrete, Box
ppo_config = PPOConfig()
parser = argparse.ArgumentParser(description='RLlib policy server options')
parser.add_argument('-ip', type=str, help='IP of this device')
parser.add_argument('-checkpoint', type=str, help='location of checkpoint to restore from')
args = parser.parse_args()
# Input function: instead of sampling from an env, the algorithm pulls
# experiences from a PolicyServerInput listening for PolicyClient connections.
def _input(ioctx):
return PolicyServerInput(
ioctx,
args.ip,
55556,
)
# Observation frame dimensions (width x height)
x = 320
y = 240
# kl_coeff, ->
# vf_loss_coeff used to be 0.01??
# "entropy_coeff": 0.00005,
# "clip_param": 0.1,
ppo_config.gamma = 0.998 # default 0.99
ppo_config.lambda_ = 0.99 # default 1.0???
ppo_config.kl_target = 0.01 # used to use 0.02
ppo_config.rollout_fragment_length = 512
ppo_config.train_batch_size = 6400
ppo_config.sgd_minibatch_size = 256
ppo_config.num_sgd_iter = 2 # default 30???
ppo_config.lr = 3.5e-5 # 5e-5
ppo_config.model = {
# Share layers for value function. If you set this to True, it's
# important to tune vf_loss_coeff.
"vf_share_layers": False,
"use_lstm": True,
"max_seq_len": 32,
"lstm_cell_size": 128,
"lstm_use_prev_action": True,
# 'use_attention': True,
# "max_seq_len": 128,
# "attention_num_transformer_units": 1,
# "attention_dim": 1024,
# "attention_memory_inference": 128,
# "attention_memory_training": 128,
# "attention_num_heads": 8,
# "attention_head_dim": 64,
# "attention_position_wise_mlp_dim": 512,
# "attention_use_n_prev_actions": 0,
# "attention_use_n_prev_rewards": 0,
# "attention_init_gru_gate_bias": 2.0,
"conv_filters": [
# [4, [3, 4], [1, 1]],
# [16, [6, 8], [3, 3]],
# [32, [6, 8], [3, 4]],
# [64, [6, 6], 3],
# [256, [9, 9], 1],
# 480 x 640
# [4, [7, 7], [3, 3]],
# [16, [5, 5], [3, 3]],
# [32, [5, 5], [2, 2]],
# [64, [5, 5], [2, 2]],
# [256, [5, 5], [3, 5]],
# 240 X 320
[16, [5, 5], 3],
[32, [5, 5], 3],
[64, [5, 5], 3],
[128, [3, 3], 2],
[256, [3, 3], 2],
[512, [3, 3], 2],
],
"conv_activation": "relu",
"post_fcnet_hiddens": [512],
"post_fcnet_activation": "relu"
}
ppo_config.batch_mode = "complete_episodes"
ppo_config.simple_optimizer = True
ppo_config.num_gpus = 1
ppo_config.rollouts(num_rollout_workers=0)
ppo_config.offline_data(input_=_input)
ppo_config.env = None  # no env on the server; experiences come from connected clients
ppo_config.observation_space = Box(low=0, high=1, shape=(y, x, 1), dtype=np.float32)
ppo_config.action_space = MultiDiscrete(
[
2, # W
2, # A
2, # S
2, # D
2, # Space
2, # H
2, # J
2, # K
2 # L
]
)
ppo_config.env_config = {
"sleep": True,
}
ppo_config.framework_str = 'torch'
ppo_config.log_sys_usage = False
ppo_config.compress_observations = True
ppo_config.shuffle_sequences = False
tempyy = ppo_config.to_dict()
ray.init(num_cpus=4, num_gpus=1, log_to_driver=False)
from ray import tune
name = "" + args.checkpoint
print(f"Starting: {name}")
tune.run("PPO",
resume='AUTO',
config=ppo_config.to_dict(),
name=name, keep_checkpoints_num=None, checkpoint_score_attr="episode_reward_mean",
max_failures=1,
# restore="C:\\Users\\denys\\ray_results\\mediumbrawl-attention-256Att-128MLP-L2\\PPOTrainer_RandomEnv_1e882_00000_0_2022-06-02_15-13-44\\checkpoint_000028\\checkpoint-28",
checkpoint_freq=5, checkpoint_at_end=True)
I’m not sure why it’s complaining about an ExternalEnv, since I never configure one explicitly. Any ideas?
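From searching around, my current guess is that recent Ray releases enable the new Connector API by default, and the ExternalEnv code path that PolicyServerInput relies on does not support it (which is what the ValueError says). A possible workaround, assuming a Ray 2.x AlgorithmConfig where rollouts() accepts enable_connectors, would be to turn connectors off on the server; I haven't verified that this is the whole fix:

# Possible workaround (assumption, not verified): disable the new Connector
# API so the RolloutWorker falls back to the old path that supports ExternalEnv.
ppo_config.rollouts(
    num_rollout_workers=0,
    enable_connectors=False,  # connectors default to True in recent Ray 2.x
)

Is that the intended way to use PolicyServerInput with current versions, or is there a proper connector-based setup I’m missing?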