How severely does this issue affect your experience of using Ray?
- Low: It annoys or frustrates me for a moment.
I am having an issue where the verbose=1 setting is not being respected. There also doesn't seem to be any forward progress in the iterations or episode timesteps. The command prompt also repeatedly re-prints output for the same iteration/timestep (I am seeing this on regular Ray Tune training runs as well).
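For reference, this is how I understood the console verbosity is supposed to be constrained. The sketch below is only illustrative: the metric columns and report frequency are values I picked myself, and I have not confirmed whether a progress_reporter is still honored by the newer output engine.

# Sketch: how I understood Tune console output can be constrained (illustrative values).
from ray import train
from ray.tune import CLIReporter

reporter = CLIReporter(
    # Only report these columns in the periodic status table.
    metric_columns=["training_iteration", "timesteps_total", "episode_reward_mean"],
    # Print a status table at most once per minute.
    max_report_frequency=60,
)

run_config = train.RunConfig(
    verbose=1,                   # expected: status updates only, no full param/result dumps
    progress_reporter=reporter,  # assumption: may be ignored by the newer output engine
)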
Training Script
import argparse
import random

from ray import train, tune
from ray.tune import register_env
from ray.tune.schedulers import PopulationBasedTraining
from ray.rllib.algorithms.sac import SACConfig

from RemusRayEnvSimple import CustomMultiAgentEnv

if __name__ == "__main__":
    # Register the custom multi-agent environment (the factory must return an
    # env instance, not the class itself).
    register_env("REMUS RAY", lambda config: CustomMultiAgentEnv(config))
    config = (
        SACConfig()
        .framework("torch")
        .debugging(seed=0, log_level="ERROR")
        .rollouts(
            # batch_mode="truncate_episodes",
            # rollout_fragment_length=256,
        )
        .training(
            train_batch_size=tune.choice([10000, 20000, 40000]),
            lr=1e-4,
            # replay_buffer_config={"type": "MultiAgentPrioritizedReplayBuffer"},
        )
        .environment(env=CustomMultiAgentEnv)
        .multi_agent(
            policies=["policy0", "policy1"],
            policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: (
                "policy0" if agent_id == "agent_0" else "policy1"
            ),
            policies_to_train=["policy0", "policy1"],
            count_steps_by="env_steps",
        )
        # .reporting(min_train_timesteps_per_iteration=256 * 10)
    )
    # Modify the actor and critic network configurations directly.
    config.twin_q = True
    config.q_model_config["fcnet_hiddens"] = [512, 512]
    config.q_model_config["fcnet_activation"] = "relu"
    config.policy_model_config["fcnet_hiddens"] = [1024, 1024]
    config.policy_model_config["fcnet_activation"] = "relu"
    config.replay_buffer_config["_enable_replay_buffer_api"] = True
    config.replay_buffer_config["type"] = "MultiAgentReplayBuffer"
    config.replay_buffer_config["capacity"] = 5_000_000
    config.num_sgd_iter = tune.choice([10, 20, 30])
    config.sgd_minibatch_size = tune.choice([128, 512, 2048])
    config.clip_param = 0.2
    config.lambda_ = 0.95
    config.kl_coeff = 1.0
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    args, _ = parser.parse_known_args()

    # Postprocess the perturbed config to ensure it's still valid.
    def explore(config):
        # Ensure we collect enough timesteps to do SGD.
        if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
            config["train_batch_size"] = config["sgd_minibatch_size"] * 2
        # Ensure we run at least one SGD iteration.
        if config["num_sgd_iter"] < 1:
            config["num_sgd_iter"] = 1
        return config
    hyperparam_mutations = {
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(1, 30),
        "sgd_minibatch_size": lambda: random.randint(128, 16384),
        "train_batch_size": lambda: random.randint(128, 160000),
    }

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        perturbation_interval=120,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams.
        hyperparam_mutations=hyperparam_mutations,
        custom_explore_fn=explore,
    )
    # Stop when we've either reached 250 training iterations or episode_reward_mean=0.
    stopping_criteria = {"training_iteration": 250, "episode_reward_mean": 0}

    tuner = tune.Tuner(
        "SAC",
        tune_config=tune.TuneConfig(
            metric="episode_reward_mean",
            mode="max",
            scheduler=pbt,
            num_samples=1 if args.smoke_test else 2,
        ),
        param_space=config,
        run_config=train.RunConfig(stop=stopping_criteria, verbose=1),
    )
    results = tuner.fit()
    import pprint

    best_result = results.get_best_result()

    print("Best performing trial's final set of hyperparameters:\n")
    pprint.pprint(
        {k: v for k, v in best_result.config.items() if k in hyperparam_mutations}
    )

    print("\nBest performing trial's final reported metrics:\n")
    metrics_to_print = [
        "episode_reward_mean",
        "episode_reward_max",
        "episode_reward_min",
        "episode_len_mean",
    ]
    pprint.pprint(
        {k: v for k, v in best_result.metrics.items() if k in metrics_to_print}
    )
Output
Trial status: 2 RUNNING
Current time: 2024-04-24 20:31:27. Total running time: 30s
Logical resource usage: 2.0/12 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:RTX)
Current best trial: 151d4_00000 with episode_reward_mean=-3429.7659050742477 and params={'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'num_learner_workers': 0, 'num_gpus_per_learner_worker': 0, 'num_cpus_per_learner_worker': 1, 'local_gpu_idx': 0, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'torch_compile_learner': False, 'torch_compile_learner_what_to_compile': <TorchCompileWhatToCompile.FORWARD_TRAIN: 'forward_train'>, 'torch_compile_learner_dynamo_backend': 'inductor', 'torch_compile_learner_dynamo_mode': None, 'torch_compile_worker': False, 'torch_compile_worker_dynamo_backend': 'onnxrt', 'torch_compile_worker_dynamo_mode': None, 'env': <class 'RemusRayEnvSimple.CustomMultiAgentEnv'>, 'env_config': {}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': True, 'clip_actions': False, 'disable_env_checking': False, 'auto_wrap_old_gym_envs': True, 'action_mask_key': 'action_mask', '_is_atari': None, 'env_runner_cls': None, 'num_envs_per_worker': 1, 'enable_connectors': True, '_env_to_module_connector': None, '_module_to_env_connector': None, 'add_default_connectors_to_env_to_module_pipeline': True, 'add_default_connectors_to_module_to_env_pipeline': True, 'episode_lookback_horizon': 1, 'rollout_fragment_length': 'auto', 'batch_mode': 'complete_episodes', 'validate_workers_after_construction': True, 'compress_observations': False, 'sampler_perf_stats_ema_coef': None, 'sample_async': -1, 'remote_worker_envs': False, 'remote_env_batch_wait_ms': 0, 'enable_tf1_exec_eagerly': False, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>, 'preprocessor_pref': 'deepmind', 'observation_filter': 'NoFilter', 'update_worker_filter_stats': True, 'use_worker_filter_stats': True, 'gamma': 0.99, 'lr': 0.0001, 'grad_clip': None, 'grad_clip_by': 'global_norm', 'train_batch_size': 10000, 'train_batch_size_per_learner': None, 'model': {'_disable_preprocessor_api': False, '_disable_action_flattening': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'fcnet_weights_initializer': None, 'fcnet_weights_initializer_config': None, 'fcnet_bias_initializer': None, 'fcnet_bias_initializer_config': None, 'conv_filters': None, 'conv_activation': 'relu', 'conv_kernel_initializer': None, 'conv_kernel_initializer_config': None, 'conv_bias_initializer': None, 'conv_bias_initializer_config': None, 'conv_transpose_kernel_initializer': None, 'conv_transpose_kernel_initializer_config': None, 'conv_transpose_bias_initializer': None, 'conv_transpose_bias_initializer_config': None, 'post_fcnet_hiddens': [], 'post_fcnet_activation': 'relu', 'post_fcnet_weights_initializer': None, 'post_fcnet_weights_initializer_config': None, 'post_fcnet_bias_initializer': None, 'post_fcnet_bias_initializer_config': None, 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': True, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action': False, 
'lstm_use_prev_reward': False, 'lstm_weights_initializer': None, 'lstm_weights_initializer_config': None, 'lstm_bias_initializer': None, 'lstm_bias_initializer_config': None, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim': 64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_prev_rewards': 0, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None, 'encoder_latent_dim': None, 'always_check_shapes': False, 'lstm_use_prev_action_reward': -1, '_use_default_native_models': -1}, '_learner_connector': None, 'add_default_connectors_to_learner_pipeline': True, 'optimizer': {}, 'max_requests_in_flight_per_sampler_worker': 2, '_learner_class': None, 'explore': True, 'exploration_config': {'type': 'StochasticSampling'}, 'algorithm_config_overrides_per_module': {}, '_per_module_overrides': {}, 'count_steps_by': 'env_steps', 'policies': ['policy0', 'policy1'], 'policy_map_capacity': 100, 'policy_mapping_fn': <function <lambda> at 0x000002828D0E3880>, 'policies_to_train': ['policy0', 'policy1'], 'policy_states_are_swappable': False, 'observation_fn': None, 'input_config': {}, 'actions_in_input_normalized': False, 'postprocess_inputs': False, 'shuffle_buffer_size': 0, 'output': None, 'output_config': {}, 'output_compress_columns': ['obs', 'new_obs'], 'output_max_file_size': 67108864, 'offline_sampling': False, 'evaluation_interval': None, 'evaluation_duration': 10, 'evaluation_duration_unit': 'episodes', 'evaluation_sample_timeout_s': 180.0, 'evaluation_parallel_to_training': False, 'evaluation_config': None, 'off_policy_estimation_methods': {}, 'ope_split_batch_by_episode': True, 'evaluation_num_workers': 0, 'custom_async_evaluation_function': None, 'always_attach_evaluation_results': False, 'enable_async_evaluation': False, 'in_evaluation': False, 'sync_filters_on_rollout_workers_timeout_s': 60.0, 'keep_per_episode_custom_metrics': False, 'metrics_episode_collection_timeout_s': 60.0, 'metrics_num_episodes_for_smoothing': 100, 'min_time_s_per_iteration': 1, 'min_train_timesteps_per_iteration': 0, 'min_sample_timesteps_per_iteration': 100, 'export_native_model_files': False, 'checkpoint_trainable_policies_only': False, 'logger_creator': None, 'logger_config': None, 'log_level': 'ERROR', 'log_sys_usage': True, 'fake_sampler': False, 'seed': 0, 'ignore_worker_failures': False, 'recreate_failed_workers': False, 'max_num_worker_restarts': 1000, 'delay_between_worker_restarts_s': 60.0, 'restart_failed_sub_environments': False, 'num_consecutive_worker_failures_tolerance': 100, 'worker_health_probe_timeout_s': 60, 'worker_restore_timeout_s': 1800, '_rl_module_spec': None, '_AlgorithmConfig__prior_exploration_config': None, '_enable_new_api_stack': False, '_tf_policy_handles_more_than_one_loss': False, '_disable_preprocessor_api': False, '_disable_action_flattening': False, '_disable_initialize_loss_from_dummy_batch': False, 'simple_optimizer': False, 'policy_map_cache': -1, 'worker_cls': -1, 'synchronize_filters': -1, 'replay_sequence_length': None, '_disable_execution_plan_api': -1, 'twin_q': True, 'q_model_config': {'fcnet_hiddens': [512, 512], 'fcnet_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': None, 'custom_model': 
None, 'custom_model_config': {}}, 'policy_model_config': {'fcnet_hiddens': [1024, 1024], 'fcnet_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': None, 'custom_model': None, 'custom_model_config': {}}, 'tau': 0.005, 'initial_alpha': 1.0, 'target_entropy': 'auto', 'n_step': 1, 'replay_buffer_config': {'_enable_replay_buffer_api': True, 'type': 'MultiAgentReplayBuffer', 'capacity': 5000000, 'prioritized_replay': False, 'prioritized_replay_alpha': 0.6, 'prioritized_replay_beta': 0.4, 'prioritized_replay_eps': 1e-06, 'worker_side_prioritization': False}, 'store_buffer_in_checkpoints': False, 'training_intensity': None, 'optimization': {'actor_learning_rate': 0.0003, 'critic_learning_rate': 0.0003, 'entropy_learning_rate': 0.0003}, 'target_network_update_freq': 0, 'num_steps_sampled_before_learning_starts': 1500, '_deterministic_loss': False, '_use_beta_distribution': False, 'use_state_preprocessor': -1, 'worker_side_prioritization': -1, 'num_sgd_iter': 30, 'sgd_minibatch_size': 2048, 'clip_param': 0.2, 'kl_coeff': 1.0, '__stdout_file__': None, '__stderr_file__': None, 'lambda': 0.95, 'input': 'sampler', 'callbacks': <class 'ray.rllib.algorithms.callbacks.DefaultCallbacks'>, 'create_env_on_driver': False, 'custom_eval_function': None, 'framework': 'torch', 'num_cpus_for_driver': 1, 'num_workers': 0}
╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ Trial name                            status     train_batch_size   num_sgd_iter   sgd_minibatch_size   iter   total time (s)     ts     reward   episode_reward_max   episode_reward_min   episode_len_mean   episodes_this_iter │
├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ SAC_CustomMultiAgentEnv_151d4_00000   RUNNING               10000             30                 2048      1          8.95022    500   -3429.77             -3429.77             -3429.77                500                    1 │
│ SAC_CustomMultiAgentEnv_151d4_00001   RUNNING               40000             30                  512      1          8.73973    500   -3429.77             -3429.77             -3429.77                500                    1 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
Trial status: 2 RUNNING
Current time: 2024-04-24 20:31:57. Total running time: 1min 0s
Logical resource usage: 2.0/12 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:RTX)
Current best trial: 151d4_00000 with episode_reward_mean=-3402.975541878805 and params={'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'num_learner_workers': 0, 'num_gpus_per_learner_worker': 0, 'num_cpus_per_learner_worker': 1, 'local_gpu_idx': 0, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1},
'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'torch_compile_learner': False, 'torch_compile_learner_what_to_compile': <TorchCompileWhatToCompile.FORWARD_TRAIN: 'forward_train'>, 'torch_compile_learner_dynamo_backend': 'inductor', 'torch_compile_learner_dynamo_mode': None, 'torch_compile_worker': False, 'torch_compile_worker_dynamo_backend': 'onnxrt', 'torch_compile_worker_dynamo_mode': None, 'env': <class 'RemusRayEnvSimple.CustomMultiAgentEnv'>, 'env_config': {}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': True, 'clip_actions': False, 'disable_env_checking': False, 'auto_wrap_old_gym_envs': True, 'action_mask_key': 'action_mask', '_is_atari': None, 'env_runner_cls': None, 'num_envs_per_worker': 1, 'enable_connectors': True, '_env_to_module_connector': None, '_module_to_env_connector': None, 'add_default_connectors_to_env_to_module_pipeline': True, 'add_default_connectors_to_module_to_env_pipeline': True, 'episode_lookback_horizon':
1, 'rollout_fragment_length': 'auto', 'batch_mode': 'complete_episodes', 'validate_workers_after_construction': True, 'compress_observations': False, 'sampler_perf_stats_ema_coef': None, 'sample_async': -1, 'remote_worker_envs': False, 'remote_env_batch_wait_ms': 0, 'enable_tf1_exec_eagerly': False, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>, 'preprocessor_pref': 'deepmind', 'observation_filter': 'NoFilter', 'update_worker_filter_stats': True, 'use_worker_filter_stats': True, 'gamma': 0.99, 'lr': 0.0001, 'grad_clip': None, 'grad_clip_by': 'global_norm', 'train_batch_size': 10000, 'train_batch_size_per_learner': None, 'model': {'_disable_preprocessor_api': False, '_disable_action_flattening': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'fcnet_weights_initializer': None, 'fcnet_weights_initializer_config': None, 'fcnet_bias_initializer': None, 'fcnet_bias_initializer_config': None, 'conv_filters': None, 'conv_activation': 'relu', 'conv_kernel_initializer': None, 'conv_kernel_initializer_config': None, 'conv_bias_initializer': None, 'conv_bias_initializer_config': None, 'conv_transpose_kernel_initializer': None, 'conv_transpose_kernel_initializer_config': None, 'conv_transpose_bias_initializer': None, 'conv_transpose_bias_initializer_config': None, 'post_fcnet_hiddens': [], 'post_fcnet_activation': 'relu', 'post_fcnet_weights_initializer': None, 'post_fcnet_weights_initializer_config': None, 'post_fcnet_bias_initializer': None, 'post_fcnet_bias_initializer_config':
None, 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': True, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action': False, 'lstm_use_prev_reward': False, 'lstm_weights_initializer': None, 'lstm_weights_initializer_config': None, 'lstm_bias_initializer': None, 'lstm_bias_initializer_config': None, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim':
64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_prev_rewards': 0, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None, 'encoder_latent_dim': None, 'always_check_shapes': False, 'lstm_use_prev_action_reward': -1, '_use_default_native_models': -1}, '_learner_connector': None, 'add_default_connectors_to_learner_pipeline': True, 'optimizer': {}, 'max_requests_in_flight_per_sampler_worker': 2, '_learner_class': None, 'explore': True, 'exploration_config': {'type': 'StochasticSampling'}, 'algorithm_config_overrides_per_module': {}, '_per_module_overrides': {}, 'count_steps_by': 'env_steps', 'policies': ['policy0', 'policy1'], 'policy_map_capacity': 100, 'policy_mapping_fn': <function <lambda> at 0x000002828D0E23B0>, 'policies_to_train': ['policy0', 'policy1'], 'policy_states_are_swappable': False, 'observation_fn': None, 'input_config': {}, 'actions_in_input_normalized': False, 'postprocess_inputs': False, 'shuffle_buffer_size': 0, 'output': None, 'output_config': {}, 'output_compress_columns': ['obs', 'new_obs'], 'output_max_file_size': 67108864, 'offline_sampling': False, 'evaluation_interval': None, 'evaluation_duration': 10, 'evaluation_duration_unit': 'episodes', 'evaluation_sample_timeout_s': 180.0, 'evaluation_parallel_to_training': False, 'evaluation_config': None, 'off_policy_estimation_methods': {}, 'ope_split_batch_by_episode': True, 'evaluation_num_workers': 0, 'custom_async_evaluation_function': None, 'always_attach_evaluation_results': False, 'enable_async_evaluation': False, 'in_evaluation': False, 'sync_filters_on_rollout_workers_timeout_s': 60.0, 'keep_per_episode_custom_metrics': False, 'metrics_episode_collection_timeout_s': 60.0, 'metrics_num_episodes_for_smoothing': 100, 'min_time_s_per_iteration': 1, 'min_train_timesteps_per_iteration': 0, 'min_sample_timesteps_per_iteration': 100, 'export_native_model_files': False, 'checkpoint_trainable_policies_only': False, 'logger_creator': None, 'logger_config': None, 'log_level': 'ERROR', 'log_sys_usage': True, 'fake_sampler': False, 'seed': 0, 'ignore_worker_failures': False, 'recreate_failed_workers': False, 'max_num_worker_restarts': 1000, 'delay_between_worker_restarts_s': 60.0, 'restart_failed_sub_environments': False, 'num_consecutive_worker_failures_tolerance': 100, 'worker_health_probe_timeout_s': 60, 'worker_restore_timeout_s': 1800, '_rl_module_spec': None, '_AlgorithmConfig__prior_exploration_config': None, '_enable_new_api_stack': False, '_tf_policy_handles_more_than_one_loss': False, '_disable_preprocessor_api': False, '_disable_action_flattening': False, '_disable_initialize_loss_from_dummy_batch': False, 'simple_optimizer': False, 'policy_map_cache': -1, 'worker_cls': -1, 'synchronize_filters': -1, 'replay_sequence_length': None, '_disable_execution_plan_api': -1, 'twin_q': True, 'q_model_config': {'fcnet_hiddens': [512, 512], 'fcnet_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': None, 'custom_model': None, 'custom_model_config': {}}, 'policy_model_config': {'fcnet_hiddens': [1024, 1024], 'fcnet_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': None, 'custom_model': None, 'custom_model_config': {}}, 'tau': 0.005, 'initial_alpha': 1.0, 'target_entropy': 
'auto', 'n_step': 1, 'replay_buffer_config': {'_enable_replay_buffer_api': True, 'type': 'MultiAgentReplayBuffer', 'capacity': 5000000, 'prioritized_replay': False, 'prioritized_replay_alpha': 0.6, 'prioritized_replay_beta': 0.4, 'prioritized_replay_eps': 1e-06, 'worker_side_prioritization': False}, 'store_buffer_in_checkpoints': False, 'training_intensity': None, 'optimization': {'actor_learning_rate': 0.0003, 'critic_learning_rate': 0.0003, 'entropy_learning_rate': 0.0003}, 'target_network_update_freq': 0, 'num_steps_sampled_before_learning_starts': 1500, '_deterministic_loss': False, '_use_beta_distribution': False, 'use_state_preprocessor': -1, 'worker_side_prioritization': -1, 'num_sgd_iter': 30, 'sgd_minibatch_size': 2048, 'clip_param': 0.2, 'kl_coeff': 1.0, '__stdout_file__': None, '__stderr_file__': None, 'lambda': 0.95, 'input': 'sampler', 'callbacks': <class 'ray.rllib.algorithms.callbacks.DefaultCallbacks'>, 'create_env_on_driver': False, 'custom_eval_function': None, 'framework': 'torch', 'num_cpus_for_driver': 1, 'num_workers': 0}
╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ Trial name                            status     train_batch_size   num_sgd_iter   sgd_minibatch_size   iter   total time (s)     ts     reward   episode_reward_max   episode_reward_min   episode_len_mean   episodes_this_iter │
├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ SAC_CustomMultiAgentEnv_151d4_00000   RUNNING               10000             30                 2048      3          26.0897   1500   -3402.98             -3102.86             -3676.31                500                    1 │
│ SAC_CustomMultiAgentEnv_151d4_00001   RUNNING               40000             30                  512      3          26.0153   1500   -3402.98             -3102.86             -3676.31                500                    1 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
Trial status: 2 RUNNING