How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I have a custom model implementing ModelV2 that worked well with Ray 2.2. I am trying to migrate to Ray 2.6 or higher but have run into a number of issues.
In Ray 2.6, training works if I use a PPO config similar to my old one, but I could not load/restore the model (see the restore sketch below). Ray 2.8 asks me to explicitly add the following to the PPO training configuration:
config.training(_enable_learner_api=False)
config.rl_module(_enable_rl_module_api=False)
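
For context, here is a simplified sketch of how I build the full config (MultiAgentArena_v3, MyCallbacks, and my ModelV2 subclass, shown here under the placeholder name MyRNNModel, are my own project code; most env_config keys and the real per-agent PolicySpecs are omitted):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.models import ModelCatalog

# Register my ModelV2 subclass under the name that appears as
# model/custom_model in the config dump below.
ModelCatalog.register_custom_model("rnn_noFC", MyRNNModel)

config = (
    PPOConfig()
    .framework("torch")
    .environment(MultiAgentArena_v3, env_config={"width": 10, "height": 10})
    .rollouts(num_rollout_workers=2)
    .callbacks(MyCallbacks)
    .multi_agent(
        # Simplified: the real setup passes per-agent observation/action
        # spaces via PolicySpec.
        policies={"agent_0", "agent_1"},
        policy_mapping_fn=lambda agent_id, episode, worker, **kw: agent_id,
    )
    .training(
        model={"custom_model": "rnn_noFC"},
        train_batch_size=4000,
        lr=0.00005,
        # Opt out of the new Learner stack (Ray 2.8 requires this
        # explicitly when staying on ModelV2):
        _enable_learner_api=False,
    )
    .rl_module(_enable_rl_module_api=False)
)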
After disabling the RLModule API in either 2.6 or 2.8, the error below occurs during sampling, and I have no clue where to start debugging.
Any advice would be appreciated!
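
For completeness, the restore attempt that fails in 2.6 is roughly the following (the checkpoint path is a placeholder):

from ray.rllib.algorithms.algorithm import Algorithm

# Load the algorithm state saved under the old Ray version.
algo = Algorithm.from_checkpoint("~/ray_results/PPO/checkpoint_000100")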
Training started with configuration:
╭───────────────────────────────────────────────────────────────────────────╮
│ Training config │
├───────────────────────────────────────────────────────────────────────────┤
│ _AlgorithmConfig__prior_exploration_config │
│ _disable_action_flattening False │
│ _disable_execution_plan_api True │
│ _disable_initialize_loss_from_dummy_batch False │
│ _disable_preprocessor_api False │
│ _enable_learner_api False │
│ _enable_rl_module_api False │
│ _fake_gpus False │
│ _is_atari │
│ _learner_class │
│ _tf_policy_handles_more_than_one_loss False │
│ action_mask_key action_mask │
│ action_space │
│ actions_in_input_normalized False │
│ always_attach_evaluation_results False │
│ auto_wrap_old_gym_envs True │
│ batch_mode truncate_episodes │
│ callbacks ...ten.MyCallbacks'> │
│ checkpoint_trainable_policies_only False │
│ clip_actions False │
│ clip_param 0.3 │
│ clip_rewards │
│ compress_observations False │
│ count_steps_by env_steps │
│ create_env_on_driver False │
│ custom_eval_function │
│ delay_between_worker_restarts_s 60. │
│ disable_env_checking False │
│ eager_max_retraces 20 │
│ eager_tracing True │
│ enable_async_evaluation False │
│ enable_connectors True │
│ enable_tf1_exec_eagerly False │
│ entropy_coeff 0. │
│ entropy_coeff_schedule │
│ env ...tiAgentArena_v3'> │
│ env_config/ACTION_MAP/0 [0, -1] │
│ env_config/ACTION_MAP/1 [1, 0] │
│ env_config/ACTION_MAP/2 [0, 1] │
│ env_config/ACTION_MAP/3 [-1, 0] │
│ env_config/assigned_initialization_position [[1, 1], [3, 2]] │
│ env_config/collision_share_between_pairs True │
│ env_config/collision_shared_within_chasers False │
│ env_config/collision_suprpass_explore True │
│ env_config/height 10 │
│ env_config/num_agents 2 │
│ env_config/num_chasers 1 │
│ env_config/num_escapers 1 │
│ env_config/num_events 3 │
│ env_config/random_initialization_position True │
│ env_config/range_of_view 3 │
│ env_config/reward_dictionary/chaser/collision 0.1 │
│ env_config/reward_dictionary/chaser/explore 0.1 │
│ env_config/reward_dictionary/chaser/other -0.1 │
│ env_config/reward_dictionary/escaper/collision -0.1 │
│ env_config/reward_dictionary/escaper/explore 0.1 │
│ env_config/reward_dictionary/escaper/other -0.1 │
│ env_config/roles │
│ env_config/timestep_limit 100 │
│ env_config/use_1d_obs False │
│ env_config/visit_count_visible False │
│ env_config/width 10 │
│ env_runner_cls │
│ env_task_fn │
│ evaluation_config │
│ evaluation_duration 10 │
│ evaluation_duration_unit episodes │
│ evaluation_interval │
│ evaluation_num_workers 0 │
│ evaluation_parallel_to_training False │
│ evaluation_sample_timeout_s 180. │
│ exploration_config/type StochasticSampling │
│ explore True │
│ export_native_model_files False │
│ fake_sampler False │
│ framework torch │
│ gamma 0.99 │
│ grad_clip │
│ grad_clip_by global_norm │
│ ignore_worker_failures False │
│ in_evaluation False │
│ input sampler │
│ keep_per_episode_custom_metrics False │
│ kl_coeff 0.2 │
│ kl_target 0.01 │
│ lambda 1. │
│ local_gpu_idx 0 │
│ local_tf_session_args/inter_op_parallelism_threads 8 │
│ local_tf_session_args/intra_op_parallelism_threads 8 │
│ log_level DEBUG │
│ log_sys_usage True │
│ logger_config │
│ logger_creator │
│ lr 0.00005 │
│ lr_schedule │
│ max_num_worker_restarts 1000 │
│ max_requests_in_flight_per_sampler_worker 2 │
│ metrics_episode_collection_timeout_s 60. │
│ metrics_num_episodes_for_smoothing 100 │
│ min_sample_timesteps_per_iteration 0 │
│ min_time_s_per_iteration │
│ min_train_timesteps_per_iteration 0 │
│ model/_disable_action_flattening False │
│ model/_disable_preprocessor_api False │
│ model/_time_major False │
│ model/_use_default_native_models -1 │
│ model/always_check_shapes False │
│ model/attention_dim 64 │
│ model/attention_head_dim 32 │
│ model/attention_init_gru_gate_bias 2.0 │
│ model/attention_memory_inference 50 │
│ model/attention_memory_training 50 │
│ model/attention_num_heads 1 │
│ model/attention_num_transformer_units 1 │
│ model/attention_position_wise_mlp_dim 32 │
│ model/attention_use_n_prev_actions 0 │
│ model/attention_use_n_prev_rewards 0 │
│ model/conv_activation relu │
│ model/conv_filters │
│ model/custom_action_dist │
│ model/custom_model rnn_noFC │
│ model/custom_model_config/device cpu │
│ model/custom_model_config/fc_size 200 │
│ model/custom_model_config/l2_lambda 3 │
│ model/custom_model_config/l2_lambda_inp 0 │
│ model/custom_model_config/rnn_hidden_size 256 │
│ model/custom_preprocessor │
│ model/dim 84 │
│ model/encoder_latent_dim │
│ model/fcnet_activation tanh │
│ model/fcnet_hiddens [256, 256] │
│ model/framestack True │
│ model/free_log_std False │
│ model/grayscale False │
│ model/lstm_cell_size 256 │
│ model/lstm_use_prev_action False │
│ model/lstm_use_prev_action_reward -1 │
│ model/lstm_use_prev_reward False │
│ model/max_seq_len 20 │
│ model/no_final_linear False │
│ model/post_fcnet_activation relu │
│ model/post_fcnet_hiddens [] │
│ model/use_attention False │
│ model/use_lstm False │
│ model/vf_share_layers False │
│ model/zero_mean True │
│ normalize_actions True │
│ num_consecutive_worker_failures_tolerance 100 │
│ num_cpus_for_driver 1 │
│ num_cpus_per_learner_worker 1 │
│ num_cpus_per_worker 1 │
│ num_envs_per_worker 1 │
│ num_gpus 0 │
│ num_gpus_per_learner_worker 0 │
│ num_gpus_per_worker 0 │
│ num_learner_workers 2 │
│ num_sgd_iter 30 │
│ num_workers 2 │
│ observation_filter NoFilter │
│ observation_fn │
│ observation_space │
│ offline_sampling False │
│ ope_split_batch_by_episode True │
│ output │
│ output_compress_columns ['obs', 'new_obs'] │
│ output_max_file_size 67108864 │
│ placement_strategy PACK │
│ policies/agent_0 ... Discrete(4), {}) │
│ policies/agent_1 ... Discrete(4), {}) │
│ policies_to_train │
│ policy_map_cache -1 │
│ policy_map_capacity 100 │
│ policy_mapping_fn ...t 0x7fe586a6eac0> │
│ policy_states_are_swappable False │
│ postprocess_inputs False │
│ preprocessor_pref deepmind │
│ recreate_failed_workers False │
│ remote_env_batch_wait_ms 0 │
│ remote_worker_envs False │
│ render_env False │
│ replay_sequence_length │
│ restart_failed_sub_environments False │
│ rl_module_spec │
│ rollout_fragment_length auto │
│ sample_async False │
│ sample_collector ...leListCollector'> │
│ sampler_perf_stats_ema_coef │
│ seed │
│ sgd_minibatch_size 128 │
│ shuffle_buffer_size 0 │
│ shuffle_sequences True │
│ simple_optimizer -1 │
│ sync_filters_on_rollout_workers_timeout_s 60. │
│ synchronize_filters -1 │
│ tf_session_args/allow_soft_placement True │
│ tf_session_args/device_count/CPU 1 │
│ tf_session_args/gpu_options/allow_growth True │
│ tf_session_args/inter_op_parallelism_threads 2 │
│ tf_session_args/intra_op_parallelism_threads 2 │
│ tf_session_args/log_device_placement False │
│ torch_compile_learner False │
│ torch_compile_learner_dynamo_backend inductor │
│ torch_compile_learner_dynamo_mode │
│ torch_compile_learner_what_to_compile ...ile.FORWARD_TRAIN │
│ torch_compile_worker False │
│ torch_compile_worker_dynamo_backend onnxrt │
│ torch_compile_worker_dynamo_mode │
│ train_batch_size 4000 │
│ update_worker_filter_stats True │
│ use_critic True │
│ use_gae True │
│ use_kl_loss True │
│ use_worker_filter_stats True │
│ validate_workers_after_construction True │
│ vf_clip_param 10. │
│ vf_loss_coeff 1. │
│ vf_share_layers -1 │
│ worker_cls -1 │
│ worker_health_probe_timeout_s 60 │
│ worker_restore_timeout_s 1800 │
╰───────────────────────────────────────────────────────────────────────────╯
[2023-11-20 16:44:01,978 E 194560 194560] core_worker.cc:1705: Pushed Error with JobID: 01000000 of type: task with message: ray::RolloutWorker.apply() (pid=194560, ip=10.47.57.189, actor_id=c66c281d89b2e1609590dcfa01000000, repr=<ray.rllib.evaluation.rollout_worker._modify_class.<locals>.Class object at 0x7fe560c2ff50>)
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/utils/actor_manager.py", line 185, in apply
raise e
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/utils/actor_manager.py", line 176, in apply
return func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/execution/rollout_ops.py", line 86, in <lambda>
lambda w: w.sample(), local_worker=False, healthy_only=True
^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/rollout_worker.py", line 696, in sample
batches = [self.input_reader.next()]
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
batches = [self.get_data()]
^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/sampler.py", line 277, in get_data
item = next(self._env_runner)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/env_runner_v2.py", line 344, in run
outputs = self.step()
^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/env_runner_v2.py", line 370, in step
active_envs, to_eval, outputs = self._process_observations(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/env_runner_v2.py", line 637, in _process_observations
processed = policy.agent_connectors(acd_list)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/connectors/agent/pipeline.py", line 41, in __call__
ret = c(ret)
^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/connectors/connector.py", line 254, in __call__
return [self.transform(d) for d in acd_list]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/connectors/connector.py", line 254, in <listcomp>
return [self.transform(d) for d in acd_list]
^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/connectors/agent/view_requirement.py", line 118, in transform
sample_batch = agent_collector.build_for_inference()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/collectors/agent_collector.py", line 366, in build_for_inference
self._cache_in_np(np_data, data_col)
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/collectors/agent_collector.py", line 613, in _cache_in_np
cache_dict[key] = [_to_float_np_array(d) for d in self.buffers[key]]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/collectors/agent_collector.py", line 613, in <listcomp>
cache_dict[key] = [_to_float_np_array(d) for d in self.buffers[key]]
^^^^^^^^^^^^^^^^^^^^^
File "/home/lime/miniconda3/envs/ray26/lib/python3.11/site-packages/ray/rllib/evaluation/collectors/agent_collector.py", line 32, in _to_float_np_array
if torch and torch.is_tensor(v[0]):
~^^^
IndexError: list index out of range at time: 1.70053e+09