Console log:
(AIRDQN pid=20592) 2023-02-09 21:56:44,973 INFO simple_q.py:308 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True
if this doesn't work for you.
(AIRDQN pid=20592) 2023-02-09 21:56:44,974 INFO algorithm.py:460 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(AIRDQN pid=20669) 2023-02-09 21:56:44,973 INFO simple_q.py:308 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True
if this doesn't work for you.
(AIRDQN pid=20669) 2023-02-09 21:56:44,974 INFO algorithm.py:460 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(AIRDQN pid=20666) 2023-02-09 21:56:44,978 INFO simple_q.py:308 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True
if this doesn't work for you.
(AIRDQN pid=20666) 2023-02-09 21:56:44,979 INFO algorithm.py:460 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
Read: 0%| | 0/1 [00:00<?, ?it/s]
Read: 0%| | 0/1 [00:00<?, ?it/s]
(AIRDQN pid=20667) 2023-02-09 21:56:44,988 INFO simple_q.py:308 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True
if this doesn't work for you.
(AIRDQN pid=20667) 2023-02-09 21:56:44,989 INFO algorithm.py:460 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
Read: 0%| | 0/1 [00:00<?, ?it/s]
Read: 0%| | 0/1 [00:00<?, ?it/s]
(AIRDQN pid=20668) 2023-02-09 21:56:45,242 INFO simple_q.py:308 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True
if this doesn't work for you.
(AIRDQN pid=20668) 2023-02-09 21:56:45,243 INFO algorithm.py:460 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
Read: 0%| | 0/1 [00:00<?, ?it/s]
(AIRDQN pid=19972) /home/ec2-user/.local/lib/python3.7/site-packages/ray/rllib/utils/filter.py:84: DeprecationWarning: Passing None into shape arguments as an alias for () is deprecated.
(AIRDQN pid=19972) self.mean_array = np.zeros(shape)
(RolloutWorker pid=20076) /home/ec2-user/.local/lib/python3.7/site-packages/ray/rllib/utils/filter.py:84: DeprecationWarning: Passing None into shape arguments as an alias for () is deprecated.
(RolloutWorker pid=20076) self.mean_array = np.zeros(shape)
Here is the tail end of the `ray stack` output, captured while the process was hanging. How can I attach a file to share the entire `ray stack` output?
Stack dump for ec2-user 37113 0.8 0.2 64578864 453176 pts/1 SNl+ 22:09 0:11 ray::AIRRLTrainer.__init__()
Process 37113: ray::AIRRLTrainer.__init__()
Python v3.7.16 (/usr/bin/python3.7)
Thread 37113 (idle): "MainThread"
pthread_cond_timedwait@@GLIBC_2.3.2 (libpthread-2.26.so)
ray::core::GetRequest::Wait (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::GetImpl (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::Wait (ray/_raylet.so)
ray::core::CoreWorker::Wait (ray/_raylet.so)
wait (ray/_raylet.so)
wait (ray/_private/worker.py:2481)
wrapper (ray/_private/client_mode_hook.py:105)
fetch_until_complete (ray/data/_internal/progress_bar.py:74)
_apply (ray/data/_internal/compute.py:115)
__call__ (ray/data/_internal/plan.py:672)
execute (ray/data/_internal/plan.py:309)
__init__ (ray/data/dataset.py:217)
repartition (ray/data/dataset.py:810)
get_dataset_and_shards (ray/rllib/offline/dataset_reader.py:179)
__init__ (ray/rllib/evaluation/worker_set.py:131)
setup (ray/rllib/algorithms/algorithm.py:531)
_resume_span (ray/util/tracing/tracing_helper.py:466)
__init__ (ray/tune/trainable/trainable.py:161)
__init__ (ray/rllib/algorithms/algorithm.py:414)
__init__ (ray/train/rl/rl_trainer.py:214)
_resume_span (ray/util/tracing/tracing_helper.py:466)
actor_method_executor (ray/_private/function_manager.py:674)
function_executor (ray/_raylet.so)
_raylet_task_execution_handler (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string const&, std::string const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool*, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool), ray::Status ()(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string, std::string, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool)>::_M_invoke (ray/_raylet.so)
ray::core::CoreWorker::ExecuteTask (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool, bool*), std::_Bind<ray::Status (ray::core::CoreWorker(ray::core::CoreWorker*, std::_Placeholder<1>, std::_Placeholder<2>, std::_Placeholder<3>, std::_Placeholder<4>, std::_Placeholder<5>, std::_Placeholder<6>, std::_Placeholder<7>)::)(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > > const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool*, bool*)> >::_M_invoke (ray/_raylet.so)
ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}::operator() const (ray/_raylet.so)
std::_Function_handler<void (std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>), ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}>::_M_invoke (ray/_raylet.so)
ray::core::InboundRequest::Accept (ray/_raylet.so)
ray::core::NormalSchedulingQueue::ScheduleRequests (ray/_raylet.so)
EventTracker::RecordExecution (ray/_raylet.so)
std::_Function_handler<void (), instrumented_io_context::post(std::function<void ()>, std::string)::{lambda()#1}>::_M_invoke (ray/_raylet.so)
boost::asio::detail::completion_handler<std::function<void ()>, boost::asio::io_context::basic_executor_type<std::allocator, (unsigned int)0> >::do_complete (ray/_raylet.so)
boost::asio::detail::scheduler::do_run_one (ray/_raylet.so)
boost::asio::detail::scheduler::run (ray/_raylet.so)
boost::asio::io_context::run (ray/_raylet.so)
ray::core::CoreWorker::RunTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcessImpl::RunWorkerTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcess::RunTaskExecutionLoop (ray/_raylet.so)
run_task_loop (ray/_raylet.so)
main_loop (ray/_private/worker.py:763)
<module> (ray/_private/workers/default_worker.py:231)
Thread 38420 (idle): "ray_import_thread"
do_futex_wait (libpthread-2.26.so)
__new_sem_wait_slow (libpthread-2.26.so)
PyThread_acquire_lock_timed (libpython3.7m.so.1.0)
wait (threading.py:300)
_wait_once (grpc/_common.py:112)
wait (grpc/_common.py:157)
result (grpc/_channel.py:735)
_poll_locked (ray/_private/gcs_pubsub.py:249)
poll (ray/_private/gcs_pubsub.py:385)
_run (ray/_private/import_thread.py:70)
run (threading.py:870)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Thread 38621 (idle): "Thread-2"
do_futex_wait (libpthread-2.26.so)
__new_sem_wait_slow (libpthread-2.26.so)
PyThread_acquire_lock_timed (libpython3.7m.so.1.0)
wait (threading.py:300)
wait (threading.py:552)
run (tqdm/_monitor.py:60)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Thread 47419 (idle): "Thread-5"
epoll_wait (libc-2.26.so)
0x7f21b28defae (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b28e31a2 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b28e738f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b29d7e47 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b2a452b5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b2a45479 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b2add7b8 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b2addf5f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f21b2adfdb5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
channel_spin (grpc/_channel.py:1258)
0x7f21b2a2740c (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
run (threading.py:870)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Stack dump for ec2-user 37117 0.8 0.2 64578792 453884 pts/1 SNl+ 22:09 0:10 ray::AIRRLTrainer.__init__()
Process 37117: ray::AIRRLTrainer.__init__()
Python v3.7.16 (/usr/bin/python3.7)
Thread 37117 (idle): "MainThread"
pthread_cond_timedwait@@GLIBC_2.3.2 (libpthread-2.26.so)
ray::core::GetRequest::Wait (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::GetImpl (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::Wait (ray/_raylet.so)
ray::core::CoreWorker::Wait (ray/_raylet.so)
wait (ray/_raylet.so)
wait (ray/_private/worker.py:2481)
wrapper (ray/_private/client_mode_hook.py:105)
fetch_until_complete (ray/data/_internal/progress_bar.py:74)
_apply (ray/data/_internal/compute.py:115)
__call__ (ray/data/_internal/plan.py:672)
execute (ray/data/_internal/plan.py:309)
__init__ (ray/data/dataset.py:217)
repartition (ray/data/dataset.py:810)
get_dataset_and_shards (ray/rllib/offline/dataset_reader.py:179)
__init__ (ray/rllib/evaluation/worker_set.py:131)
setup (ray/rllib/algorithms/algorithm.py:531)
_resume_span (ray/util/tracing/tracing_helper.py:466)
__init__ (ray/tune/trainable/trainable.py:161)
__init__ (ray/rllib/algorithms/algorithm.py:414)
__init__ (ray/train/rl/rl_trainer.py:214)
_resume_span (ray/util/tracing/tracing_helper.py:466)
actor_method_executor (ray/_private/function_manager.py:674)
function_executor (ray/_raylet.so)
_raylet_task_execution_handler (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string const&, std::string const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool*, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool), ray::Status ()(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string, std::string, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool)>::_M_invoke (ray/_raylet.so)
ray::core::CoreWorker::ExecuteTask (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool, bool*), std::_Bind<ray::Status (ray::core::CoreWorker(ray::core::CoreWorker*, std::_Placeholder<1>, std::_Placeholder<2>, std::_Placeholder<3>, std::_Placeholder<4>, std::_Placeholder<5>, std::_Placeholder<6>, std::_Placeholder<7>)::)(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > > const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool*, bool*)> >::_M_invoke (ray/_raylet.so)
ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}::operator() const (ray/_raylet.so)
std::_Function_handler<void (std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>), ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}>::_M_invoke (ray/_raylet.so)
ray::core::InboundRequest::Accept (ray/_raylet.so)
ray::core::NormalSchedulingQueue::ScheduleRequests (ray/_raylet.so)
EventTracker::RecordExecution (ray/_raylet.so)
std::_Function_handler<void (), instrumented_io_context::post(std::function<void ()>, std::string)::{lambda()#1}>::_M_invoke (ray/_raylet.so)
boost::asio::detail::completion_handler<std::function<void ()>, boost::asio::io_context::basic_executor_type<std::allocator, (unsigned int)0> >::do_complete (ray/_raylet.so)
boost::asio::detail::scheduler::do_run_one (ray/_raylet.so)
boost::asio::detail::scheduler::run (ray/_raylet.so)
boost::asio::io_context::run (ray/_raylet.so)
ray::core::CoreWorker::RunTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcessImpl::RunWorkerTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcess::RunTaskExecutionLoop (ray/_raylet.so)
run_task_loop (ray/_raylet.so)
main_loop (ray/_private/worker.py:763)
<module> (ray/_private/workers/default_worker.py:231)
Thread 38489 (idle): "ray_import_thread"
do_futex_wait (libpthread-2.26.so)
__new_sem_wait_slow (libpthread-2.26.so)
PyThread_acquire_lock_timed (libpython3.7m.so.1.0)
wait (threading.py:300)
_wait_once (grpc/_common.py:112)
wait (grpc/_common.py:157)
result (grpc/_channel.py:735)
_poll_locked (ray/_private/gcs_pubsub.py:249)
poll (ray/_private/gcs_pubsub.py:385)
_run (ray/_private/import_thread.py:70)
run (threading.py:870)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Thread 38624 (idle): "Thread-2"
do_futex_wait (libpthread-2.26.so)
__new_sem_wait_slow (libpthread-2.26.so)
PyThread_acquire_lock_timed (libpython3.7m.so.1.0)
wait (threading.py:300)
wait (threading.py:552)
run (tqdm/_monitor.py:60)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Thread 47463 (idle): "Thread-5"
epoll_wait (libc-2.26.so)
0x7f8ecc95afae (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8ecc95f1a2 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8ecc96338f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8ecca53e47 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8eccac12b5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8eccac1479 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8eccb597b8 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8eccb59f5f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7f8eccb5bdb5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
channel_spin (grpc/_channel.py:1258)
0x7f8eccaa340c (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
run (threading.py:870)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)
Stack dump for ec2-user 37803 0.8 0.2 64578740 453176 pts/1 SNl+ 22:09 0:11 ray::AIRRLTrainer.__init__()
Process 37803: ray::AIRRLTrainer.__init__()
Python v3.7.16 (/usr/bin/python3.7)
Thread 37803 (idle): "MainThread"
pthread_cond_timedwait@@GLIBC_2.3.2 (libpthread-2.26.so)
ray::core::GetRequest::Wait (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::GetImpl (ray/_raylet.so)
ray::core::CoreWorkerMemoryStore::Wait (ray/_raylet.so)
ray::core::CoreWorker::Wait (ray/_raylet.so)
wait (ray/_raylet.so)
wait (ray/_private/worker.py:2481)
wrapper (ray/_private/client_mode_hook.py:105)
fetch_until_complete (ray/data/_internal/progress_bar.py:74)
_apply (ray/data/_internal/compute.py:115)
__call__ (ray/data/_internal/plan.py:672)
execute (ray/data/_internal/plan.py:309)
__init__ (ray/data/dataset.py:217)
repartition (ray/data/dataset.py:810)
get_dataset_and_shards (ray/rllib/offline/dataset_reader.py:179)
__init__ (ray/rllib/evaluation/worker_set.py:131)
setup (ray/rllib/algorithms/algorithm.py:531)
_resume_span (ray/util/tracing/tracing_helper.py:466)
__init__ (ray/tune/trainable/trainable.py:161)
__init__ (ray/rllib/algorithms/algorithm.py:414)
__init__ (ray/train/rl/rl_trainer.py:214)
_resume_span (ray/util/tracing/tracing_helper.py:466)
actor_method_executor (ray/_private/function_manager.py:674)
function_executor (ray/_raylet.so)
_raylet_task_execution_handler (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string const&, std::string const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool*, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool), ray::Status ()(ray::rpc::Address const&, ray::rpc::TaskType, std::string, ray::core::RayFunction const&, std::unordered_map<std::string, double, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, double> > > const&, std::vector<std::shared_ptrray::RayObject, std::allocator<std::shared_ptrray::RayObject > > const&, std::vector<ray::rpc::ObjectReference, std::allocatorray::rpc::ObjectReference > const&, std::string, std::string, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::shared_ptrray::LocalMemoryBuffer&, bool, bool*, std::vector<ray::ConcurrencyGroup, std::allocatorray::ConcurrencyGroup > const&, std::string, bool)>::_M_invoke (ray/_raylet.so)
ray::core::CoreWorker::ExecuteTask (ray/_raylet.so)
std::_Function_handler<ray::Status(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool, bool*), std::_Bind<ray::Status (ray::core::CoreWorker(ray::core::CoreWorker*, std::_Placeholder<1>, std::_Placeholder<2>, std::_Placeholder<3>, std::_Placeholder<4>, std::_Placeholder<5>, std::_Placeholder<6>, std::_Placeholder<7>)::)(ray::TaskSpecification const&, std::shared_ptr<std::unordered_map<std::string, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > >, std::hashstd::string, std::equal_tostd::string, std::allocator<std::pair<std::string const, std::vector<std::pair<long, double>, std::allocator<std::pair<long, double> > > > > > > const&, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, std::vector<std::pair<ray::ObjectID, std::shared_ptrray::RayObject >, std::allocator<std::pair<ray::ObjectID, std::shared_ptrray::RayObject > > >, google::protobuf::RepeatedPtrFieldray::rpc::ObjectReferenceCount, bool*, bool*)> >::_M_invoke (ray/_raylet.so)
ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}::operator() const (ray/_raylet.so)
std::_Function_handler<void (std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>), ray::core::CoreWorkerDirectTaskReceiver::HandleTask(ray::rpc::PushTaskRequest, ray::rpc::PushTaskReply*, std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)::{lambda(std::function<void (ray::Status, std::function<void ()>, std::function<void ()>)>)#1}>::_M_invoke (ray/_raylet.so)
ray::core::InboundRequest::Accept (ray/_raylet.so)
ray::core::NormalSchedulingQueue::ScheduleRequests (ray/_raylet.so)
EventTracker::RecordExecution (ray/_raylet.so)
std::_Function_handler<void (), instrumented_io_context::post(std::function<void ()>, std::string)::{lambda()#1}>::_M_invoke (ray/_raylet.so)
boost::asio::detail::completion_handler<std::function<void ()>, boost::asio::io_context::basic_executor_type<std::allocator, (unsigned int)0> >::do_complete (ray/_raylet.so)
boost::asio::detail::scheduler::do_run_one (ray/_raylet.so)
boost::asio::detail::scheduler::run (ray/_raylet.so)
boost::asio::io_context::run (ray/_raylet.so)
ray::core::CoreWorker::RunTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcessImpl::RunWorkerTaskExecutionLoop (ray/_raylet.so)
ray::core::CoreWorkerProcess::RunTaskExecutionLoop (ray/_raylet.so)
run_task_loop (ray/_raylet.so)
main_loop (ray/_private/worker.py:763)
<module> (ray/_private/workers/default_worker.py:231)
Thread 38571 (idle): "ray_import_thread"
do_futex_wait (libpthread-2.26.so)
__new_sem_wait_slow (libpthread-2.26.so)
0x7ff2e13aa1a2 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e13ae38f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e149ee47 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e150c2b5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e150c479 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e15a47b8 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e15a4f5f (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
0x7ff2e15a6db5 (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
channel_spin (grpc/_channel.py:1258)
0x7ff2e14ee40c (grpc/_cython/cygrpc.cpython-37m-x86_64-linux-gnu.so)
run (threading.py:870)
_bootstrap_inner (threading.py:926)
_bootstrap (threading.py:890)
clone (libc-2.26.so)