- High: It blocks me to complete my task.
Question
I have learned that ray tune can use tensorboardX, and I tested it locally. But when I try to use tensorboardX in ray train myself, I get an error. Is it because I use it wrongly or is it not supported by ray train?
Versions
-
python 3.7.12
-
ray 2.0.0.dev0
code
from ray.train import Trainer
from ray.train.callbacks import JsonLoggerCallback, TBXLoggerCallback
from tensorboardX import SummaryWriter
writer = SummaryWriter('runs/exp')
def train_func(config):
for i in range(10):
writer.add_scalar('test_tensorboardX', i**3, global_step=i)
return True
def train_linear(num_workers=2, use_gpu=False, epochs=3):
trainer = Trainer(backend="torch", num_workers=num_workers, use_gpu=use_gpu)
config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": epochs}
trainer.start()
results = trainer.run(
train_func, config, callbacks=[JsonLoggerCallback(), TBXLoggerCallback()]
)
trainer.shutdown()
print(results)
return results
if __name__ == "__main__":
import ray
ray.init(address=None)
train_linear()
Error
Traceback (most recent call last):
File "test.py", line 28, in <module>
train_linear()
File "test.py", line 17, in train_linear
train_func, config, callbacks=[JsonLoggerCallback(), TBXLoggerCallback()]
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/train/trainer.py", line 332, in run
run_dir=self.latest_run_dir,
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/train/trainer.py", line 686, in __init__
checkpoint_strategy=checkpoint_strategy,
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/train/trainer.py", line 711, in _start_training
lambda: ray.get(
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/train/trainer.py", line 720, in _run_with_error_handling
return func()
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/train/trainer.py", line 713, in <lambda>
train_func=train_func, dataset=dataset, checkpoint=checkpoint_dict
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/actor.py", line 122, in remote
return self._remote(args, kwargs)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/util/tracing/tracing_helper.py", line 421, in _start_span
return method(self, args, kwargs, *_args, **_kwargs)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/actor.py", line 168, in _remote
return invocation(args, kwargs)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/actor.py", line 161, in invocation
concurrency_group_name=concurrency_group,
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/actor.py", line 1181, in _actor_method_call
concurrency_group_name if concurrency_group_name is not None else b"",
File "python/ray/_raylet.pyx", line 1704, in ray._raylet.CoreWorker.submit_actor_task
File "python/ray/_raylet.pyx", line 1709, in ray._raylet.CoreWorker.submit_actor_task**strong text**
File "python/ray/_raylet.pyx", line 379, in ray._raylet.prepare_args_and_increment_put_refs
File "python/ray/_raylet.pyx", line 370, in ray._raylet.prepare_args_and_increment_put_refs
File "python/ray/_raylet.pyx", line 413, in ray._raylet.prepare_args_internal
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/serialization.py", line 412, in serialize
return self._serialize_to_msgpack(value)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/serialization.py", line 391, in _serialize_to_msgpack
metadata, python_objects
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/serialization.py", line 352, in _serialize_to_pickle5
raise e
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/serialization.py", line 348, in _serialize_to_pickle5
value, protocol=5, buffer_callback=writer.buffer_callback
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 620, in dump
return Pickler.dump(self, obj)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/multiprocessing/queues.py", line 58, in __getstate__
context.assert_spawning(self)
File "/miniconda3/envs/raydp_raymaster/lib/python3.7/multiprocessing/context.py", line 356, in assert_spawning
' through inheritance' % type(obj).__name__
RuntimeError: Queue objects should only be shared between processes through inheritance