Ray: 1.12.1, Python: 3.7, OS: Ubuntu and MacOS
I have the following code:
import time
import logging
from unittest import runner
import ray
import tqdm
import torch
import numpy as np
from ray.util.queue import Queue
logging.basicConfig(level=logging.INFO)
@ray.remote
class Config:
    """Ray actor that holds a single shared stop flag.

    The flag starts at 0 and is switched to 1 by ``set_stop``; other
    actors read it through ``get_stop`` to decide when to leave their loops.
    """

    def __init__(self) -> None:
        # 0 means "keep running", 1 means "stop requested".
        self.stop = 0

    def set_stop(self):
        """Request a stop by setting the flag to 1."""
        self.stop = 1

    def get_stop(self):
        """Return the current value of the stop flag (0 or 1)."""
        return self.stop
@ray.remote
class Runner:
    """Ray actor that pulls the collected data from the ``Buffer`` actor."""

    def run(self):
        """Do nothing; placeholder entry point."""
        return None

    def get_data(self):
        """Fetch the Buffer actor's data, cache it on ``self.data`` and return it."""
        # Look the actor up by the name it was registered under.
        buffer_actor = ray.get_actor("Buffer")
        fetched = ray.get(buffer_actor.get_data.remote())
        self.data = fetched
        return fetched
@ray.remote
class Buffer(object):
    """Ray actor that drains a shared queue into an in-memory list.

    Args:
        queue: Queue to read items from. Expected to behave like
            ``ray.util.queue.Queue`` (``get(timeout=...)`` raising
            ``ray.util.queue.Empty`` on timeout).
        max_size: Stored on the instance; note that nothing in this class
            currently enforces it as an upper bound on ``self.data``.
    """

    def __init__(self, queue, max_size=100) -> None:
        self.queue = queue
        self.max_size = max_size
        self.data = []
        # Configure logging here as well: per the "(Buffer pid=...)" log
        # prefixes the actor runs in its own process, separate from the driver.
        logging.basicConfig(level=logging.INFO)

    def append(self, v):
        """Append a single item to the stored data."""
        self.data.append(v)

    def get_data(self):
        """Return the list of all items collected so far."""
        return self.data

    def run(self, poll_timeout=1.0):
        """Consume items from the queue until the ``Config`` actor signals stop.

        Args:
            poll_timeout: Maximum number of seconds to wait for one queue
                item before re-checking the stop flag.

        The original implementation used a blocking ``queue.get()`` with no
        timeout, so once the queue was empty the loop never reached the stop
        check again and never terminated. Polling with a timeout lets the
        stop flag be observed even when no more data arrives.
        """
        # Local import so this block does not depend on the file-level imports.
        from ray.util.queue import Empty

        config = ray.get_actor("Config")
        while True:
            try:
                data = self.queue.get(timeout=poll_timeout)
            except Empty:
                # Nothing arrived in time; fall through to the stop check.
                pass
            else:
                self.data.append(data)
                logging.info(f'in buffer run: get data {data["index"]}, the shape {data["data"].shape}, data len: {len(self.data)}')
                # Sanity check on the payload size used by this script.
                assert data['data'].shape[0] == 1024
            stop = ray.get(config.get_stop.remote())
            if stop == 1:
                print(f'in buffer: break')
                break
@ray.remote
class Trainer:
    """Ray actor whose ``run`` loop polls the stop flag held by ``Config``."""

    def __init__(self) -> None:
        # Set up logging inside the actor so its INFO messages are emitted.
        logging.basicConfig(level=logging.INFO)

    def run(self):
        """Poll the stop flag; once it is 1, log the Buffer's data size and exit.

        Note that the ``stop / 0`` statement below is executed on every
        iteration before the flag is inspected.
        """
        iteration = 0
        logging.info('start trainer.run!')
        config_actor = ray.get_actor("Config")
        buffer_actor = ray.get_actor("Buffer")
        while True:
            iteration += 1
            stop = ray.get(config_actor.get_stop.remote())
            # Deliberate division by zero: this is the error the surrounding
            # question is about, so it is intentionally left in place.
            stop / 0
            if stop != 1:
                continue
            logging.info(f'in trainer.run stop: {stop}')
            data = ray.get(buffer_actor.get_data.remote())
            logging.info(f'in trainer, data size: {len(data)}')
            return
if __name__ == '__main__':
    ray.init()

    # Shared queue plus the four named actors; the names are what the actors
    # use to find each other via ray.get_actor().
    queue = Queue(maxsize=10000)
    config = Config.options(name='Config', max_concurrency=4).remote()
    buffer = Buffer.options(name='Buffer', max_concurrency=4).remote(queue=queue, max_size=10000)
    runner = Runner.options(name='Runner', max_concurrency=4).remote()
    trainer = Trainer.options(name="Trainer", max_concurrency=4).remote()

    # Start the two long-running loops; only their ObjectRefs are kept here.
    workers = [buffer.run.remote(), trainer.run.remote()]
    logging.info(f"workers: {workers}")

    # Feed 100 items, each a 1024-element float32 tensor of ones.
    for i in tqdm.tqdm(range(100)):
        time.sleep(0.001)
        payload = torch.from_numpy(np.array([1] * 1024, dtype=np.float32))
        queue.put({'index': i, 'data': payload})

    # Give the buffer time to drain the queue, then request a stop.
    time.sleep(10)
    config.set_stop.remote()
    time.sleep(20)

    buffer_items = ray.get(buffer.get_data.remote())
    logging.info(f"buffer get data size: {len(buffer_items)}")
    runner.run.remote()
    stop_flag = ray.get(config.get_stop.remote())
    logging.info(f"config get stop: {stop_flag}")
    runner_items = ray.get(runner.get_data.remote())
    logging.info(f"runner get data size: {len(runner_items)}")

    # NOTE(review): ray.wait presumably only waits for readiness and does not
    # re-raise exceptions from the worker tasks; ray.get on the refs would
    # be needed to surface them -- confirm against the Ray documentation.
    ray.wait(workers)
    ray.shutdown()
As you can see, the `stop/0` statement in `Trainer.run`
would raise a `ZeroDivisionError` in an ordinary (non-Ray) Python program. However, when I run the script, the terminal does not report the error; it is simply bypassed.
$ python my_code.py
2022-05-28 23:02:49,060 INFO services.py:1462 -- View the Ray dashboard at http://127.0.0.1:8265
INFO:root:workers: [ObjectRef(f4402ec78d3a260713619bc06cc483d63a30db2b0100000001000000), ObjectRef(f91b78d7db9a6593d8ec6bed6bb656e8601fa7990100000001000000)]
0%| | 0/100 [00:00<?, ?it/s](Trainer pid=3046356) INFO:root:start trainer.run!
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 100/100 [00:06<00:00, 15.16it/s]
(Buffer pid=3046221) INFO:root:in buffer run: get data 0, the shape torch.Size([1024]), data len: 1
(Buffer pid=3046221) INFO:root:in buffer run: get data 1, the shape torch.Size([1024]), data len: 2
(Buffer pid=3046221) INFO:root:in buffer run: get data 2, the shape torch.Size([1024]), data len: 3
(Buffer pid=3046221) INFO:root:in buffer run: get data 3, the shape torch.Size([1024]), data len: 4
(Buffer pid=3046221) INFO:root:in buffer run: get data 4, the shape torch.Size([1024]), data len: 5
(Buffer pid=3046221) INFO:root:in buffer run: get data 5, the shape torch.Size([1024]), data len: 6
(Buffer pid=3046221) INFO:root:in buffer run: get data 6, the shape torch.Size([1024]), data len: 7
(Buffer pid=3046221) INFO:root:in buffer run: get data 7, the shape torch.Size([1024]), data len: 8
(Buffer pid=3046221) INFO:root:in buffer run: get data 8, the shape torch.Size([1024]), data len: 9
(Buffer pid=3046221) INFO:root:in buffer run: get data 9, the shape torch.Size([1024]), data len: 10
(Buffer pid=3046221) INFO:root:in buffer run: get data 10, the shape torch.Size([1024]), data len: 11
(Buffer pid=3046221) INFO:root:in buffer run: get data 11, the shape torch.Size([1024]), data len: 12
(Buffer pid=3046221) INFO:root:in buffer run: get data 12, the shape torch.Size([1024]), data len: 13
(Buffer pid=3046221) INFO:root:in buffer run: get data 13, the shape torch.Size([1024]), data len: 14
(Buffer pid=3046221) INFO:root:in buffer run: get data 14, the shape torch.Size([1024]), data len: 15
(Buffer pid=3046221) INFO:root:in buffer run: get data 15, the shape torch.Size([1024]), data len: 16
(Buffer pid=3046221) INFO:root:in buffer run: get data 16, the shape torch.Size([1024]), data len: 17
(Buffer pid=3046221) INFO:root:in buffer run: get data 17, the shape torch.Size([1024]), data len: 18
(Buffer pid=3046221) INFO:root:in buffer run: get data 18, the shape torch.Size([1024]), data len: 19
(Buffer pid=3046221) INFO:root:in buffer run: get data 19, the shape torch.Size([1024]), data len: 20
(Buffer pid=3046221) INFO:root:in buffer run: get data 20, the shape torch.Size([1024]), data len: 21
(Buffer pid=3046221) INFO:root:in buffer run: get data 21, the shape torch.Size([1024]), data len: 22
(Buffer pid=3046221) INFO:root:in buffer run: get data 22, the shape torch.Size([1024]), data len: 23
(Buffer pid=3046221) INFO:root:in buffer run: get data 23, the shape torch.Size([1024]), data len: 24
(Buffer pid=3046221) INFO:root:in buffer run: get data 24, the shape torch.Size([1024]), data len: 25
(Buffer pid=3046221) INFO:root:in buffer run: get data 25, the shape torch.Size([1024]), data len: 26
(Buffer pid=3046221) INFO:root:in buffer run: get data 26, the shape torch.Size([1024]), data len: 27
(Buffer pid=3046221) INFO:root:in buffer run: get data 27, the shape torch.Size([1024]), data len: 28
(Buffer pid=3046221) INFO:root:in buffer run: get data 28, the shape torch.Size([1024]), data len: 29
(Buffer pid=3046221) INFO:root:in buffer run: get data 29, the shape torch.Size([1024]), data len: 30
(Buffer pid=3046221) INFO:root:in buffer run: get data 30, the shape torch.Size([1024]), data len: 31
(Buffer pid=3046221) INFO:root:in buffer run: get data 31, the shape torch.Size([1024]), data len: 32
(Buffer pid=3046221) INFO:root:in buffer run: get data 32, the shape torch.Size([1024]), data len: 33
(Buffer pid=3046221) INFO:root:in buffer run: get data 33, the shape torch.Size([1024]), data len: 34
(Buffer pid=3046221) INFO:root:in buffer run: get data 34, the shape torch.Size([1024]), data len: 35
(Buffer pid=3046221) INFO:root:in buffer run: get data 35, the shape torch.Size([1024]), data len: 36
(Buffer pid=3046221) INFO:root:in buffer run: get data 36, the shape torch.Size([1024]), data len: 37
(Buffer pid=3046221) INFO:root:in buffer run: get data 37, the shape torch.Size([1024]), data len: 38
(Buffer pid=3046221) INFO:root:in buffer run: get data 38, the shape torch.Size([1024]), data len: 39
(Buffer pid=3046221) INFO:root:in buffer run: get data 39, the shape torch.Size([1024]), data len: 40
(Buffer pid=3046221) INFO:root:in buffer run: get data 40, the shape torch.Size([1024]), data len: 41
(Buffer pid=3046221) INFO:root:in buffer run: get data 41, the shape torch.Size([1024]), data len: 42
(Buffer pid=3046221) INFO:root:in buffer run: get data 42, the shape torch.Size([1024]), data len: 43
(Buffer pid=3046221) INFO:root:in buffer run: get data 43, the shape torch.Size([1024]), data len: 44
(Buffer pid=3046221) INFO:root:in buffer run: get data 44, the shape torch.Size([1024]), data len: 45
(Buffer pid=3046221) INFO:root:in buffer run: get data 45, the shape torch.Size([1024]), data len: 46
(Buffer pid=3046221) INFO:root:in buffer run: get data 46, the shape torch.Size([1024]), data len: 47
(Buffer pid=3046221) INFO:root:in buffer run: get data 47, the shape torch.Size([1024]), data len: 48
(Buffer pid=3046221) INFO:root:in buffer run: get data 48, the shape torch.Size([1024]), data len: 49
(Buffer pid=3046221) INFO:root:in buffer run: get data 49, the shape torch.Size([1024]), data len: 50
(Buffer pid=3046221) INFO:root:in buffer run: get data 50, the shape torch.Size([1024]), data len: 51
(Buffer pid=3046221) INFO:root:in buffer run: get data 51, the shape torch.Size([1024]), data len: 52
(Buffer pid=3046221) INFO:root:in buffer run: get data 52, the shape torch.Size([1024]), data len: 53
(Buffer pid=3046221) INFO:root:in buffer run: get data 53, the shape torch.Size([1024]), data len: 54
(Buffer pid=3046221) INFO:root:in buffer run: get data 54, the shape torch.Size([1024]), data len: 55
(Buffer pid=3046221) INFO:root:in buffer run: get data 55, the shape torch.Size([1024]), data len: 56
(Buffer pid=3046221) INFO:root:in buffer run: get data 56, the shape torch.Size([1024]), data len: 57
(Buffer pid=3046221) INFO:root:in buffer run: get data 57, the shape torch.Size([1024]), data len: 58
(Buffer pid=3046221) INFO:root:in buffer run: get data 58, the shape torch.Size([1024]), data len: 59
(Buffer pid=3046221) INFO:root:in buffer run: get data 59, the shape torch.Size([1024]), data len: 60
(Buffer pid=3046221) INFO:root:in buffer run: get data 60, the shape torch.Size([1024]), data len: 61
(Buffer pid=3046221) INFO:root:in buffer run: get data 61, the shape torch.Size([1024]), data len: 62
(Buffer pid=3046221) INFO:root:in buffer run: get data 62, the shape torch.Size([1024]), data len: 63
(Buffer pid=3046221) INFO:root:in buffer run: get data 63, the shape torch.Size([1024]), data len: 64
(Buffer pid=3046221) INFO:root:in buffer run: get data 64, the shape torch.Size([1024]), data len: 65
(Buffer pid=3046221) INFO:root:in buffer run: get data 65, the shape torch.Size([1024]), data len: 66
(Buffer pid=3046221) INFO:root:in buffer run: get data 66, the shape torch.Size([1024]), data len: 67
(Buffer pid=3046221) INFO:root:in buffer run: get data 67, the shape torch.Size([1024]), data len: 68
(Buffer pid=3046221) INFO:root:in buffer run: get data 68, the shape torch.Size([1024]), data len: 69
(Buffer pid=3046221) INFO:root:in buffer run: get data 69, the shape torch.Size([1024]), data len: 70
(Buffer pid=3046221) INFO:root:in buffer run: get data 70, the shape torch.Size([1024]), data len: 71
(Buffer pid=3046221) INFO:root:in buffer run: get data 71, the shape torch.Size([1024]), data len: 72
(Buffer pid=3046221) INFO:root:in buffer run: get data 72, the shape torch.Size([1024]), data len: 73
(Buffer pid=3046221) INFO:root:in buffer run: get data 73, the shape torch.Size([1024]), data len: 74
(Buffer pid=3046221) INFO:root:in buffer run: get data 74, the shape torch.Size([1024]), data len: 75
(Buffer pid=3046221) INFO:root:in buffer run: get data 75, the shape torch.Size([1024]), data len: 76
(Buffer pid=3046221) INFO:root:in buffer run: get data 76, the shape torch.Size([1024]), data len: 77
(Buffer pid=3046221) INFO:root:in buffer run: get data 77, the shape torch.Size([1024]), data len: 78
(Buffer pid=3046221) INFO:root:in buffer run: get data 78, the shape torch.Size([1024]), data len: 79
(Buffer pid=3046221) INFO:root:in buffer run: get data 79, the shape torch.Size([1024]), data len: 80
(Buffer pid=3046221) INFO:root:in buffer run: get data 80, the shape torch.Size([1024]), data len: 81
(Buffer pid=3046221) INFO:root:in buffer run: get data 81, the shape torch.Size([1024]), data len: 82
(Buffer pid=3046221) INFO:root:in buffer run: get data 82, the shape torch.Size([1024]), data len: 83
(Buffer pid=3046221) INFO:root:in buffer run: get data 83, the shape torch.Size([1024]), data len: 84
(Buffer pid=3046221) INFO:root:in buffer run: get data 84, the shape torch.Size([1024]), data len: 85
(Buffer pid=3046221) INFO:root:in buffer run: get data 85, the shape torch.Size([1024]), data len: 86
(Buffer pid=3046221) INFO:root:in buffer run: get data 86, the shape torch.Size([1024]), data len: 87
(Buffer pid=3046221) INFO:root:in buffer run: get data 87, the shape torch.Size([1024]), data len: 88
(Buffer pid=3046221) INFO:root:in buffer run: get data 88, the shape torch.Size([1024]), data len: 89
(Buffer pid=3046221) INFO:root:in buffer run: get data 89, the shape torch.Size([1024]), data len: 90
(Buffer pid=3046221) INFO:root:in buffer run: get data 90, the shape torch.Size([1024]), data len: 91
(Buffer pid=3046221) INFO:root:in buffer run: get data 91, the shape torch.Size([1024]), data len: 92
(Buffer pid=3046221) INFO:root:in buffer run: get data 92, the shape torch.Size([1024]), data len: 93
(Buffer pid=3046221) INFO:root:in buffer run: get data 93, the shape torch.Size([1024]), data len: 94
(Buffer pid=3046221) INFO:root:in buffer run: get data 94, the shape torch.Size([1024]), data len: 95
(Buffer pid=3046221) INFO:root:in buffer run: get data 95, the shape torch.Size([1024]), data len: 96
(Buffer pid=3046221) INFO:root:in buffer run: get data 96, the shape torch.Size([1024]), data len: 97
(Buffer pid=3046221) INFO:root:in buffer run: get data 97, the shape torch.Size([1024]), data len: 98
(Buffer pid=3046221) INFO:root:in buffer run: get data 98, the shape torch.Size([1024]), data len: 99
(Buffer pid=3046221) INFO:root:in buffer run: get data 99, the shape torch.Size([1024]), data len: 100
(Trainer pid=3046356) INFO:root:in trainer.run stop: 1
(Trainer pid=3046356) INFO:root:in trainer, data size: 100
INFO:root:buffer get data size: 100
INFO:root:config get stop: 1
INFO:root:runner get data size: 100
Why do Ray actors bypass the error?