Mini-cluster error: Process finished with exit code 3

I get the following error when trying to run a test with mini-cluster:

tests/test_hyperparams_random_search_e2e.py::TestHyperParamRandomSearchE2E::test_run_e2e_hyperparam_search_mini_cluster_ray_distributed Fatal Python error: Aborted

Current thread 0x00007fc8 (most recent call first):
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\state.py”, line 94 in _really_init_global_state
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\state.py”, line 54 in _check_connected
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\state.py”, line 303 in node_table
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\cluster_utils.py”, line 164 in _wait_for_node
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\cluster_utils.py”, line 116 in add_node
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\ray\cluster_utils.py”, line 41 in init
File “C:\dev\clientactivity\tests\test_hyperparams_random_search_e2e.py”, line 49 in test_run_e2e_hyperparam_search_mini_cluster_ray_distributed
File “C:\Users\dm57337.conda\envs\py38\lib\unittest\case.py”, line 633 in _callTestMethod
File “C:\Users\dm57337.conda\envs\py38\lib\unittest\case.py”, line 676 in run
File “C:\Users\dm57337.conda\envs\py38\lib\unittest\case.py”, line 736 in call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\unittest.py”, line 231 in runtest
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 135 in pytest_runtest_call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\callers.py”, line 187 in _multicall
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 84 in
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 93 in _hookexec
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\hooks.py”, line 286 in call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 217 in
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 244 in from_call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 216 in call_runtest_hook
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 186 in call_and_report
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 100 in runtestprotocol
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\runner.py”, line 85 in pytest_runtest_protocol
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\callers.py”, line 187 in _multicall
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 84 in
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 93 in _hookexec
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\hooks.py”, line 286 in call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\main.py”, line 272 in pytest_runtestloop
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\callers.py”, line 187 in _multicall
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 84 in
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 93 in _hookexec
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\hooks.py”, line 286 in call
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\main.py”, line 247 in _main
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\main.py”, line 191 in wrap_session
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages_pytest\main.py”, line 240 in pytest_cmdline_main
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\callers.py”, line 187 in _multicall
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 84 in
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\manager.py”, line 93 in hookexec
File “C:\Users\dm57337.conda\envs\py38\lib\site-packages\pluggy\hooks.py”, line 286 in call
File "C:\Users\dm57337.conda\envs\py38\lib\site-packages\_pytest\config\__init__.py", line 124 in main
File “C:\Program Files\JetBrains\PyCharm 2020.2.3\plugins\python\helpers\pycharm_jb_pytest_runner.py”, line 43 in

Process finished with exit code 3

My test code is:

def test_run_e2e_hyperparam_search_mini_cluster_ray_distributed(self):
    """Run the hyper-parameter search end to end on a local Ray mini-cluster.

    Starts a single-node mini-cluster, submits one ``run_search_distributed``
    task per configured iteration, waits for every task to finish and then
    checks that the expected output files exist.

    The Ray driver connection and the mini-cluster are always shut down,
    even when the test fails, so that no Ray processes are left behind to
    interfere with subsequent runs.
    """
    from ray.cluster_utils import Cluster

    # Starts a head-node for the cluster.
    cluster = Cluster(
        initialize_head=True,
        head_node_args={
            "num_cpus": 1,
        },
    )

    try:
        ray.init(address=cluster.address)
        pending_ids = [
            run_search_distributed.remote(self.config_file, self.hyper_file, distributed_run=True)
            for _ in range(self.hyper_file['iterations'])
        ]
        while pending_ids:
            # ray.wait returns (ready, not_ready); carry the not-ready list
            # forward so the loop terminates once every task has completed.
            done_ids, pending_ids = ray.wait(pending_ids)
            try:
                # ray.get re-raises any error the remote task ended with.
                ray.get(done_ids[0])
            except (RayTaskError, RayError, TaskCancelledError, RaySystemError, WorkerCrashedError, GetTimeoutError) as e:
                # TestCase.fail accepts a single message argument, so the
                # exception is folded into the message text.
                self.fail(f"test_run_e2e_hyperparam_search_mini_cluster_ray_distributed has raised an exception: {e!r}")
    finally:
        # Disconnect the driver first, then tear down the cluster nodes.
        ray.shutdown()
        cluster.shutdown()

    self.assertTrue(os.path.exists('./bestmodel.h5'))
    self.assertTrue(os.path.exists(self.hyper_file["file"]))

What could be the problem?
I’m on Ray version 1.1.0.

Are you using Windows? I think cluster mode is not currently supported on Windows.

I’m using a mini-cluster in the example above.

In the end, the problem was that Ray processes from a previous run were still running in the background; simply running ‘ray stop --force’ before the test resolved this error.

1 Like