I constantly get a timeout when using the @mlflow_mixin decorator together with mlflow.autolog() in my code.
Code sample to reproduce:
def test_run_e2e_hyperparam_search_single_process_ray_tune(self, mock_copy_data_from_shared_storage: MagicMock):
    """Run the hyperparameter search end to end through Ray Tune in local mode.

    Starts Ray in local mode with a single CPU and the dashboard disabled,
    runs ``run_search_distributed_tune`` through ``loanrecommender_tune_call``
    and checks that the returned analysis exposes a best config and a
    ``keras_f1`` entry in its results frame.

    ``mock_copy_data_from_shared_storage`` is not used in the body; it is
    presumably injected by a ``patch`` decorator that is not part of this
    snippet.
    """
    ray.init(local_mode=True, num_cpus=1, include_dashboard=False)
    try:
        analysis = loanrecommender_tune_call(run_search_distributed_tune, self.hyper_file, resources={"cpu": 1})
        df = analysis.results_df
    finally:
        # Always stop Ray, even when the search raises or times out, so a
        # failing run does not leave a live Ray session behind for later tests.
        ray.shutdown()
    self.assertIsNotNone(analysis.best_config)
    self.assertIsNotNone(df.keras_f1)
@mlflow_mixin
def run_search_distributed_tune(config, checkpoint_dir=getRootDir()):
    """Trainable passed to Ray Tune: restore the step counter, checkpoint, run the search.

    Enables MLflow autologging, reads the previous step from a JSON
    ``checkpoint`` file when ``checkpoint_dir`` is truthy, writes a new
    checkpoint for the next step and then calls ``run_search``.

    NOTE(review): the indentation of this snippet was lost when it was pasted;
    the nesting below is a reconstruction and should be confirmed against the
    real source.

    Args:
        config: Passed through unchanged to ``run_search``.
        checkpoint_dir: Directory expected to contain a ``checkpoint`` file.
            NOTE(review): the default ``getRootDir()`` is evaluated once, when
            the function is defined, not on every call.
    """
    print("Running distributed search")
    mlflow.autolog()
    # Imported locally; only ``tune.checkpoint_dir`` is used below.
    from ray import tune
    start = 0
    # NOTE(review): with the ``getRootDir()`` default this branch also runs
    # when the caller passes no ``checkpoint_dir`` (unless that helper returns
    # a falsy value), and ``open`` raises if no "checkpoint" file exists there.
    if checkpoint_dir:
        with open(os.path.join(checkpoint_dir, "checkpoint")) as f:
            state = json.loads(f.read())
            # Resume one step after the step recorded in the checkpoint.
            start = state["step"] + 1
    # Obtain a checkpoint directory
    # The ``as`` target rebinds the ``checkpoint_dir`` parameter to the new directory.
    with tune.checkpoint_dir(step=start) as checkpoint_dir:
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            f.write(json.dumps({"step": start}))
    # NOTE(review): ``config_file`` and ``hyper_file`` are not defined in this
    # snippet - presumably module-level globals; confirm. The returned
    # ``best_model`` and ``metric`` are not used in the visible code.
    best_model, metric = run_search(config_file, hyper_file, True, config, distributed_run_tune=True)
What could be the reason?
P.S. If I remove the @mlflow_mixin decorator and the mlflow.autolog() call, everything works fine.