Saving checkpoints with a good custom_metric using tune.run()

At least for this example, reading the JSON result log (instead of the CSV progress file) does work once the changes below are applied.

--- orig/ray/tune/analysis/experiment_analysis.py	2021-05-12 22:18:02.195126415 -0400
+++ fixed/ray/tune/analysis/experiment_analysis.py	2021-05-12 22:15:36.774961258 -0400
@@ -17,7 +17,7 @@
 
 from ray.tune.error import TuneError
 from ray.tune.result import DEFAULT_METRIC, EXPR_PROGRESS_FILE, \
-    EXPR_PARAM_FILE, CONFIG_PREFIX, TRAINING_ITERATION
+    EXPR_PARAM_FILE, CONFIG_PREFIX, TRAINING_ITERATION, EXPR_RESULT_FILE
 from ray.tune.trial import Trial
 from ray.tune.utils.trainable import TrainableUtil
 from ray.tune.utils.util import unflattened_lookup
@@ -172,9 +172,11 @@
         fail_count = 0
         for path in self._get_trial_paths():
             try:
-                self.trial_dataframes[path] = pd.read_csv(
-                    os.path.join(path, EXPR_PROGRESS_FILE))
-            except Exception:
+                data = [json.loads(line) for line in open(
+                    os.path.join(path, EXPR_RESULT_FILE),
+                    'r').read().split('\n') if line]
+                self.trial_dataframes[path] = pd.json_normalize(data, sep="/")
+            except Exception as ex:
                 fail_count += 1
 
         if fail_count:
@@ -280,6 +282,8 @@
         assert mode is None or mode in ["max", "min"]
         rows = {}
         for path, df in self.trial_dataframes.items():
+            if metric not in df:
+                continue
             if mode == "max":
                 idx = df[metric].idxmax()
             elif mode == "min":
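For anyone who wants the gist without reading the diff: the first hunk replaces `pd.read_csv` on `progress.csv` with line-by-line JSON parsing of `result.json` (what `EXPR_RESULT_FILE` points to in `ray.tune.result`), flattened via `pd.json_normalize` so nested custom metrics become `"/"`-separated columns. Here is a minimal standalone sketch of that logic, with a `with` block added for file handling; the helper name `load_trial_dataframe` is mine, not part of the patch:

```python
import json
import os

import pandas as pd

EXPR_RESULT_FILE = "result.json"  # matches ray.tune.result.EXPR_RESULT_FILE


def load_trial_dataframe(trial_dir):
    """Parse one trial's newline-delimited JSON results into a DataFrame."""
    with open(os.path.join(trial_dir, EXPR_RESULT_FILE), "r") as f:
        # Each non-empty line is one reported training result.
        data = [json.loads(line) for line in f if line.strip()]
    # Flatten nested dicts, e.g. {"custom_metrics": {"foo": 1}}
    # becomes the column "custom_metrics/foo".
    return pd.json_normalize(data, sep="/")
```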
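To exercise the fix end to end, something like the following should work (the trainable, config, and metric names here are illustrative, not from the original post). With the patch applied, the nested metric shows up as a flattened column in `analysis.trial_dataframes`, and the second hunk makes the best-row lookup simply skip trials that never reported the metric:

```python
from ray import tune


def trainable(config):
    for step in range(10):
        # Nested results are flattened to the column
        # "custom_metrics/my_metric" in the per-trial dataframes.
        tune.report(custom_metrics={"my_metric": step * config["lr"]})


analysis = tune.run(
    trainable,
    config={"lr": tune.grid_search([0.01, 0.1])},
    metric="custom_metrics/my_metric",
    mode="max",
)

for path, df in analysis.trial_dataframes.items():
    print(path, df["custom_metrics/my_metric"].max())
```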