From e52d4ccc8ac3099ce9814c0e8b8197a1b86be301 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Mon, 16 Sep 2024 16:14:38 -0500 Subject: [PATCH] if the paths given don't appear in /data/user, /data/project, or /scratch, set tld to None for now --- src/convert-to-parquet/convert-to-parquet.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/convert-to-parquet/convert-to-parquet.py b/src/convert-to-parquet/convert-to-parquet.py index 2a8b70c..f8685de 100755 --- a/src/convert-to-parquet/convert-to-parquet.py +++ b/src/convert-to-parquet/convert-to-parquet.py @@ -31,7 +31,12 @@ def parse_line(line): d = dict([re.match(r'([\w]+)=(.*)',l).groups() for l in details.split('|')]) - tld = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path).group(1) + grp = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path) + if grp: + tld = grp.group(1) + else: + tld = None + d.update({'path': path, 'tld': tld}) return d -- GitLab