From e52d4ccc8ac3099ce9814c0e8b8197a1b86be301 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Mon, 16 Sep 2024 16:14:38 -0500
Subject: [PATCH] if the paths given don't appear in /data/user, /data/project,
 or /scratch, set tld to None for now

---
 src/convert-to-parquet/convert-to-parquet.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/convert-to-parquet/convert-to-parquet.py b/src/convert-to-parquet/convert-to-parquet.py
index 2a8b70c..f8685de 100755
--- a/src/convert-to-parquet/convert-to-parquet.py
+++ b/src/convert-to-parquet/convert-to-parquet.py
@@ -31,7 +31,12 @@ def parse_line(line):
         
         d = dict([re.match(r'([\w]+)=(.*)',l).groups() for l in details.split('|')])
 
-        tld = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path).group(1)
+        grp = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path)
+        if grp:
+            tld = grp.group(1)  # group(1) is the captured name (str); groups(1) would return a 1-tuple
+        else:
+            tld = None
+            
         d.update({'path': path,
                   'tld': tld})
         return d
-- 
GitLab