diff --git a/src/convert-to-parquet/convert-to-parquet.py b/src/convert-to-parquet/convert-to-parquet.py
index 2a8b70c693a9594a428fbc53724239052caf7ff7..f8685de903af374683234b30aebc45812726c350 100755
--- a/src/convert-to-parquet/convert-to-parquet.py
+++ b/src/convert-to-parquet/convert-to-parquet.py
@@ -31,6 +31,12 @@ def parse_line(line):
 
     d = dict([re.match(r'([\w]+)=(.*)',l).groups() for l in details.split('|')])
 
-    tld = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path).group(1)
+    # Paths outside the recognised roots do not match; record tld as None instead of raising AttributeError.
+    grp = re.match(r'(?:/data/user(?:/home)?/|/data/project/|/scratch/)([^/]+)',path)
+    if grp:
+        tld = grp.group(1)
+    else:
+        tld = None
+
     d.update({'path': path, 'tld': tld})
     return d