Skip to content
Snippets Groups Projects
Commit df8ade2c authored by John-Paul Robinson's avatar John-Paul Robinson
Browse files

Create per-user last atime report generator and wrapper batch script

The atime report generator reads the input file given on the command line
and generates a per-user last-access (atime) report.
The sbatch wrapper runs the script as a Slurm array job so that large
data sets split into many files can be processed in parallel.
parent 2b954a9c
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from urllib.parse import unquote
import sys
# Per-user last-access (atime) report generator.
#
# Reads one file-list file (the name suggests GPFS policy-scan output --
# TODO confirm), prints the most recent access time seen for each uid, and
# saves every parsed record as a pickled DataFrame next to the input file.

# Emit a progress line every this many accepted records.
PROGRESS_INTERVAL = 20_000_000
# Upper bound on the number of records held in memory for one input file.
MAX_RECORDS = 200_000_000
# Individual malformed-line warnings printed before only counting them.
MAX_WARNINGS = 20
# Attributes parsed from each record but not kept in the report.
DROPPED_KEYS = ("heat", "pool", "mode", "misc")
COLUMN_RENAMES = {'access': 'atime', 'create': 'ctime', 'modify': 'mtime'}
INT_COLUMNS = ('size', 'kballoc', 'uid', 'gid')
TIME_COLUMNS = ('atime', 'ctime', 'mtime')


def parse_line(line):
    """Parse one list record into ``(props, fname)``.

    The whole line is URL-unquoted first and is then expected to look like::

        <tok> <tok> <tok> |key=value|key=value|...| -- <file name>

    Args:
        line: One raw line of the list file.

    Returns:
        A tuple ``(props, fname)``. ``props`` maps each ``key`` to its string
        ``value``, plus ``"inode"`` (the second of the three leading tokens),
        minus the keys in ``DROPPED_KEYS``. ``fname`` is the stripped text
        after the first ``" -- "``.

    Raises:
        ValueError: If the line does not have the layout above.
    """
    try:
        left, right = unquote(line).split(" -- ", 1)
        head, meta = left.split('|', 1)
        # NOTE(review): the second token is used as the inode, as in the
        # original script -- confirm against the list-file format.
        _, inode, _ = head.split()
        # Discard whatever follows the final '|' (normally just padding).
        meta, _ = meta.rsplit('|', 1)
    except ValueError as err:
        raise ValueError(f"malformed record: {line.rstrip()!r}") from err

    props = {}
    for prop in meta.split('|'):
        # Split on the first '=' only, so values may themselves contain '='.
        key, sep, value = prop.partition('=')
        if not sep:
            raise ValueError(
                f"attribute {prop!r} is not key=value in record: "
                f"{line.rstrip()!r}"
            )
        props[key] = value
    props["inode"] = inode
    for key in DROPPED_KEYS:
        # pop() tolerates records that lack one of these attributes.
        props.pop(key, None)
    return props, right.strip()


def load_records(lines, max_records=MAX_RECORDS,
                 progress_interval=PROGRESS_INTERVAL):
    """Parse an iterable of list-file lines into a list of record dicts.

    Blank lines are ignored. Malformed lines are reported on stderr and
    skipped, so a single bad record does not abort a very long run.

    Args:
        lines: Iterable of raw lines (for example an open text file).
        max_records: Stop once this many records have been accepted.
        progress_interval: Print a progress line to stdout every this many
            accepted records. Must be positive.

    Returns:
        The list of ``props`` dicts produced by ``parse_line``.
    """
    recs = []
    skipped = 0
    for lineno, line in enumerate(lines, start=1):
        if not line.strip():
            continue
        try:
            props, fname = parse_line(line)
        except ValueError as err:
            skipped += 1
            if skipped <= MAX_WARNINGS:
                print(f"line {lineno}: {err}; skipped", file=sys.stderr)
            continue
        recs.append(props)
        count = len(recs)
        if count % progress_interval == 0:
            print(f"{datetime.datetime.now()} progress: {count}: {fname}")
        if count >= max_records:
            # Make the truncation visible instead of silently dropping input.
            print(
                f"record cap of {max_records} reached at line {lineno}; "
                "any remaining input is ignored",
                file=sys.stderr,
            )
            break
    if skipped:
        print(f"skipped {skipped} malformed line(s)", file=sys.stderr)
    return recs


def build_frame(recs):
    """Build the typed report DataFrame from parsed records.

    Renames ``access``/``create``/``modify`` to ``atime``/``ctime``/``mtime``,
    converts ``INT_COLUMNS`` to integers and ``TIME_COLUMNS`` to
    ``datetime64[ns]``.

    Args:
        recs: Non-empty list of dicts as returned by ``load_records``.

    Returns:
        A ``pandas.DataFrame`` with one row per record.

    Raises:
        KeyError: If a required column is missing from the records.
    """
    df = pd.DataFrame(recs).rename(columns=COLUMN_RENAMES)
    for col in INT_COLUMNS:
        df[col] = df[col].astype("int")
    for col in TIME_COLUMNS:
        df[col] = df[col].astype('datetime64[ns]')
    return df


def main(argv=None):
    """Generate the per-user report for the list file named in ``argv``.

    Prints the latest atime per uid to stdout and writes all records to
    ``<input>.gz``; pandas infers gzip compression from that suffix.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument is used.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f"usage: {sys.argv[0]} LIST_FILE")
    path = args[0]

    with open(path) as gpfs_data:
        recs = load_records(gpfs_data)
    if not recs:
        # Exit non-zero with a clear message rather than an opaque KeyError
        # from the column conversions on an empty frame.
        sys.exit(f"{path}: no records parsed; nothing to report")

    df = build_frame(recs)
    del recs  # release the raw dicts before writing the pickle
    print(df.groupby(["uid"], sort=False)["atime"].max())
    df.to_pickle(f"{path}.gz")


if __name__ == "__main__":
    main()
#!/bin/bash
#
# Slurm array-job wrapper: runs last-access-per-user.py on the single list
# file selected by this task's array index (list-000, list-001, ...).
#
# Usage: sbatch --array=<range> <this script> [data-dir]
#   data-dir  directory holding the list-NNN files, relative to the
#             directory the job starts in (default: the data set below).
#
# Data sets used in earlier runs:
#cd data/list-16144464.list.gather-info.d
#cd data/list-17075953.list.gather-info.d
datadir=${1:-data/list-17094088.list.gather-info.d}

# Refuse to run outside an array job; otherwise printf would silently
# turn an empty index into "000" and reprocess list-000.
: "${SLURM_ARRAY_TASK_ID:?must be run as a Slurm array task (sbatch --array=...)}"

# Resolve the report script before changing directory so the path does not
# depend on how deep the data directory is.
report_script="$PWD/last-access-per-user.py"

# Zero-pad the task index to three digits to match the list-NNN file names.
suffix=$(printf '%.3d' "$SLURM_ARRAY_TASK_ID")

# Stop here if the data directory is missing rather than running the
# report against whatever happens to be in the current directory.
cd "$datadir" || exit 1

echo -n "list-$suffix "
"$report_script" "list-$suffix"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment