Skip to content
Snippets Groups Projects
Commit 2b954a9c authored by John-Paul Robinson
Browse files

Exploratory notebook to generate logic for report

Figure out logic to parse list files and produce atime summaries.
parent 28617e9f
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:63ee8026 tags:
```
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from urllib.parse import unquote
import sys
# Attributes removed from every parsed record before it is stored.
DROPPED_KEYS = ("heat", "pool", "mode", "misc")
# Emit a progress line each time this many records have been read.
PROGRESS_INTERVAL = 20000000
# Safety cap on the number of records kept in memory.
MAX_RECORDS = 200000000


def _parse_record(line):
    """Parse one URL-quoted list-file line into ``(props, fname)``.

    The line layout, as implied by the splitting below, is
    ``<tok> <inode> <tok> |key=value|...|key=value| -- <path>``.

    Returns:
        props: dict of string attribute values keyed by attribute name,
            plus an ``"inode"`` entry, with ``DROPPED_KEYS`` removed.
        fname: the path portion of the line, stripped of surrounding
            whitespace.

    Raises:
        ValueError: if the line lacks the ``" -- "`` separator, the ``|``
            delimiters, the three-token header, or a ``key=value`` pair.
    """
    left, right = unquote(line).split(" -- ", 1)
    fname = right.strip()
    header, meta = left.split("|", 1)
    _, inode, _ = header.split()
    # Discard whatever follows the final '|' of the attribute section.
    meta, _ = meta.rsplit("|", 1)
    # maxsplit=1 keeps values that themselves contain '=' intact.
    props = dict(prop.split("=", 1) for prop in meta.split("|"))
    props["inode"] = inode
    for key in DROPPED_KEYS:
        # Tolerate records that do not carry one of the dropped attributes.
        props.pop(key, None)
    return props, fname


recs = []
count = 0
progress = 0
file = "data/list-17075953.list.gather-info.d/list-000"
with open(file) as gpfs_data:
    for line in gpfs_data:
        props, fname = _parse_record(line)
        recs.append(props)
        count += 1
        progress += 1
        if progress >= PROGRESS_INTERVAL:
            print(f"{datetime.datetime.now()} progress: {count}: {fname}")
            progress = 0
        # Checked after appending, so up to MAX_RECORDS + 1 records are kept.
        if count > MAX_RECORDS:
            break
```
%% Cell type:code id:aa0f2ba7 tags:
```
# Build one DataFrame row per parsed record (recs is a list of dicts).
df = pd.DataFrame(recs)
```
%% Cell type:code id:d6308a64 tags:
```
# Map the list-file attribute names onto the conventional *time names.
time_column_names = {
    'access': 'atime',
    'create': 'ctime',
    'modify': 'mtime',
}
df = df.rename(columns=time_column_names)
```
%% Cell type:code id:c60d8868 tags:
```
# Convert the listed string columns to typed columns.
# int64 is requested explicitly: plain "int" has a platform-dependent width
# and can be 32-bit, which would overflow on byte sizes above 2 GiB.
for int_col in ['size', 'kballoc', 'uid', 'gid']:
    df[int_col] = df[int_col].astype("int64")
# Timestamp columns become nanosecond-resolution datetimes.
for time_col in ['atime', 'ctime', 'mtime']:
    df[time_col] = df[time_col].astype('datetime64[ns]')
```
%% Cell type:code id:4ca6e641 tags:
```
# Report the most recent access time across all records.
latest_atime = df['atime'].max()
print(f"max atime: {latest_atime}\n")
```
%% Cell type:code id:71fd3b6f tags:
```
# Print the DataFrame summary (columns, dtypes, non-null counts, memory use).
df.info()
```
%% Cell type:code id:cc576380 tags:
```
# Latest access time per uid; sort=False keeps the groups in first-seen order.
atime_by_uid = df.groupby(["uid"], sort=False)["atime"]
print(atime_by_uid.max())
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment