"git@gitlab.rc.uab.edu:rc/gpfs-policy.git" did not exist on "f9708260e1d1580831f8019296a2800c2957fc95"
Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "63ee8026",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from urllib.parse import unquote\n",
"import sys\n",
"\n",
"recs = []\n",
"count=0\n",
"progress=0\n",
"\n",
"file = \"data/list-17075953.list.gather-info.d/list-000\"\n",
"\n",
"with open(file) as gpfs_data:\n",
" for line in gpfs_data:\n",
" #print(unquote(line))\n",
" left, right = unquote(line).split(\" -- \", 1)\n",
" fname = right.strip()\n",
" inode, meta = left.split('|', 1)\n",
" _, inode, _ = inode.split() \n",
" #print(meta)\n",
" meta, _ = meta.rsplit('|', 1)\n",
" #print(meta)\n",
" props = []\n",
" for prop in meta.split('|'):\n",
" props.append(prop.split('='))\n",
" #props.append(['path', fname])\n",
" #print(props)\n",
" props = dict(props)\n",
" props[\"inode\"] = inode\n",
" for key in [\"heat\", \"pool\", \"mode\", \"misc\"]:\n",
" del props[key] \n",
" recs.append(props)\n",
" count += 1\n",
" progress += 1\n",
" if (progress // 20000000):\n",
" print(f\"{datetime.datetime.now()} progress: {count}: {fname}\")\n",
" progress = 0\n",
" if (count > 200000000):\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa0f2ba7",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(recs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6308a64",
"metadata": {},
"outputs": [],
"source": [
"df = df.rename(columns={'access': 'atime', 'create': 'ctime', 'modify': 'mtime'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c60d8868",
"metadata": {},
"outputs": [],
"source": [
"for intcol in ['size', 'kballoc', 'uid', 'gid']:\n",
" df[intcol] = df[intcol].astype(\"int\")\n",
"\n",
"for intcol in ['atime', 'ctime', 'mtime']:\n",
" df[intcol] = df[intcol].astype('datetime64[ns]')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ca6e641",
"metadata": {},
"outputs": [],
"source": [
"print(f\"max atime: {df['atime'].max()}\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71fd3b6f",
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc576380",
"metadata": {},
"outputs": [],
"source": [
"print(df.groupby([\"uid\"], sort=False)[\"atime\"].max())"
]
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}