{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# File-list access-time summary\n",
    "\n",
    "Reads a per-file metadata listing (one record per line), loads it into a pandas DataFrame, and reports the latest access time overall and per `uid`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63ee8026",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from urllib.parse import unquote\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input list file; one record per line.\n",
    "LIST_FILE = \"data/list-17075953.list.gather-info.d/list-000\"\n",
    "# Fields that are parsed from each record but not kept in the table.\n",
    "DROPPED_KEYS = (\"heat\", \"pool\", \"mode\", \"misc\")\n",
    "# Print a progress line every PROGRESS_EVERY records.\n",
    "PROGRESS_EVERY = 20_000_000\n",
    "# Stop reading once this many records have been collected.\n",
    "MAX_RECORDS = 200_000_000"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-md",
   "metadata": {},
   "source": [
    "## Load records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "parse-list-line",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_list_line(line):\n",
    "    \"\"\"Parse one line of the list file into a dict of string fields.\n",
    "\n",
    "    Expected layout after percent-decoding:\n",
    "        TOKEN INODE TOKEN |key=value|key=value|...|TRAILING -- PATH\n",
    "\n",
    "    Returns a tuple (props, fname). props maps each key to its string\n",
    "    value, plus \"inode\", with DROPPED_KEYS removed. fname is the path\n",
    "    with surrounding whitespace stripped.\n",
    "\n",
    "    Raises ValueError when the line does not match the layout.\n",
    "    \"\"\"\n",
    "    left, right = unquote(line).split(\" -- \", 1)\n",
    "    fname = right.strip()\n",
    "    head, meta = left.split(\"|\", 1)\n",
    "    # The inode is the middle of three whitespace-separated tokens.\n",
    "    _, inode, _ = head.split()\n",
    "    # Discard whatever follows the last '|'.\n",
    "    meta, _ = meta.rsplit(\"|\", 1)\n",
    "    # maxsplit=1 keeps values that themselves contain '='.\n",
    "    props = dict(prop.split(\"=\", 1) for prop in meta.split(\"|\"))\n",
    "    props[\"inode\"] = inode\n",
    "    for key in DROPPED_KEYS:\n",
    "        # Tolerate records that lack one of the dropped fields.\n",
    "        props.pop(key, None)\n",
    "    return props, fname"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-records",
   "metadata": {},
   "outputs": [],
   "source": [
    "recs = []\n",
    "count = 0\n",
    "\n",
    "with open(LIST_FILE) as gpfs_data:\n",
    "    for line in gpfs_data:\n",
    "        if not line.strip():\n",
    "            # Skip blank lines instead of failing on the tuple unpack.\n",
    "            continue\n",
    "        props, fname = parse_list_line(line)\n",
    "        recs.append(props)\n",
    "        count += 1\n",
    "        if count % PROGRESS_EVERY == 0:\n",
    "            print(f\"{datetime.datetime.now()} progress: {count}: {fname}\")\n",
    "        if count >= MAX_RECORDS:\n",
    "            break"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "table-md",
   "metadata": {},
   "source": [
    "## Build and type the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa0f2ba7",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(recs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6308a64",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename(columns={'access': 'atime', 'create': 'ctime', 'modify': 'mtime'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c60d8868",
   "metadata": {},
   "outputs": [],
   "source": [
    "for intcol in ['size', 'kballoc', 'uid', 'gid']:\n",
    "    # Explicit 64-bit: plain \"int\" can be 32-bit on some platforms\n",
    "    # (e.g. Windows with NumPy < 2), which would overflow large sizes.\n",
    "    df[intcol] = df[intcol].astype(\"int64\")\n",
    "\n",
    "for timecol in ['atime', 'ctime', 'mtime']:\n",
    "    df[timecol] = df[timecol].astype('datetime64[ns]')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "results-md",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ca6e641",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"max atime: {df['atime'].max()}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71fd3b6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc576380",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(df.groupby([\"uid\"], sort=False)[\"atime\"].max())"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}