Skip to content
Snippets Groups Projects
max-access-per-user.ipynb 2.98 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63ee8026",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from urllib.parse import unquote\n",
    "import sys\n",
    "\n",
    "# Input list file. Each line is parsed below as\n",
    "#   '<token> <inode> <token> |key=value|...|<trailing> -- <url-quoted path>'\n",
    "file = \"data/list-17075953.list.gather-info.d/list-000\"\n",
    "\n",
    "# Attributes that are parsed but not kept in the records.\n",
    "DROP_KEYS = (\"heat\", \"pool\", \"mode\", \"misc\")\n",
    "# Emit one progress line every PROGRESS_EVERY records.\n",
    "PROGRESS_EVERY = 20000000\n",
    "# Stop reading once more than MAX_RECORDS records have been collected.\n",
    "MAX_RECORDS = 200000000\n",
    "\n",
    "recs = []\n",
    "count = 0\n",
    "\n",
    "with open(file) as gpfs_data:\n",
    "    for line in gpfs_data:\n",
    "        # A blank line (e.g. at end of file) carries no record.\n",
    "        if not line.strip():\n",
    "            continue\n",
    "        # Everything after the first ' -- ' is the file name.\n",
    "        left, right = unquote(line).split(\" -- \", 1)\n",
    "        fname = right.strip()\n",
    "        # The text before the first '|' holds three whitespace-separated\n",
    "        # tokens; the middle one is the inode.\n",
    "        head, meta = left.split('|', 1)\n",
    "        _, inode, _ = head.split()\n",
    "        # Drop whatever follows the final '|' of the attribute list.\n",
    "        meta, _ = meta.rsplit('|', 1)\n",
    "        # maxsplit=1 keeps an '=' inside a value from breaking dict().\n",
    "        props = dict(prop.split('=', 1) for prop in meta.split('|'))\n",
    "        props[\"inode\"] = inode\n",
    "        for key in DROP_KEYS:\n",
    "            # pop() with a default tolerates records lacking an attribute.\n",
    "            props.pop(key, None)\n",
    "        recs.append(props)\n",
    "        count += 1\n",
    "        if count % PROGRESS_EVERY == 0:\n",
    "            print(f\"{datetime.datetime.now()} progress: {count}: {fname}\")\n",
    "        if count > MAX_RECORDS:\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa0f2ba7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a single DataFrame from the list of parsed record dicts.\n",
    "df = pd.DataFrame(data=recs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6308a64",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the access/create/modify columns to atime/ctime/mtime.\n",
    "time_column_names = {'access': 'atime', 'create': 'ctime', 'modify': 'mtime'}\n",
    "df = df.rename(columns=time_column_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c60d8868",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use an explicit 64-bit dtype: the width of \"int\" depends on the platform\n",
    "# and NumPy version, and a 32-bit integer cannot hold sizes of 2 GiB or more.\n",
    "for intcol in ['size', 'kballoc', 'uid', 'gid']:\n",
    "    df[intcol] = df[intcol].astype(\"int64\")\n",
    "\n",
    "# Convert the three time columns to datetime64 values.\n",
    "for timecol in ['atime', 'ctime', 'mtime']:\n",
    "    df[timecol] = df[timecol].astype('datetime64[ns]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ca6e641",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Most recent access time across all records.\n",
    "latest_atime = df['atime'].max()\n",
    "print(f\"max atime: {latest_atime}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71fd3b6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print a concise summary of the frame (columns, dtypes, memory usage).\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc576380",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Latest access time per uid; sort=False leaves the groups unsorted.\n",
    "latest_atime_by_uid = df.groupby([\"uid\"], sort=False)[\"atime\"].max()\n",
    "print(latest_atime_by_uid)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}