diff --git a/scratch-log-explorations.ipynb b/scratch-log-explorations.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..c27ec47d59f08510623d8ae33e4a95f9ab2993eb
--- /dev/null
+++ b/scratch-log-explorations.ipynb
@@ -0,0 +1,606 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook to explore parsing of the gpfs policy outputs\n",
+    "\n",
+    "This is a collection of cells to understand data.\n",
+    "No particular endpoint in mind."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "    5001:000fffffffffffff:0000000000004741:4b8f012b:0:2c172b:10002:0:40!hyun.d.song-vandy/pull_31/m1/prot_m1.out:13!scratch_tier1;253!|size=444|kballoc=0|access=2022-01-01 06:58:37.177440|create=2022-01-01 06:21:33.356110|modify=2022-01-01 06:23:47.011273|uid=10973|gid=10973|heat=+0.00000000000000E+000|pool=scratch_tier1|path=/scratch/hyun.d.song-vandy/pull_31/m1/prot_m1.out|misc=FAu|"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file=\"data/mmapplypolicy.61746.962D9400.list.no_extern_list_list-30day-with-excludes_slurm-12551165_2022-03-03-04:00:09\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file=\"data/mmapplypolicy.54197.413B7AB5.list.no_extern_list_list-only-temporary-scratch_slurm-12790116_2022-03-14-18:47:51\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file=\"data/mmapplypolicy.120904.9DBFF7E6.list.no_extern_list_list-30day-with-excludes_slurm-13113652_2022-04-05-04:00:28\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Parser functions\n",
+    "\n",
+    "First we define the structure of the file, then the columns we want to use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fields=['ignore', 'size', 'kballoc', 'atime', 'ctime', 'mtime', 'uid', 'gid', 'heat', 'pool', 'path', 'misc']\n",
+    "\n",
+    "usecols=['size', 'kballoc', 'atime', 'ctime', 'mtime', 'uid', 'gid', 'heat', 'pool', 'path', 'misc']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def splitter(x):\n",
+    "    '''\n",
+    "    Return the value part of a single ``name=value`` field.\n",
+    "\n",
+    "    Only the first ``=`` separates the name from the value, so a value that\n",
+    "    itself contains ``=`` (e.g. a file path) is returned whole.\n",
+    "    Raises IndexError when the field contains no ``=`` at all.\n",
+    "    '''\n",
+    "    return x.split(\"=\", 1)[1]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up a splitters dictionary to process all the used fields with the splitter function.\n",
+    "https://realpython.com/python-defaultdict/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# every column we keep is parsed by the same name=value splitter\n",
+    "splitters = dict.fromkeys(usecols, splitter)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load one policy list file: records are '|'-separated name=value fields and\n",
+    "# the converters strip the 'name=' prefix from every column we keep.\n",
+    "df = pd.read_csv(file,\n",
+    "                 sep=\"|\",\n",
+    "                 # assumes the list file has no header row (the sample record\n",
+    "                 # starts directly with data); with names= given, header=0\n",
+    "                 # would silently discard the first record\n",
+    "                 header=None,\n",
+    "                 #on_bad_lines=\"warn\", \n",
+    "                 index_col=False,\n",
+    "                 #nrows=1000000,\n",
+    "                 names=fields,\n",
+    "                 usecols=usecols,\n",
+    "                 converters=splitters,\n",
+    "                 parse_dates=['atime', 'ctime', 'mtime'],\n",
+    "                )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Clean up data types for numeric values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for intcol in ['size', 'kballoc', 'uid', 'gid']:\n",
+    "    df[intcol] = df[intcol].astype(\"int\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head(3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Quick summary of the total storage allocated to and used by 30+ day old files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"kballoc\"].sum()/1024"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"size\"].sum()/1024/1024"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"atime\"].min()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[[\"atime\",\"uid\"]].sort_values(by=\"atime\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[[\"uid\",\"size\"]].groupby(\"uid\").sum()/1000/1000/1000/1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(df[[\"uid\",\"size\"]].groupby(\"uid\").sum()/1000/1000/1000/1000).sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"atime\"].sort_values().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"uid\"].head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"misc\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"isfile\"]=df[\"misc\"].str.contains('F')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(df[\"uid\"].unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"uid\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get usernames from uid values via the pwd password db iteration module https://stackoverflow.com/a/421670/8928529"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pwd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwd.getpwuid(12137)[0].split(\":\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def getuser(uid):\n",
+    "    '''\n",
+    "    Look up the account name for a uid in the password database.\n",
+    "\n",
+    "    uid may be anything int() accepts.\n",
+    "    '''\n",
+    "    entry = pwd.getpwuid(int(uid))\n",
+    "    account = entry[0]\n",
+    "    return account.split(\":\")[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "getuser(10973)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# list every uid found in the scan next to its account name\n",
+    "for uid in sorted(df[\"uid\"].unique()):\n",
+    "    print(\"uid: {} name: {}\".format(uid, getuser(uid)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted(df[\"heat\"].unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"path\"] = df[\"path\"].astype(\"str\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split each path on '/' into at most five pieces and keep pieces 1, 3 and 4.\n",
+    "# n= is passed by keyword: pandas 2.x no longer accepts it positionally.\n",
+    "path_parts = df[\"path\"].str.split(\"/\", n=4, expand=True)\n",
+    "df = pd.concat(\n",
+    "    [df, path_parts[[1, 3, 4]].rename(columns={1: \"fs\", 3: \"scratchdir\", 4: \"filename\"})],\n",
+    "    axis=1,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.rename(columns={\"sratchdir\": \"scratchdir\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "userdata = df[[\"scratchdir\", \"size\", \"kballoc\", \"isfile\"]].groupby([\"scratchdir\"]).sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "userdata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "userdata[\"size\"]/1000/1000/1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# preview pieces 3 and 4 of the path split (n= must be a keyword in pandas 2.x)\n",
+    "df[\"path\"].str.split(\"/\", n=4, expand=True)[[3,4]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# preview the full path split (n= must be a keyword in pandas 2.x)\n",
+    "df[\"path\"].str.split(\"/\", n=4, expand=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bytesdays=df[[\"atime\",\"size\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd=bytesdays.set_index(\"atime\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd=bd.resample('D').sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# cumulate the size column explicitly so the cell still works when re-run\n",
+    "# after bd has gained further columns\n",
+    "bd[\"sum\"] = bd[\"size\"].cumsum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd[:\"2022-02-15\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "size, gb = bd[bd[\"size\"]>0].loc[:\"2022-01-01\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd.loc[:\"2021-12-31\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd.loc[:\"2022-01-01\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd.loc[\"2022-01-01\":]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd[bd[\"size\"]>0]/1024/1024/1024 #.plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd[\"gb\"] = bd[\"sum\"]/1024/1024/1024"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd[\"gb\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "b2d=bd[\"2021-10-01\":]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "1024*1024*1024*1024"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd7=b2d[[\"gb\"]].rolling(7, center=True).sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the daily series together with its centered 7-day rolling sum\n",
+    "fig, ax = plt.subplots()\n",
+    "#ax.plot(kW, marker='.', markersize=2, color='gray', linestyle='None', label='Hourly Average')\n",
+    "ax.plot(b2d[\"gb\"], color='brown', linewidth=2, label='1-day Average')\n",
+    "# bd7 is built with rolling(7).sum(), so label it as a sum rather than an average\n",
+    "ax.plot(bd7[\"gb\"], color='black', linewidth=1, label='Trend (7 day Rolling Sum)')\n",
+    "ax.legend()\n",
+    "ax.set_ylabel('Size (GBytes)')\n",
+    "ax.set_title('Cheaha Trends in Scratch Usage');"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}