diff --git a/report-grouby-tld-year-of-last-access.ipynb b/report-grouby-tld-year-of-last-access.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..95a4567444a224d2c8c1cfd5f3a3961ad682814f
--- /dev/null
+++ b/report-grouby-tld-year-of-last-access.ipynb
@@ -0,0 +1,235 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5fb66d11",
+   "metadata": {},
+   "source": [
+    "# run report on pickled list policy data\n",
+    "\n",
+    "The script reads pickled files that match the `glob_pattern` from the `pickledir` derived from `dirname` and runs the report saving it as a csv to the peer \"`dirname`-reports\" dir by default.\n",
+    "\n",
+    "Some progress info is available via the `verbose` flag.\n",
+    "\n",
+    "The current report aggregates storage stats by top-level-dir and age (year) of data's last access. The goal of this report is to understand the distribution of lesser-used data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5059337b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from urllib.parse import unquote\n",
+    "import sys\n",
+    "import os\n",
+    "import pathlib\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5f4c10d1",
+   "metadata": {},
+   "source": [
+    "## input vars"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92ddc402",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dirname=\"\"  # directory to find files to pickle\n",
+    "glob_pattern = \"*.gz\"  # file name glob pattern to match, can be file name for individual file\n",
+    "line_regex_filter = \".*\"   # regex to match lines of interest in file\n",
+    "pickledir=f\"{dirname}/pickles\"\n",
+    "reportdir=f\"{dirname}-reports\"\n",
+    "tldpath=\"/\"\n",
+    "\n",
+    "verbose = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed367712",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get top level dir on which to aggregate\n",
+    "\n",
+    "def get_tld(df, dirname):\n",
+    "    dirpaths = dirname.split(\"/\")\n",
+    "    new=df[\"path\"].str.split(\"/\", n=len(dirpaths)+1, expand=True)\n",
+    "    df[\"tld\"] = new[len(dirpaths)]\n",
+    "   \n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dd92dd03",
+   "metadata": {},
+   "source": [
+    "## Read and parse the files according to glob_pattern"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "20315d88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# collect the paths under pickledir that match glob_pattern, as plain strings\n",
+    "dirpath = pathlib.Path(pickledir)\n",
+    "files = [\n",
+    "    str(match) for match in dirpath.glob(glob_pattern)\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbad833f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): pd.read_pickle can execute arbitrary code while loading; only point pickledir at trusted files.\n",
+    "parsedfiles = list()\n",
+    "for file in files:\n",
+    "    if (verbose): print(f\"parse: {file}\")\n",
+    "    parsedfiles.append(pd.read_pickle(file))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4ed9ca1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not parsedfiles:  # nothing matched; pd.concat([]) would only raise an opaque ValueError\n",
+    "    raise FileNotFoundError(f\"no pickled files matching {glob_pattern!r} found in {pickledir!r}\")\n",
+    "df = pd.concat(parsedfiles)\n",
+    "del parsedfiles  # drop the per-file frames now that df holds the combined rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b69c9fde",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = get_tld(df, tldpath)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4352f00c",
+   "metadata": {},
+   "source": [
+    "## Run report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3fe4e71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report = df.groupby([\"tld\", df[\"access\"].dt.year]).agg({\"size\": [\"sum\", \"mean\", \"median\", \"min\", \"max\", \"std\", \"count\"]})  # size stats per (tld, access year)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "329bc196",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "del(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "754fcc89",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.columns.values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f279c061",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.columns = report.columns.get_level_values(-1)  # keep only the stat name (sum, mean, ...); unlike col[1], re-running is harmless"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ef9b007",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report[\"gigabytes\"] = report[\"sum\"]/1000/1000/1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4de0256",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if (verbose): print(report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ffc99a54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# only create dir if there is data to report\n",
+    "if len(report):\n",
+    "    os.makedirs(reportdir, exist_ok=True)  # also creates missing parents; no isdir/mkdir race"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12d02352",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if (verbose): print(f\"report: groupby-tld\")\n",
+    "report.to_csv(f\"{reportdir}/groupby-tld.csv.gz\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}