{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1f4c3d2",
   "metadata": {},
   "source": [
    "# Per-user space and access-time report\n",
    "\n",
    "Loads the pickled DataFrames matching `glob_pattern` under `data_dir`, maps each `uid` to a login name, and reports per user the summed `size` (GB and GiB) and the newest and oldest `atime`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f3842e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import glob\n",
    "import os\n",
    "import pwd\n",
    "import sys\n",
    "from urllib.parse import unquote\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e2b7c14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Which listing to report on. data_dir is relative to the notebook's working\n",
    "# directory; files are addressed by path (no os.chdir) so this cell can be re-run.\n",
    "#\n",
    "# Settings used for earlier reports, kept for reference:\n",
    "#   report_name = \"temporary-scratch\"\n",
    "#   data_dir = \"data/list-17075953.list.gather-info.d\"\n",
    "#   data_dir = \"data/list-17094088.list.gather-info.d\"\n",
    "#   glob_pattern = \"*.gz\"\n",
    "#\n",
    "#   report_name = \"old-scratch\"\n",
    "#   data_dir = \"data/list-16144464.list.gather-info.d\"\n",
    "report_name = \"old-scratch\"\n",
    "data_dir = \"data/list-policy_old-scratch_2022-09-15/pickles\"\n",
    "glob_pattern = \"list-*.gz\"\n",
    "\n",
    "# Decimal and binary multipliers for the size columns.\n",
    "BYTES_PER_GB = 1000**3\n",
    "BYTES_PER_GIB = 1024**3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b379845f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3d9f107",
   "metadata": {},
   "outputs": [],
   "source": [
    "!which python"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6d0e7f3",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2a94675",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): pd.read_pickle can execute arbitrary code while loading;\n",
    "# only point data_dir at pickles from a trusted source.\n",
    "pickle_paths = sorted(glob.glob(os.path.join(data_dir, glob_pattern)))\n",
    "if not pickle_paths:\n",
    "    raise FileNotFoundError(f\"no files matching {glob_pattern!r} in {data_dir!r}\")\n",
    "\n",
    "frames = []\n",
    "for path in pickle_paths:\n",
    "    print(f\"processing: {os.path.basename(path)}\")\n",
    "    frames.append(pd.read_pickle(path))\n",
    "\n",
    "# combine pickled dfs into one df; concatenating once avoids re-copying\n",
    "# every previously loaded row on each iteration\n",
    "# https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html\n",
    "df = pd.concat(frames)\n",
    "del frames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6a4e9bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ca6e641",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"max atime: {df['access'].max()}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc576380",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby([\"uid\"], sort=False)[\"access\"].max().sort_values()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a482ac6",
   "metadata": {},
   "source": [
    "## User names\n",
    "\n",
    "Get user names from uids: https://stackoverflow.com/a/421670/8928529"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c4e531e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def getuser(uid):\n",
    "    \"\"\"Return the login name for numeric ``uid``.\n",
    "\n",
    "    Falls back to the uid rendered as a string when the password database\n",
    "    has no entry for it, so a single unknown owner does not abort the report.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        return pwd.getpwuid(int(uid)).pw_name\n",
    "    except KeyError:\n",
    "        return str(int(uid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6980fc30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set uname for uid\n",
    "uid_to_name = {uid: getuser(uid) for uid in sorted(df[\"uid\"].unique())}\n",
    "for uid, uname in uid_to_name.items():\n",
    "    print(\"uid: {} name: {}\".format(uid, uname))\n",
    "\n",
    "# One vectorised lookup instead of one boolean-mask assignment per user.\n",
    "df[\"uname\"] = df[\"uid\"].map(uid_to_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c83a5b19",
   "metadata": {},
   "source": [
    "## Space per user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29af2c16",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the whole table in one cell so re-running it can never leave `space`\n",
    "# without the GB/GiB columns that the report cells select.\n",
    "space = df.groupby([\"uname\"], sort=False)[\"size\"].sum().sort_values().to_frame()\n",
    "space[\"GB\"] = space[\"size\"] / BYTES_PER_GB\n",
    "space[\"GiB\"] = space[\"size\"] / BYTES_PER_GIB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5065d98e",
   "metadata": {},
   "outputs": [],
   "source": [
    "space"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d49f6e20",
   "metadata": {},
   "source": [
    "## Access times per user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bf8adb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the cells above read df[\"access\"] while this one reads df[\"atime\"];\n",
    "# confirm both columns exist in the pickles and which one is intended.\n",
    "atime_by_user = df.groupby([\"uname\"], sort=False)[\"atime\"]\n",
    "\n",
    "# Rows are ordered by each user's newest atime; \"oldest\" is aligned on the uname index.\n",
    "atime = atime_by_user.max().sort_values().to_frame(name=\"newest\")\n",
    "atime[\"oldest\"] = atime_by_user.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fd5346a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Date-only copies of the timestamps for the report columns.\n",
    "for col in (\"newest\", \"oldest\"):\n",
    "    atime[f\"{col}_date\"] = pd.to_datetime(atime[col].dt.strftime(\"%Y-%m-%d\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7b12c58",
   "metadata": {},
   "source": [
    "## Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "422ecc0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join the two per-user tables side by side on their shared uname index.\n",
    "report = pd.concat([atime, space], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c979329",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_rows', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32139106",
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://stackoverflow.com/a/20937592/8928529\n",
    "pd.options.display.float_format = '{:,.2f}'.format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d40d60f",
   "metadata": {},
   "outputs": [],
   "source": [
    "report[[\"newest_date\", \"oldest_date\", \"GB\", \"GiB\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "871995b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plain-text version of the table above, with totals.\n",
    "print(f\"report: {report_name}\")\n",
    "print(report[[\"newest_date\", \"oldest_date\", \"GB\", \"GiB\"]])\n",
    "print(\"--------\\ntotals: {0:,.2f}GB {1:,.2f}GiB\".format(report['GB'].sum(), report['GiB'].sum()))"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}