" '/data/project/triplab/backups/react/G/REACT/Participant Data/Development for Databases and Syntax/REACT_annotation_development/QC_urban_pedestrian_sets_oct_2021/Inaara/Completed Frames/106199_61/frame13851_2020-08-03 09_23_31.485000-05_00.jpg',\n",
" title=\"File Count By File Size\", xlabel='Size Group',ylabel='Count')\n",
"f2.show()"
]
}
}
],
],
"metadata": {
"metadata": {
...
...
%% Cell type:markdown id: tags:
## Example Dask Workflow
%% Cell type:code id: tags:
``` python
from pathlib import Path
from rc_gpfs import compute
```
%% Cell type:code id: tags:
``` python
with_cuda = True

manager = compute.start_local_cluster(
    with_cuda,
    threads_per_worker=10,
    local_directory='/scratch/local',
    rmm_pool_size='70 GiB',
    rmm_managed_memory=True,
    pre_import='cudf,rmm')
```
%% Output
WARNING:bokeh.server.util:Host wildcard '*' will allow connections originating from multiple (or possibly all) hostnames or IPs. Use non-wildcard values to restrict access explicitly
%% Cell type:markdown id: tags:
Cluster settings:
- `threads_per_worker`: start 10 threads on each GPU worker
- `local_directory`: local working directory for the dask worker processes
- `rmm_pool_size`: initialize a 70 GiB memory pool to greatly reduce the number of memory allocation requests during work. If needed, dask can still use up to the maximum VRAM; this is just the initial allocation
- `pre_import`: pre-load each worker with the given libraries
%% Cell type:markdown id: tags:
### Dask Dashboard
If you're on the same network as the compute node (via VPN or sshuttle), you can access the dask dashboard in your browser by going to `<node_ip>:8787`. For example, jobs on `c0241` will have a dashboard at `172.20.201.241:8787`. You can also print the link using the `dashboard_link` property, but that will most likely show `127.0.0.1` as the IP, which will not work from your machine.
%% Cell type:code id: tags:
``` python
manager.dashboard_link
```
%% Output
'http://127.0.0.1:8787/status'
%% Cell type:code id: tags:
``` python
!hostname --ip-address
```
%% Output
172.20.201.241
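%% Cell type:markdown id: tags:
If you'd rather not look up the node IP by hand, a minimal sketch for rebuilding a reachable URL is below. It assumes the node's hostname resolves to the same routable IP that `hostname --ip-address` reports; only `manager.dashboard_link` comes from the workflow above.
%% Cell type:code id: tags:
``` python
import socket
from urllib.parse import urlparse

# Swap the loopback host in dashboard_link for the node's IP so the link
# works from a VPN/sshuttle session.
port = urlparse(manager.dashboard_link).port or 8787
node_ip = socket.gethostbyname(socket.gethostname())
print(f"http://{node_ip}:{port}/status")
```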
%% Cell type:markdown id: tags:
It's advised to keep the dashboard available, especially when using methods like `persist()` that look like they have completed successfully but are still running computation in the background.
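%% Cell type:markdown id: tags:
For example, when persisting a collection you can block until the background work actually finishes (or watch a progress bar) rather than assuming the cell returning means the computation is done. This is a minimal sketch that assumes `df` is a dask dataframe already created on this cluster.
%% Cell type:code id: tags:
``` python
from dask.distributed import progress, wait

# persist() returns immediately while partitions are still being computed on
# the workers; progress()/wait() make that background work visible.
df = df.persist()
progress(df)  # live progress bar tied to the same tasks shown on the dashboard
wait(df)      # block until every partition is actually materialized
```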
%% Cell type:markdown id: tags:
### Shutdown Cluster
It's imperative to shut down the cluster when you're done working. There have been a number of instances where I've restarted the kernel in the middle of a dask compute task and the worker processes could not be successfully killed. This caused the dask watchdog process to time out, which led NHC to put the node into a drain state; load then increased steadily until the node became unresponsive and had to be rebooted. Before ending your job, call `manager.shutdown()` to close both the dask client and cluster objects.
%% Cell type:code id: tags:
``` python
#manager.shutdown()
```
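%% Cell type:markdown id: tags:
One illustrative pattern (not part of the original workflow) is to wrap the analysis so the cluster is torn down even if the work raises; `run_analysis` is a hypothetical stand-in for whatever compute you do.
%% Cell type:code id: tags:
``` python
# Guarantee the workers are cleaned up before the job ends, even on error or
# interrupt. run_analysis() is a hypothetical placeholder.
try:
    result = run_analysis(manager)
finally:
    manager.shutdown()
```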
%% Cell type:markdown id: tags:
## Dask Analysis
This setup assumes you're using a LocalCUDACluster, so it sets the default dataframe backend to `cudf` instead of `pandas`. Remember that every partition is a `cudf.DataFrame`, which is mostly, but not always, compatible with a pandas-style workflow. A common issue is doing anything semi-complicated with datetimes, like cutting them into groups; it's generally better to convert those to unix timestamps (ints) first and work from there.
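%% Cell type:markdown id: tags:
As a hedged sketch of that datetime caveat: assuming a dask dataframe `df` (set up in the cells below) with a datetime64 column named `access` (the column name is illustrative), the conversion and a simple integer-based grouping might look like this.
%% Cell type:code id: tags:
``` python
import time

# Cast the datetime column to integer nanoseconds, then down to seconds, so
# later binning/grouping only needs plain integer math on the GPU.
df['access_ts'] = df['access'].astype('int64') // 10**9

# "Cut into groups" with integer arithmetic instead of datetime APIs,
# e.g. 30-day age buckets relative to the current time.
cutoff_ts = int(time.time())
df['age_bucket'] = (cutoff_ts - df['access_ts']) // (30 * 86400)
```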
%% Cell type:code id: tags:
``` python
import dask.dataframe as dd
import dask

dask.config.set({'dataframe.backend': 'cudf'})
```
%% Output
<dask.config.set at 0x2aabe78824d0>
%% Cell type:markdown id: tags:
This example also uses one of the flat parquet datasets from a single GPFS policy run, but can be extended to the hive structure with very minor modifications
%% Cell type:markdown id: tags:
### Dataframe Indexing
If you're using the flat parquet, it's highly advised not to set an index after setting up the dataframe unless the `path` column is excluded from the dataset. Setting an index causes a large shuffle that must be done mostly in-memory, and the `path` column alone can exceed 80 GB in the `data-project` GPFS logs. This can be worked around by reading just a few columns and setting up managed memory (`rmm`) in the cluster options, which was done at the beginning of the notebook. This allows most compute to take place, but I wouldn't depend on it for everything.
If you need a dataset that has already been indexed by path, use the hive dataset versions instead. Those are found in `/data/rc/gpfs-policy/data/gpfs-hive/{data-project,data-user,scratch}`. Not all of these have been computed yet, though, so a desired dataset may be unavailable.
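%% Cell type:markdown id: tags:
A minimal sketch of both options is below. The flat-parquet path and the column names are placeholders; only the hive root above comes from this document.
%% Cell type:code id: tags:
``` python
# Option 1: flat parquet, reading only the columns you need and leaving out
# `path` so any later shuffle stays manageable.
flat = dd.read_parquet(
    '/path/to/flat/parquet/dataset',        # placeholder path
    columns=['size', 'kballoc', 'access'],  # illustrative column names
)

# Option 2: a hive dataset that has already been indexed by path.
hive = dd.read_parquet('/data/rc/gpfs-policy/data/gpfs-hive/data-project')
```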
%% Cell type:markdown id: tags:
### Example: Arranging Files into Groups By Similar Size
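%% Cell type:markdown id: tags:
A minimal sketch of one way to do this: bucket each file by size using only comparisons and integer math, which stays fully compatible with cudf-backed partitions, then count files per group. The `size` column name and the bin edges are assumptions.
%% Cell type:code id: tags:
``` python
# Bin edges in bytes: 4 KiB, 1 MiB, 1 GiB (illustrative choices).
bins = [4 * 2**10, 2**20, 2**30]

# size_group is 0 for files <= 4 KiB, 1 for <= 1 MiB, 2 for <= 1 GiB, 3 above.
size_group = sum((df['size'] > edge).astype('int8') for edge in bins)

# Count files in each size group; the result is small, so bring it back locally.
counts = size_group.value_counts().compute().sort_index()
counts
```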