From 37f0045609ee6b76907acaa7f8447150984b7f48 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 24 Apr 2025 10:10:46 -0500
Subject: [PATCH 01/11] add pytest to dependencies

---
 poetry.lock    | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 pyproject.toml |  1 +
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 02961ca..545215b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,5 +1,18 @@
 # This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
 
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main"]
+markers = "sys_platform == \"win32\""
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
 [[package]]
 name = "colormaps"
 version = "0.4.2"
@@ -82,6 +95,18 @@ files = [
 docs = ["Sphinx", "furo"]
 test = ["objgraph", "psutil"]
 
+[[package]]
+name = "iniconfig"
+version = "2.1.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
+    {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
+]
+
 [[package]]
 name = "numpy"
 version = "2.2.5"
@@ -175,6 +200,22 @@ files = [
 packaging = "*"
 tenacity = ">=6.2.0"
 
+[[package]]
+name = "pluggy"
+version = "1.5.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
+    {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
 [[package]]
 name = "polars"
 version = "1.27.1"
@@ -274,6 +315,27 @@ files = [
 [package.extras]
 test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
 
+[[package]]
+name = "pytest"
+version = "8.3.5"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"},
+    {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=1.5,<2"
+
+[package.extras]
+dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
+
 [[package]]
 name = "sqlalchemy"
 version = "2.0.40"
@@ -420,4 +482,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12"
-content-hash = "b94c26465ac498c7656fafdf498f424379a1e882e5da6ca3a9dcac48b076b770"
+content-hash = "e5d35bf795dfae55f35d9e809e407295bdb8ff0dc0edf6fa252f940255279f51"
diff --git a/pyproject.toml b/pyproject.toml
index 62cb958..b87f6e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ numpy = "*"
 plotly = "^5.24.1"
 polars = ">=1.27.0"
 pyarrow = "^19.0.1"
+pytest = "^8.3.5"
 sqlalchemy = "*"
 typeguard = "*"
 
-- 
GitLab


From 739f53560d7a2609aecd182c5eb32d3d8be4b274 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 24 Apr 2025 10:33:04 -0500
Subject: [PATCH 02/11] add flake8 plugin for pytest linting

---
 poetry.lock    | 31 +++++++++++++++++++++++++++++--
 pyproject.toml | 10 ++++++++--
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 545215b..090f938 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -25,6 +25,33 @@ files = [
     {file = "colormaps-0.4.2.tar.gz", hash = "sha256:c703d62a4fededfcfed57bef6aa772422b535896645cbf8c58690e4bbe16005c"},
 ]
 
+[[package]]
+name = "flake8-plugin-utils"
+version = "1.3.3"
+description = "The package provides base classes and utils for flake8 plugin writing"
+optional = false
+python-versions = ">=3.6,<4.0"
+groups = ["main"]
+files = [
+    {file = "flake8-plugin-utils-1.3.3.tar.gz", hash = "sha256:39f6f338d038b301c6fd344b06f2e81e382b68fa03c0560dff0d9b1791a11a2c"},
+    {file = "flake8_plugin_utils-1.3.3-py3-none-any.whl", hash = "sha256:e4848c57d9d50f19100c2d75fa794b72df068666a9041b4b0409be923356a3ed"},
+]
+
+[[package]]
+name = "flake8-pytest-style"
+version = "2.1.0"
+description = "A flake8 plugin checking common style issues or inconsistencies with pytest-based tests."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "flake8_pytest_style-2.1.0-py3-none-any.whl", hash = "sha256:a0d6dddcd533bfc13f19b8445907be0330c5e6ccf7090bcd9d5fa5a0b1b65e71"},
+    {file = "flake8_pytest_style-2.1.0.tar.gz", hash = "sha256:fee6befdb5915d600ef24e38d48a077d0dcffb032945ae0169486e7ff8a1079a"},
+]
+
+[package.dependencies]
+flake8-plugin-utils = ">=1.3.2,<2.0.0"
+
 [[package]]
 name = "greenlet"
 version = "3.2.1"
@@ -481,5 +508,5 @@ files = [
 
 [metadata]
 lock-version = "2.1"
-python-versions = ">=3.12"
-content-hash = "e5d35bf795dfae55f35d9e809e407295bdb8ff0dc0edf6fa252f940255279f51"
+python-versions = ">=3.12,<4.0"
+content-hash = "f7cca41d57b1b597fdc1f0d6584dbe3148b9d39515a1830c3ef96920e8969ffd"
diff --git a/pyproject.toml b/pyproject.toml
index b87f6e4..cd10390 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ maintainers = [
 ]
 license = "AFL"
 readme = "README.md"
-requires-python = ">=3.12"
+requires-python = ">=3.12,<4.0"
 keywords = ["GPFS", "policy", "aggregation", "reporting"]
 dynamic = ["version","dependencies","classifiers"]
 
@@ -33,8 +33,9 @@ classifiers = [
 version = "0.0.0"
 
 [tool.poetry.dependencies]
-python = ">=3.12"
+python = ">=3.12,<4.0"
 colormaps = "*"
+flake8-pytest-style = "^2.1.0"
 numpy = "*"
 plotly = "^5.24.1"
 polars = ">=1.27.0"
@@ -67,3 +68,8 @@ folders = [
 requires = ["poetry-core>=2.0.0,<3.0.0","poetry-dynamic-versioning>=1.0.0,<2.0.0"]
 build-backend = "poetry_dynamic_versioning.backend"
 
+[tool.pytest.ini_options]
+pythonpath = "src"
+addopts = [
+    "--import-mode=importlib",
+]
\ No newline at end of file
-- 
GitLab


From a9f686673b528254d6831eda51707164aec51654 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 24 Apr 2025 11:06:29 -0500
Subject: [PATCH 03/11] add pytest datafiles plugin

---
 poetry.lock    | 17 ++++++++++++++++-
 pyproject.toml |  1 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 090f938..5f2e5fd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -363,6 +363,21 @@ pluggy = ">=1.5,<2"
 [package.extras]
 dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 
+[[package]]
+name = "pytest-datafiles"
+version = "3.0.0"
+description = "py.test plugin to create a 'tmp_path' containing predefined files/directories."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "pytest-datafiles-3.0.0.tar.gz", hash = "sha256:a70c4c66a36d1cdcfc095607f04eee66eaef3fa64cbb62d60c47ce169901d1d4"},
+    {file = "pytest_datafiles-3.0.0-py2.py3-none-any.whl", hash = "sha256:2176e10d3f6e76f358925a897e21e2bcc5a0170b92fac4e66ed055eaa2ca6a22"},
+]
+
+[package.dependencies]
+pytest = ">=3.6"
+
 [[package]]
 name = "sqlalchemy"
 version = "2.0.40"
@@ -509,4 +524,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12,<4.0"
-content-hash = "f7cca41d57b1b597fdc1f0d6584dbe3148b9d39515a1830c3ef96920e8969ffd"
+content-hash = "6a3e0566e33e1c0966f07ee437a94df8b9c396d372a462e6b7daaa574eb7e0e0"
diff --git a/pyproject.toml b/pyproject.toml
index cd10390..75a8d00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ plotly = "^5.24.1"
 polars = ">=1.27.0"
 pyarrow = "^19.0.1"
 pytest = "^8.3.5"
+pytest-datafiles = "^3.0.0"
 sqlalchemy = "*"
 typeguard = "*"
 
-- 
GitLab


From 80e482bf6c818f79fa4c2bf64c1a1fb87567d4de Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 24 Apr 2025 15:55:14 -0500
Subject: [PATCH 04/11] remove flake8 plugin

---
 poetry.lock    | 29 +----------------------------
 pyproject.toml |  7 ++++---
 2 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 5f2e5fd..24f58d2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -25,33 +25,6 @@ files = [
     {file = "colormaps-0.4.2.tar.gz", hash = "sha256:c703d62a4fededfcfed57bef6aa772422b535896645cbf8c58690e4bbe16005c"},
 ]
 
-[[package]]
-name = "flake8-plugin-utils"
-version = "1.3.3"
-description = "The package provides base classes and utils for flake8 plugin writing"
-optional = false
-python-versions = ">=3.6,<4.0"
-groups = ["main"]
-files = [
-    {file = "flake8-plugin-utils-1.3.3.tar.gz", hash = "sha256:39f6f338d038b301c6fd344b06f2e81e382b68fa03c0560dff0d9b1791a11a2c"},
-    {file = "flake8_plugin_utils-1.3.3-py3-none-any.whl", hash = "sha256:e4848c57d9d50f19100c2d75fa794b72df068666a9041b4b0409be923356a3ed"},
-]
-
-[[package]]
-name = "flake8-pytest-style"
-version = "2.1.0"
-description = "A flake8 plugin checking common style issues or inconsistencies with pytest-based tests."
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "flake8_pytest_style-2.1.0-py3-none-any.whl", hash = "sha256:a0d6dddcd533bfc13f19b8445907be0330c5e6ccf7090bcd9d5fa5a0b1b65e71"},
-    {file = "flake8_pytest_style-2.1.0.tar.gz", hash = "sha256:fee6befdb5915d600ef24e38d48a077d0dcffb032945ae0169486e7ff8a1079a"},
-]
-
-[package.dependencies]
-flake8-plugin-utils = ">=1.3.2,<2.0.0"
-
 [[package]]
 name = "greenlet"
 version = "3.2.1"
@@ -524,4 +497,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12,<4.0"
-content-hash = "6a3e0566e33e1c0966f07ee437a94df8b9c396d372a462e6b7daaa574eb7e0e0"
+content-hash = "b774e71210a804105de6c03976133f42953ebfc5be59ef0d2c6cc62c842a6372"
diff --git a/pyproject.toml b/pyproject.toml
index 75a8d00..0120b6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,6 @@ version = "0.0.0"
 [tool.poetry.dependencies]
 python = ">=3.12,<4.0"
 colormaps = "*"
-flake8-pytest-style = "^2.1.0"
 numpy = "*"
 plotly = "^5.24.1"
 polars = ">=1.27.0"
@@ -69,8 +68,10 @@ folders = [
 requires = ["poetry-core>=2.0.0,<3.0.0","poetry-dynamic-versioning>=1.0.0,<2.0.0"]
 build-backend = "poetry_dynamic_versioning.backend"
 
+
 [tool.pytest.ini_options]
-pythonpath = "src"
 addopts = [
     "--import-mode=importlib",
-]
\ No newline at end of file
+]
+required_plugins = ["pytest-datafiles>=3.0.0"]
+testpaths = ["tests"]
\ No newline at end of file
-- 
GitLab


From dfff5f38c409a8c2a3055806879e224a12939866 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 24 Apr 2025 15:55:30 -0500
Subject: [PATCH 05/11] add ruff linter and formatter

---
 ruff.toml | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 ruff.toml

diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 0000000..10bc61b
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,85 @@
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".gitlab-ci.yml",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".poetry",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+    "poetry.toml",
+    "poetry.lock",
+    "test-data",
+    "legacy-scripts",
+    "extra",
+    "data"
+]
+
+# Same as Black.
+line-length = 88
+indent-width = 4
+
+# Assume Python 3.13
+target-version = "py313"
+
+[lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
+
+# Enable auto-formatting of code examples in docstrings. Markdown,
+# reStructuredText code/literal blocks and doctests are all supported.
+#
+# This is currently disabled by default, but it is planned for this
+# to be opt-out in the future.
+docstring-code-format = false
+
+# Set the line length limit used when formatting code snippets in
+# docstrings.
+#
+# This only has an effect when the `docstring-code-format` setting is
+# enabled.
+docstring-code-line-length = "dynamic"
\ No newline at end of file
-- 
GitLab


From 5b1c794a9a07571fbff5587efd14315fbb3e9989 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Fri, 25 Apr 2025 10:29:20 -0500
Subject: [PATCH 06/11] add initial testing of utility functions

---
 tests/test_utils.py | 65 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 tests/test_utils.py

diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..1f3b029
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,65 @@
+import pytest
+from typing import Literal
+from pathlib import Path
+from rc_gpfs import utils
+
+
+@pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")])
+def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
+    p_path = utils.as_path(path)
+    assert isinstance(p_path, Path)
+
+@pytest.mark.parametrize(
+    "value,unit,to_unit,use_binary,expected",
+    [
+        (1,"G","K",False,1000000),
+        (4,'K','base',True,4096),
+        ('100','base','T',False,1e-10)
+    ]
+)
+def test_convert_si(value,unit,to_unit,use_binary,expected):
+    assert utils.convert_si(value,unit,to_unit,use_binary) == expected
+
+@pytest.mark.parametrize(
+    "val,default,expected",
+    [
+        ('1 kiB',None,1024),
+        ('1 kiB',(1024**4),1024),
+        ('10 TiB',None,10*(1024**4)),
+        (None,1024,1024),
+        (None,None,None),
+        ('1.5 MiB',None,1572864)
+    ]
+)
+def test_as_bytes(val,default,expected):
+    assert utils.as_bytes(val,default) == expected
+
+class TestSizeDistribution:
+    inputs = [
+        ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"],
+        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
+        ["10 GiB", 1024, 4096, "1 KiB",0],
+        "1 MiB"
+    ]
+
+    expected_bins = [
+        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
+        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
+        [1024, 4096, 10737418240],
+        [1048576]
+    ]
+
+    expected_labels = [
+        ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"],
+        ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"],
+        ["0 B-1 KiB","1 KiB-4 KiB","4 KiB-10 GiB",">10 GiB"],
+        ["0 B-1 MiB",">1 MiB"]
+    ]
+
+    @pytest.mark.parametrize("bins,expected",list(zip(expected_bins,expected_labels)))
+    def test_create_size_bin_labels(self,bins,expected):
+        assert utils.create_size_bin_labels(bins) == expected
+    
+    @pytest.mark.parametrize("bins,expected",list(zip(inputs,list(zip(expected_bins,expected_labels)))))
+    def test_prep_size_distribution(self,bins,expected):
+        assert utils.prep_size_distribution(size_bins=bins) == expected
\ No newline at end of file
-- 
GitLab


From 6f887736901d6240a722a9d5f17dc180d4098aa4 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Fri, 25 Apr 2025 10:29:39 -0500
Subject: [PATCH 07/11] specify the names of the functions to import instead of
 *

---
 src/rc_gpfs/utils/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rc_gpfs/utils/core.py b/src/rc_gpfs/utils/core.py
index 2596c4c..d62115a 100644
--- a/src/rc_gpfs/utils/core.py
+++ b/src/rc_gpfs/utils/core.py
@@ -8,7 +8,7 @@ import pyarrow.parquet as pq
 import numpy as np
 
 from .units import as_bytes, convert_si, create_size_bin_labels
-from .datetime import *
+from .datetime import as_datetime,create_timedelta_breakpoints,create_timedelta_labels
 
 def parse_scontrol():
     job_id = os.getenv('SLURM_JOB_ID')
-- 
GitLab


From 705609889fffcb3caf65153fd2a5f9f0ac4b7fb7 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 1 May 2025 10:37:10 -0500
Subject: [PATCH 08/11] add test for size group assignment

---
 tests/test_utils.py | 109 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 4 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1f3b029..48e7063 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,6 +3,9 @@ from typing import Literal
 from pathlib import Path
 from rc_gpfs import utils
 
+from polars.testing import assert_series_equal
+import polars as pl
+
 
 @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")])
 def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
@@ -34,8 +37,102 @@ def test_convert_si(value,unit,to_unit,use_binary,expected):
 def test_as_bytes(val,default,expected):
     assert utils.as_bytes(val,default) == expected
 
-class TestSizeDistribution:
-    inputs = [
+class TestSizeGrouping:
+    input_sizes = pl.Series(
+        name="size",
+        values = [
+            0,
+            2048,
+            4096,
+            1024**2,  # 1 MiB
+            1024**3,  # 1 GiB
+            20*1024**3,  # 20 GiB
+            1024**5   # 1 PiB
+        ],
+        dtype = pl.Int128()
+    )
+
+    expected_size_groups = [
+        pl.Series(
+            name="size",
+            values=[
+                "0 B-4 KiB",
+                "0 B-4 KiB",
+                "0 B-4 KiB",
+                "4 KiB-4 MiB",
+                "4 MiB-1 GiB",
+                "10 GiB-100 GiB",
+                ">1 TiB",
+            ],
+            dtype=pl.Enum(
+                [
+                    "0 B-4 KiB",
+                    "4 KiB-4 MiB",
+                    "4 MiB-1 GiB",
+                    "1 GiB-10 GiB",
+                    "10 GiB-100 GiB",
+                    "100 GiB-1 TiB",
+                    ">1 TiB",
+                ]
+            ),
+        ),
+        pl.Series(
+            name="size",
+            values=[
+                "0 B-4 KiB",
+                "0 B-4 KiB",
+                "0 B-4 KiB",
+                "4 KiB-4 MiB",
+                "4 MiB-1 GiB",
+                "10 GiB-100 GiB",
+                ">1 TiB",
+            ],
+            dtype=pl.Enum(
+                [
+                    "0 B-4 KiB",
+                    "4 KiB-4 MiB",
+                    "4 MiB-1 GiB",
+                    "1 GiB-10 GiB",
+                    "10 GiB-100 GiB",
+                    "100 GiB-1 TiB",
+                    ">1 TiB",
+                ]
+            ),
+        ),
+        pl.Series(
+            name="size",
+            values=[
+                "0 B-1 KiB",
+                "1 KiB-4 KiB",
+                "1 KiB-4 KiB",
+                "4 KiB-10 GiB",
+                "4 KiB-10 GiB",
+                ">10 GiB",
+                ">10 GiB",
+            ],
+            dtype=pl.Enum(["0 B-1 KiB", "1 KiB-4 KiB", "4 KiB-10 GiB", ">10 GiB"]),
+        ),
+        pl.Series(
+            name="size",
+            values=[
+                "0 B-1 MiB",
+                "0 B-1 MiB",
+                "0 B-1 MiB",
+                "0 B-1 MiB",
+                ">1 MiB",
+                ">1 MiB",
+                ">1 MiB",
+            ],
+            dtype=pl.Enum(
+                [
+                    "0 B-1 MiB",
+                    ">1 MiB",
+                ]
+            ),
+        ),
+    ]
+
+    input_bins = [
         ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"],
         [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
         ["10 GiB", 1024, 4096, "1 KiB",0],
@@ -60,6 +157,10 @@ class TestSizeDistribution:
     def test_create_size_bin_labels(self,bins,expected):
         assert utils.create_size_bin_labels(bins) == expected
     
-    @pytest.mark.parametrize("bins,expected",list(zip(inputs,list(zip(expected_bins,expected_labels)))))
+    @pytest.mark.parametrize("bins,expected",list(zip(input_bins,list(zip(expected_bins,expected_labels)))))
     def test_prep_size_distribution(self,bins,expected):
-        assert utils.prep_size_distribution(size_bins=bins) == expected
\ No newline at end of file
+        assert utils.prep_size_distribution(size_bins=bins) == expected
+    
+    @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4])
+    def test_calculate_size_distribution(self, bins, expected):
+        assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected)
\ No newline at end of file
-- 
GitLab


From bda1b8953a74e701592bf1bc34cf4686e1f46966 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 1 May 2025 13:07:58 -0500
Subject: [PATCH 09/11] add initial datetime tests

---
 tests/test_utils.py | 91 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 48e7063..1433660 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -5,8 +5,9 @@ from rc_gpfs import utils
 
 from polars.testing import assert_series_equal
 import polars as pl
+import numpy as np
 
-
+### General Purpose Utils
 @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")])
 def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
     p_path = utils.as_path(path)
@@ -23,6 +24,7 @@ def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
 def test_convert_si(value,unit,to_unit,use_binary,expected):
     assert utils.convert_si(value,unit,to_unit,use_binary) == expected
 
+### Memory and File Size Utils
 @pytest.mark.parametrize(
     "val,default,expected",
     [
@@ -163,4 +165,89 @@ class TestSizeGrouping:
     
     @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4])
     def test_calculate_size_distribution(self, bins, expected):
-        assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected)
\ No newline at end of file
+        assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected)
+
+
+### DateTime and File Age Utils
+@pytest.mark.parametrize(
+        'date,expected',
+        [
+            ('2025-01-01',np.datetime64('2025-01-01T00:00:00.000000000')),
+            (np.datetime64('2025-01-01T00:00:00.000000000'),np.datetime64('2025-01-01T00:00:00.000000000')),
+            (1735689600000000000,1735689600000000000),
+            (None,None)
+        ]
+)
+def test_as_datetime(date,expected):
+    assert utils.as_datetime(date) == expected
+
+def test_as_datetime_fails():
+    with pytest.raises(ValueError):
+        utils.as_datetime('not a date')
+
+@pytest.mark.parametrize('val',[1,3,5])
+@pytest.mark.parametrize('unit',['D','W'])
+def test_as_timedelta(val,unit):
+    assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit)
+
+class TestAgeGrouping:
+    atime = np.arange('2024-08-15','2025-01-15',dtype='datetime64[D]',step=np.timedelta64(30,'D'))
+    
+    acq_date = '2025-02-01'
+
+    delta_vals_days = [
+        [30,60,90,180],
+        [0,15,45],
+        365
+    ]
+
+    delta_vals_weeks = [
+        [4,8,12,16],
+        [0,1,5,10],
+        52
+    ]
+    
+    expected_breakpoints_days = [
+        [
+            np.datetime64("2025-01-02T00:00:00.000000000"),
+            np.datetime64("2024-12-03T00:00:00.000000000"),
+            np.datetime64("2024-11-03T00:00:00.000000000"),
+            np.datetime64("2024-08-05T00:00:00.000000000"),
+        ],
+        [
+            np.datetime64("2025-01-17T00:00:00.000000000"),
+            np.datetime64("2024-12-18T00:00:00.000000000"),
+        ],
+        [
+            np.datetime64("2024-02-02T00:00:00.000000000")
+        ]
+    ]
+
+    expected_labels_days = [
+        [
+            '>180D',
+            '90D-180D',
+            '60D-90D',
+            '30D-60D',
+            '<30D'
+        ],
+        [
+            '>45D',
+            '15D-45D',
+            '<15D'
+        ],
+        [
+            '>365D',
+            '<365D',
+        ]
+    ]
+
+    @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_breakpoints_days)))
+    def test_create_timedelta_breakpoints_days(self,delta_vals,expected):
+        delta_unit = 'D'
+        assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected
+
+    @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_labels_days)))
+    def test_create_timedelta_labels_days(self,delta_vals,expected):
+        delta_unit = "D"
+        assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected
\ No newline at end of file
-- 
GitLab


From 2c213b87aee8ef1724e3b1bd05f5c9a9387245c1 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 1 May 2025 14:38:43 -0500
Subject: [PATCH 10/11] finish tests for age grouping. combine days and weeks
 params into single variables to reduce duplicate effort

---
 tests/test_utils.py | 128 ++++++++++++++++++++++++++++++++------------
 1 file changed, 95 insertions(+), 33 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1433660..e19bb08 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -191,39 +191,46 @@ def test_as_timedelta(val,unit):
     assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit)
 
 class TestAgeGrouping:
-    atime = np.arange('2024-08-15','2025-01-15',dtype='datetime64[D]',step=np.timedelta64(30,'D'))
+    timestamps = pl.Series(
+        name="access",
+        values=[
+            "2024-05-12",
+            "2025-02-21",
+            "2025-04-02",
+            "2025-04-17",
+            "2025-05-17",
+        ],
+    ).str.to_datetime(time_unit="ns")
     
-    acq_date = '2025-02-01'
-
-    delta_vals_days = [
-        [30,60,90,180],
-        [0,15,45],
-        365
-    ]
+    acq_date = '2025-06-01'
 
-    delta_vals_weeks = [
-        [4,8,12,16],
-        [0,1,5,10],
+    delta_vals = [
+        [-2,30,60,90,180],
+        365,
+        [0,4,8,12,16],
         52
     ]
     
-    expected_breakpoints_days = [
+    delta_unit = ['D','D','W','W']
+    
+    expected_breakpoints = [
         [
-            np.datetime64("2025-01-02T00:00:00.000000000"),
+            np.datetime64("2025-05-02T00:00:00.000000000"),
+            np.datetime64("2025-04-02T00:00:00.000000000"),
+            np.datetime64("2025-03-03T00:00:00.000000000"),
             np.datetime64("2024-12-03T00:00:00.000000000"),
-            np.datetime64("2024-11-03T00:00:00.000000000"),
-            np.datetime64("2024-08-05T00:00:00.000000000"),
         ],
+        np.datetime64("2024-06-01T00:00:00.000000000"),
         [
-            np.datetime64("2025-01-17T00:00:00.000000000"),
-            np.datetime64("2024-12-18T00:00:00.000000000"),
+            np.datetime64("2025-05-04T00:00:00.000000000"),
+            np.datetime64("2025-04-06T00:00:00.000000000"),
+            np.datetime64("2025-03-09T00:00:00.000000000"),
+            np.datetime64("2025-02-09T00:00:00.000000000"),
         ],
-        [
-            np.datetime64("2024-02-02T00:00:00.000000000")
-        ]
+        np.datetime64("2024-06-02T00:00:00.000000000"),
     ]
 
-    expected_labels_days = [
+    expected_labels = [
         [
             '>180D',
             '90D-180D',
@@ -231,23 +238,78 @@ class TestAgeGrouping:
             '30D-60D',
             '<30D'
         ],
-        [
-            '>45D',
-            '15D-45D',
-            '<15D'
-        ],
         [
             '>365D',
             '<365D',
+        ],
+        [
+            ">16W",
+            "12W-16W",
+            "8W-12W",
+            "4W-8W",
+            "<4W"
+        ],
+        [
+            ">52W",
+            "<52W"
         ]
     ]
 
-    @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_breakpoints_days)))
-    def test_create_timedelta_breakpoints_days(self,delta_vals,expected):
-        delta_unit = 'D'
+    expected_age_groups_days = [
+        pl.Series(
+            name="access",
+            values=[
+                ">180D",
+                "90D-180D",
+                "60D-90D",
+                "30D-60D",
+                "<30D",
+            ],
+            dtype=pl.Enum([">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"]),
+        ),
+        pl.Series(
+            name="access",
+            values=[">365D", "<365D", "<365D", "<365D", "<365D"],
+            dtype=pl.Enum([">365D", "<365D"]),
+        ),
+        pl.Series(
+            name="access",
+            values=[">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
+            dtype=pl.Enum(
+                [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
+            ),
+        ),
+        pl.Series(name="access", values=[">52W", "<52W", "<52W", "<52W", "<52W"], dtype=pl.Enum([">52W","<52W"])),
+    ]
+
+    @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_breakpoints)))
+    def test_create_timedelta_breakpoints_days(self,delta_vals,delta_unit,expected):
         assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected
 
-    @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_labels_days)))
-    def test_create_timedelta_labels_days(self,delta_vals,expected):
-        delta_unit = "D"
-        assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected
\ No newline at end of file
+    @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_labels)))
+    def test_create_timedelta_labels(self,delta_vals,delta_unit,expected):
+        assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected
+    
+    @pytest.mark.parametrize(
+        'age_breakpoints,delta_unit,expected',
+        list(
+            zip(
+                delta_vals,
+                delta_unit,
+                list(zip(
+                        expected_breakpoints,
+                        expected_labels
+                    )
+                )
+            )
+        )
+    )
+    def test_prep_age_distribution_days(self,age_breakpoints,delta_unit,expected):
+        assert utils.prep_age_distribution(self.acq_date,age_breakpoints,delta_unit) == expected
+
+    @pytest.mark.parametrize('age_breakpoints,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_age_groups_days)))
+    def test_calculate_age_distribution_days(self,age_breakpoints,delta_unit,expected):
+        assert_series_equal(
+            utils.calculate_age_distribution(self.timestamps,self.acq_date,age_breakpoints,delta_unit),
+            expected
+        )
\ No newline at end of file
-- 
GitLab


From 81ff7e0849caa228b352a144c1dbafd91fa49465 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Thu, 1 May 2025 14:40:31 -0500
Subject: [PATCH 11/11] apply ruff formatting

---
 tests/test_utils.py | 237 +++++++++++++++++++++++++-------------------
 1 file changed, 134 insertions(+), 103 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index e19bb08..dc1802e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,51 +7,55 @@ from polars.testing import assert_series_equal
 import polars as pl
 import numpy as np
 
+
 ### General Purpose Utils
 @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")])
 def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
     p_path = utils.as_path(path)
     assert isinstance(p_path, Path)
 
+
 @pytest.mark.parametrize(
     "value,unit,to_unit,use_binary,expected",
     [
-        (1,"G","K",False,1000000),
-        (4,'K','base',True,4096),
-        ('100','base','T',False,1e-10)
-    ]
+        (1, "G", "K", False, 1000000),
+        (4, "K", "base", True, 4096),
+        ("100", "base", "T", False, 1e-10),
+    ],
 )
-def test_convert_si(value,unit,to_unit,use_binary,expected):
-    assert utils.convert_si(value,unit,to_unit,use_binary) == expected
+def test_convert_si(value, unit, to_unit, use_binary, expected):
+    assert utils.convert_si(value, unit, to_unit, use_binary) == expected
+
 
 ### Memory and File Size Utils
 @pytest.mark.parametrize(
     "val,default,expected",
     [
-        ('1 kiB',None,1024),
-        ('1 kiB',(1024**4),1024),
-        ('10 TiB',None,10*(1024**4)),
-        (None,1024,1024),
-        (None,None,None),
-        ('1.5 MiB',None,1572864)
-    ]
+        ("1 kiB", None, 1024),
+        ("1 kiB", (1024**4), 1024),
+        ("10 TiB", None, 10 * (1024**4)),
+        (None, 1024, 1024),
+        (None, None, None),
+        ("1.5 MiB", None, 1572864),
+    ],
 )
-def test_as_bytes(val,default,expected):
-    assert utils.as_bytes(val,default) == expected
+def test_as_bytes(val, default, expected):
+    assert utils.as_bytes(val, default) == expected
+
 
 class TestSizeGrouping:
     input_sizes = pl.Series(
         name="size",
-        values = [
+        values=[
             0,
             2048,
             4096,
             1024**2,  # 1 MiB
             1024**3,  # 1 GiB
-            20*1024**3,  # 20 GiB
-            1024**5   # 1 PiB
+            20 * 1024**3,  # 20 GiB
+            1024**5,  # 1 PiB
         ],
-        dtype = pl.Int128()
+        dtype=pl.Int128(),
     )
 
     expected_size_groups = [
@@ -137,58 +141,88 @@ class TestSizeGrouping:
     input_bins = [
         ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"],
         [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
-        ["10 GiB", 1024, 4096, "1 KiB",0],
-        "1 MiB"
+        ["10 GiB", 1024, 4096, "1 KiB", 0],
+        "1 MiB",
     ]
 
     expected_bins = [
         [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
         [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
         [1024, 4096, 10737418240],
-        [1048576]
+        [1048576],
     ]
 
     expected_labels = [
-        ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"],
-        ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"],
-        ["0 B-1 KiB","1 KiB-4 KiB","4 KiB-10 GiB",">10 GiB"],
-        ["0 B-1 MiB",">1 MiB"]
+        [
+            "0 B-4 KiB",
+            "4 KiB-4 MiB",
+            "4 MiB-1 GiB",
+            "1 GiB-10 GiB",
+            "10 GiB-100 GiB",
+            "100 GiB-1 TiB",
+            ">1 TiB",
+        ],
+        [
+            "0 B-4 KiB",
+            "4 KiB-4 MiB",
+            "4 MiB-1 GiB",
+            "1 GiB-10 GiB",
+            "10 GiB-100 GiB",
+            "100 GiB-1 TiB",
+            ">1 TiB",
+        ],
+        ["0 B-1 KiB", "1 KiB-4 KiB", "4 KiB-10 GiB", ">10 GiB"],
+        ["0 B-1 MiB", ">1 MiB"],
     ]
 
-    @pytest.mark.parametrize("bins,expected",list(zip(expected_bins,expected_labels)))
-    def test_create_size_bin_labels(self,bins,expected):
+    @pytest.mark.parametrize("bins,expected", list(zip(expected_bins, expected_labels)))
+    def test_create_size_bin_labels(self, bins, expected):
         assert utils.create_size_bin_labels(bins) == expected
-    
-    @pytest.mark.parametrize("bins,expected",list(zip(input_bins,list(zip(expected_bins,expected_labels)))))
-    def test_prep_size_distribution(self,bins,expected):
+
+    @pytest.mark.parametrize(
+        "bins,expected",
+        list(zip(input_bins, list(zip(expected_bins, expected_labels)))),
+    )
+    def test_prep_size_distribution(self, bins, expected):
         assert utils.prep_size_distribution(size_bins=bins) == expected
-    
-    @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4])
+
+    @pytest.mark.parametrize(
+        "bins,expected", list(zip(input_bins, expected_size_groups)), ids=[1, 2, 3, 4]
+    )
     def test_calculate_size_distribution(self, bins, expected):
-        assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected)
+        assert_series_equal(
+            utils.calculate_size_distribution(self.input_sizes, size_bins=bins),
+            expected,
+        )
 
 
 ### DateTime and File Age Utils
 @pytest.mark.parametrize(
-        'date,expected',
-        [
-            ('2025-01-01',np.datetime64('2025-01-01T00:00:00.000000000')),
-            (np.datetime64('2025-01-01T00:00:00.000000000'),np.datetime64('2025-01-01T00:00:00.000000000')),
-            (1735689600000000000,1735689600000000000),
-            (None,None)
-        ]
+    "date,expected",
+    [
+        ("2025-01-01", np.datetime64("2025-01-01T00:00:00.000000000")),
+        (
+            np.datetime64("2025-01-01T00:00:00.000000000"),
+            np.datetime64("2025-01-01T00:00:00.000000000"),
+        ),
+        (1735689600000000000, 1735689600000000000),
+        (None, None),
+    ],
 )
-def test_as_datetime(date,expected):
+def test_as_datetime(date, expected):
     assert utils.as_datetime(date) == expected
 
+
 def test_as_datetime_fails():
     with pytest.raises(ValueError):
-        utils.as_datetime('not a date')
+        utils.as_datetime("not a date")
+
+
+@pytest.mark.parametrize("val", [1, 3, 5])
+@pytest.mark.parametrize("unit", ["D", "W"])
+def test_as_timedelta(val, unit):
+    assert utils.as_timedelta(val, unit) == np.timedelta64(val, unit)
 
-@pytest.mark.parametrize('val',[1,3,5])
-@pytest.mark.parametrize('unit',['D','W'])
-def test_as_timedelta(val,unit):
-    assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit)
 
 class TestAgeGrouping:
     timestamps = pl.Series(
@@ -201,18 +235,13 @@ class TestAgeGrouping:
             "2025-05-17",
         ],
     ).str.to_datetime(time_unit="ns")
-    
-    acq_date = '2025-06-01'
-
-    delta_vals = [
-        [-2,30,60,90,180],
-        365,
-        [0,4,8,12,16],
-        52
-    ]
-    
-    delta_unit = ['D','D','W','W']
-    
+
+    acq_date = "2025-06-01"
+
+    delta_vals = [[-2, 30, 60, 90, 180], 365, [0, 4, 8, 12, 16], 52]
+
+    delta_unit = ["D", "D", "W", "W"]
+
     expected_breakpoints = [
         [
             np.datetime64("2025-05-02T00:00:00.000000000"),
@@ -231,28 +260,13 @@ class TestAgeGrouping:
     ]
 
     expected_labels = [
+        [">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"],
         [
-            '>180D',
-            '90D-180D',
-            '60D-90D',
-            '30D-60D',
-            '<30D'
+            ">365D",
+            "<365D",
         ],
-        [
-            '>365D',
-            '<365D',
-        ],
-        [
-            ">16W",
-            "12W-16W",
-            "8W-12W",
-            "4W-8W",
-            "<4W"
-        ],
-        [
-            ">52W",
-            "<52W"
-        ]
+        [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
+        [">52W", "<52W"],
     ]
 
     expected_age_groups_days = [
@@ -279,37 +293,54 @@ class TestAgeGrouping:
                 [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
             ),
         ),
-        pl.Series(name="access", values=[">52W", "<52W", "<52W", "<52W", "<52W"], dtype=pl.Enum([">52W","<52W"])),
+        pl.Series(
+            name="access",
+            values=[">52W", "<52W", "<52W", "<52W", "<52W"],
+            dtype=pl.Enum([">52W", "<52W"]),
+        ),
     ]
 
-    @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_breakpoints)))
-    def test_create_timedelta_breakpoints_days(self,delta_vals,delta_unit,expected):
-        assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected
+    @pytest.mark.parametrize(
+        "delta_vals,delta_unit,expected",
+        list(zip(delta_vals, delta_unit, expected_breakpoints)),
+    )
+    def test_create_timedelta_breakpoints_days(self, delta_vals, delta_unit, expected):
+        assert (
+            utils.create_timedelta_breakpoints(self.acq_date, delta_vals, delta_unit)
+            == expected
+        )
+
+    @pytest.mark.parametrize(
+        "delta_vals,delta_unit,expected",
+        list(zip(delta_vals, delta_unit, expected_labels)),
+    )
+    def test_create_timedelta_labels(self, delta_vals, delta_unit, expected):
+        assert utils.create_timedelta_labels(delta_vals, delta_unit) == expected
 
-    @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_labels)))
-    def test_create_timedelta_labels(self,delta_vals,delta_unit,expected):
-        assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected
-    
     @pytest.mark.parametrize(
-        'age_breakpoints,delta_unit,expected',
+        "age_breakpoints,delta_unit,expected",
         list(
             zip(
-                delta_vals,
-                delta_unit,
-                list(zip(
-                        expected_breakpoints,
-                        expected_labels
-                    )
-                )
+                delta_vals, delta_unit, list(zip(expected_breakpoints, expected_labels))
             )
-        )
+        ),
     )
-    def test_prep_age_distribution_days(self,age_breakpoints,delta_unit,expected):
-        assert utils.prep_age_distribution(self.acq_date,age_breakpoints,delta_unit) == expected
+    def test_prep_age_distribution_days(self, age_breakpoints, delta_unit, expected):
+        assert (
+            utils.prep_age_distribution(self.acq_date, age_breakpoints, delta_unit)
+            == expected
+        )
 
-    @pytest.mark.parametrize('age_breakpoints,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_age_groups_days)))
-    def test_calculate_age_distribution_days(self,age_breakpoints,delta_unit,expected):
+    @pytest.mark.parametrize(
+        "age_breakpoints,delta_unit,expected",
+        list(zip(delta_vals, delta_unit, expected_age_groups_days)),
+    )
+    def test_calculate_age_distribution_days(
+        self, age_breakpoints, delta_unit, expected
+    ):
         assert_series_equal(
-            utils.calculate_age_distribution(self.timestamps,self.acq_date,age_breakpoints,delta_unit),
-            expected
-        )
\ No newline at end of file
+            utils.calculate_age_distribution(
+                self.timestamps, self.acq_date, age_breakpoints, delta_unit
+            ),
+            expected,
+        )
-- 
GitLab