From 37f0045609ee6b76907acaa7f8447150984b7f48 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 24 Apr 2025 10:10:46 -0500 Subject: [PATCH 01/11] add pytest to dependencies --- poetry.lock | 64 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 02961ca..545215b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,18 @@ # This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main"] +markers = "sys_platform == \"win32\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + [[package]] name = "colormaps" version = "0.4.2" @@ -82,6 +95,18 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + [[package]] name = "numpy" version = "2.2.5" @@ -175,6 +200,22 @@ files = [ packaging = "*" tenacity = ">=6.2.0" +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "polars" version = "1.27.1" @@ -274,6 +315,27 @@ files = [ [package.extras] test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] +[[package]] +name = "pytest" +version = "8.3.5" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, + {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "sqlalchemy" version = "2.0.40" @@ -420,4 +482,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.12" -content-hash = "b94c26465ac498c7656fafdf498f424379a1e882e5da6ca3a9dcac48b076b770" +content-hash = "e5d35bf795dfae55f35d9e809e407295bdb8ff0dc0edf6fa252f940255279f51" diff --git a/pyproject.toml b/pyproject.toml index 62cb958..b87f6e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ numpy = "*" plotly = "^5.24.1" polars = ">=1.27.0" pyarrow = "^19.0.1" +pytest = "^8.3.5" sqlalchemy = "*" typeguard = "*" -- GitLab From 739f53560d7a2609aecd182c5eb32d3d8be4b274 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 24 Apr 2025 10:33:04 -0500 Subject: [PATCH 02/11] add flake8 plugin for pytest linting --- poetry.lock | 31 +++++++++++++++++++++++++++++-- pyproject.toml | 10 ++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 545215b..090f938 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,6 +25,33 @@ files = [ {file = "colormaps-0.4.2.tar.gz", hash = "sha256:c703d62a4fededfcfed57bef6aa772422b535896645cbf8c58690e4bbe16005c"}, ] +[[package]] +name = "flake8-plugin-utils" +version = "1.3.3" +description = "The package provides base classes and utils for flake8 plugin writing" +optional = false +python-versions = ">=3.6,<4.0" +groups = ["main"] +files = [ + {file = "flake8-plugin-utils-1.3.3.tar.gz", hash = "sha256:39f6f338d038b301c6fd344b06f2e81e382b68fa03c0560dff0d9b1791a11a2c"}, + {file = "flake8_plugin_utils-1.3.3-py3-none-any.whl", hash = "sha256:e4848c57d9d50f19100c2d75fa794b72df068666a9041b4b0409be923356a3ed"}, +] + +[[package]] +name = "flake8-pytest-style" +version = "2.1.0" +description = "A flake8 plugin checking common style issues or inconsistencies with pytest-based tests." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "flake8_pytest_style-2.1.0-py3-none-any.whl", hash = "sha256:a0d6dddcd533bfc13f19b8445907be0330c5e6ccf7090bcd9d5fa5a0b1b65e71"}, + {file = "flake8_pytest_style-2.1.0.tar.gz", hash = "sha256:fee6befdb5915d600ef24e38d48a077d0dcffb032945ae0169486e7ff8a1079a"}, +] + +[package.dependencies] +flake8-plugin-utils = ">=1.3.2,<2.0.0" + [[package]] name = "greenlet" version = "3.2.1" @@ -481,5 +508,5 @@ files = [ [metadata] lock-version = "2.1" -python-versions = ">=3.12" -content-hash = "e5d35bf795dfae55f35d9e809e407295bdb8ff0dc0edf6fa252f940255279f51" +python-versions = ">=3.12,<4.0" +content-hash = "f7cca41d57b1b597fdc1f0d6584dbe3148b9d39515a1830c3ef96920e8969ffd" diff --git a/pyproject.toml b/pyproject.toml index b87f6e4..cd10390 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ maintainers = [ ] license = "AFL" readme = "README.md" -requires-python = ">=3.12" +requires-python = ">=3.12,<4.0" keywords = ["GPFS", "policy", "aggregation", "reporting"] dynamic = ["version","dependencies","classifiers"] @@ -33,8 +33,9 @@ classifiers = [ version = "0.0.0" [tool.poetry.dependencies] -python = ">=3.12" +python = ">=3.12,<4.0" colormaps = "*" +flake8-pytest-style = "^2.1.0" numpy = "*" plotly = "^5.24.1" polars = ">=1.27.0" @@ -67,3 +68,8 @@ folders = [ requires = ["poetry-core>=2.0.0,<3.0.0","poetry-dynamic-versioning>=1.0.0,<2.0.0"] build-backend = "poetry_dynamic_versioning.backend" +[tool.pytest.ini_options] +pythonpath = "src" +addopts = [ + "--import-mode=importlib", +] \ No newline at end of file -- GitLab From a9f686673b528254d6831eda51707164aec51654 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 24 Apr 2025 11:06:29 -0500 Subject: [PATCH 03/11] add pytest datafiles plugin --- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 090f938..5f2e5fd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -363,6 +363,21 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-datafiles" +version = "3.0.0" +description = "py.test plugin to create a 'tmp_path' containing predefined files/directories." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pytest-datafiles-3.0.0.tar.gz", hash = "sha256:a70c4c66a36d1cdcfc095607f04eee66eaef3fa64cbb62d60c47ce169901d1d4"}, + {file = "pytest_datafiles-3.0.0-py2.py3-none-any.whl", hash = "sha256:2176e10d3f6e76f358925a897e21e2bcc5a0170b92fac4e66ed055eaa2ca6a22"}, +] + +[package.dependencies] +pytest = ">=3.6" + [[package]] name = "sqlalchemy" version = "2.0.40" @@ -509,4 +524,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.12,<4.0" -content-hash = "f7cca41d57b1b597fdc1f0d6584dbe3148b9d39515a1830c3ef96920e8969ffd" +content-hash = "6a3e0566e33e1c0966f07ee437a94df8b9c396d372a462e6b7daaa574eb7e0e0" diff --git a/pyproject.toml b/pyproject.toml index cd10390..75a8d00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ plotly = "^5.24.1" polars = ">=1.27.0" pyarrow = "^19.0.1" pytest = "^8.3.5" +pytest-datafiles = "^3.0.0" sqlalchemy = "*" typeguard = "*" -- GitLab From 80e482bf6c818f79fa4c2bf64c1a1fb87567d4de Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 24 Apr 2025 15:55:14 -0500 Subject: [PATCH 04/11] remove flake8 plugin --- poetry.lock | 29 +---------------------------- pyproject.toml | 7 ++++--- 2 files changed, 5 insertions(+), 31 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5f2e5fd..24f58d2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,33 +25,6 @@ files = [ {file = "colormaps-0.4.2.tar.gz", hash = "sha256:c703d62a4fededfcfed57bef6aa772422b535896645cbf8c58690e4bbe16005c"}, ] -[[package]] -name = "flake8-plugin-utils" -version = "1.3.3" -description = "The package provides base classes and utils for flake8 plugin writing" -optional = false -python-versions = ">=3.6,<4.0" -groups = ["main"] -files = [ - {file = "flake8-plugin-utils-1.3.3.tar.gz", hash = "sha256:39f6f338d038b301c6fd344b06f2e81e382b68fa03c0560dff0d9b1791a11a2c"}, - {file = "flake8_plugin_utils-1.3.3-py3-none-any.whl", hash = "sha256:e4848c57d9d50f19100c2d75fa794b72df068666a9041b4b0409be923356a3ed"}, -] - -[[package]] -name = "flake8-pytest-style" -version = "2.1.0" -description = "A flake8 plugin checking common style issues or inconsistencies with pytest-based tests." -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "flake8_pytest_style-2.1.0-py3-none-any.whl", hash = "sha256:a0d6dddcd533bfc13f19b8445907be0330c5e6ccf7090bcd9d5fa5a0b1b65e71"}, - {file = "flake8_pytest_style-2.1.0.tar.gz", hash = "sha256:fee6befdb5915d600ef24e38d48a077d0dcffb032945ae0169486e7ff8a1079a"}, -] - -[package.dependencies] -flake8-plugin-utils = ">=1.3.2,<2.0.0" - [[package]] name = "greenlet" version = "3.2.1" @@ -524,4 +497,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.12,<4.0" -content-hash = "6a3e0566e33e1c0966f07ee437a94df8b9c396d372a462e6b7daaa574eb7e0e0" +content-hash = "b774e71210a804105de6c03976133f42953ebfc5be59ef0d2c6cc62c842a6372" diff --git a/pyproject.toml b/pyproject.toml index 75a8d00..0120b6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ version = "0.0.0" [tool.poetry.dependencies] python = ">=3.12,<4.0" colormaps = "*" -flake8-pytest-style = "^2.1.0" numpy = "*" plotly = "^5.24.1" polars = ">=1.27.0" @@ -69,8 +68,10 @@ folders = [ requires = ["poetry-core>=2.0.0,<3.0.0","poetry-dynamic-versioning>=1.0.0,<2.0.0"] build-backend = "poetry_dynamic_versioning.backend" + [tool.pytest.ini_options] -pythonpath = "src" addopts = [ "--import-mode=importlib", -] \ No newline at end of file +] +required_plugins = ["pytest-datafiles>=3.0.0"] +testpaths = ["tests"] \ No newline at end of file -- GitLab From dfff5f38c409a8c2a3055806879e224a12939866 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 24 Apr 2025 15:55:30 -0500 Subject: [PATCH 05/11] add ruff linter and formatter --- ruff.toml | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 ruff.toml diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..10bc61b --- /dev/null +++ b/ruff.toml @@ -0,0 +1,85 @@ +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".gitlab-ci.yml", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".poetry", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "poetry.toml", + "poetry.lock", + "test-data", + "legacy-scripts", + "extra", + "data" +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.13 +target-version = "py313" + +[lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. +docstring-code-line-length = "dynamic" \ No newline at end of file -- GitLab From 5b1c794a9a07571fbff5587efd14315fbb3e9989 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Fri, 25 Apr 2025 10:29:20 -0500 Subject: [PATCH 06/11] add initial testing of utility functions --- tests/test_utils.py | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..1f3b029 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,65 @@ +import pytest +from typing import Literal +from pathlib import Path +from rc_gpfs import utils + + +@pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")]) +def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]): + p_path = utils.as_path(path) + assert isinstance(p_path, Path) + +@pytest.mark.parametrize( + "value,unit,to_unit,use_binary,expected", + [ + (1,"G","K",False,1000000), + (4,'K','base',True,4096), + ('100','base','T',False,1e-10) + ] +) +def test_convert_si(value,unit,to_unit,use_binary,expected): + assert utils.convert_si(value,unit,to_unit,use_binary) == expected + +@pytest.mark.parametrize( + "val,default,expected", + [ + ('1 kiB',None,1024), + ('1 kiB',(1024**4),1024), + ('10 TiB',None,10*(1024**4)), + (None,1024,1024), + (None,None,None), + ('1.5 MiB',None,1572864) + ] +) +def test_as_bytes(val,default,expected): + assert utils.as_bytes(val,default) == expected + +class TestSizeDistribution: + inputs = [ + ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"], + [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], + ["10 GiB", 1024, 4096, "1 KiB",0], + "1 MiB" + ] + + expected_bins = [ + [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], + [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], + [1024, 4096, 10737418240], + [1048576] + ] + + expected_labels = [ + ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"], + ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"], + ["0 B-1 KiB","1 KiB-4 KiB","4 KiB-10 GiB",">10 GiB"], + ["0 B-1 MiB",">1 MiB"] + ] + + @pytest.mark.parametrize("bins,expected",list(zip(expected_bins,expected_labels))) + def test_create_size_bin_labels(self,bins,expected): + assert utils.create_size_bin_labels(bins) == expected + + @pytest.mark.parametrize("bins,expected",list(zip(inputs,list(zip(expected_bins,expected_labels))))) + def test_prep_size_distribution(self,bins,expected): + assert utils.prep_size_distribution(size_bins=bins) == expected \ No newline at end of file -- GitLab From 6f887736901d6240a722a9d5f17dc180d4098aa4 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Fri, 25 Apr 2025 10:29:39 -0500 Subject: [PATCH 07/11] specify the names of the functions to import instead of * --- src/rc_gpfs/utils/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rc_gpfs/utils/core.py b/src/rc_gpfs/utils/core.py index 2596c4c..d62115a 100644 --- a/src/rc_gpfs/utils/core.py +++ b/src/rc_gpfs/utils/core.py @@ -8,7 +8,7 @@ import pyarrow.parquet as pq import numpy as np from .units import as_bytes, convert_si, create_size_bin_labels -from .datetime import * +from .datetime import as_datetime,create_timedelta_breakpoints,create_timedelta_labels def parse_scontrol(): job_id = os.getenv('SLURM_JOB_ID') -- GitLab From 705609889fffcb3caf65153fd2a5f9f0ac4b7fb7 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 1 May 2025 10:37:10 -0500 Subject: [PATCH 08/11] add test for size group assignment --- tests/test_utils.py | 109 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1f3b029..48e7063 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,9 @@ from typing import Literal from pathlib import Path from rc_gpfs import utils +from polars.testing import assert_series_equal +import polars as pl + @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")]) def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]): @@ -34,8 +37,102 @@ def test_convert_si(value,unit,to_unit,use_binary,expected): def test_as_bytes(val,default,expected): assert utils.as_bytes(val,default) == expected -class TestSizeDistribution: - inputs = [ +class TestSizeGrouping: + input_sizes = pl.Series( + name="size", + values = [ + 0, + 2048, + 4096, + 1024**2, # 1 MiB + 1024**3, # 1 GiB + 20*1024**3, # 20 GiB + 1024**5 # 1 PiB + ], + dtype = pl.Int128() + ) + + expected_size_groups = [ + pl.Series( + name="size", + values=[ + "0 B-4 KiB", + "0 B-4 KiB", + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "10 GiB-100 GiB", + ">1 TiB", + ], + dtype=pl.Enum( + [ + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "1 GiB-10 GiB", + "10 GiB-100 GiB", + "100 GiB-1 TiB", + ">1 TiB", + ] + ), + ), + pl.Series( + name="size", + values=[ + "0 B-4 KiB", + "0 B-4 KiB", + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "10 GiB-100 GiB", + ">1 TiB", + ], + dtype=pl.Enum( + [ + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "1 GiB-10 GiB", + "10 GiB-100 GiB", + "100 GiB-1 TiB", + ">1 TiB", + ] + ), + ), + pl.Series( + name="size", + values=[ + "0 B-1 KiB", + "1 KiB-4 KiB", + "1 KiB-4 KiB", + "4 KiB-10 GiB", + "4 KiB-10 GiB", + ">10 GiB", + ">10 GiB", + ], + dtype=pl.Enum(["0 B-1 KiB", "1 KiB-4 KiB", "4 KiB-10 GiB", ">10 GiB"]), + ), + pl.Series( + name="size", + values=[ + "0 B-1 MiB", + "0 B-1 MiB", + "0 B-1 MiB", + "0 B-1 MiB", + ">1 MiB", + ">1 MiB", + ">1 MiB", + ], + dtype=pl.Enum( + [ + "0 B-1 MiB", + ">1 MiB", + ] + ), + ), + ] + + input_bins = [ ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"], [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], ["10 GiB", 1024, 4096, "1 KiB",0], @@ -60,6 +157,10 @@ class TestSizeDistribution: def test_create_size_bin_labels(self,bins,expected): assert utils.create_size_bin_labels(bins) == expected - @pytest.mark.parametrize("bins,expected",list(zip(inputs,list(zip(expected_bins,expected_labels))))) + @pytest.mark.parametrize("bins,expected",list(zip(input_bins,list(zip(expected_bins,expected_labels))))) def test_prep_size_distribution(self,bins,expected): - assert utils.prep_size_distribution(size_bins=bins) == expected \ No newline at end of file + assert utils.prep_size_distribution(size_bins=bins) == expected + + @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4]) + def test_calculate_size_distribution(self, bins, expected): + assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected) \ No newline at end of file -- GitLab From bda1b8953a74e701592bf1bc34cf4686e1f46966 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 1 May 2025 13:07:58 -0500 Subject: [PATCH 09/11] add initial datetime tests --- tests/test_utils.py | 91 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 48e7063..1433660 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,8 +5,9 @@ from rc_gpfs import utils from polars.testing import assert_series_equal import polars as pl +import numpy as np - +### General Purpose Utils @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")]) def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]): p_path = utils.as_path(path) @@ -23,6 +24,7 @@ def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]): def test_convert_si(value,unit,to_unit,use_binary,expected): assert utils.convert_si(value,unit,to_unit,use_binary) == expected +### Memory and File Size Utils @pytest.mark.parametrize( "val,default,expected", [ @@ -163,4 +165,89 @@ class TestSizeGrouping: @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4]) def test_calculate_size_distribution(self, bins, expected): - assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected) \ No newline at end of file + assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected) + + +### DateTime and File Age Utils +@pytest.mark.parametrize( + 'date,expected', + [ + ('2025-01-01',np.datetime64('2025-01-01T00:00:00.000000000')), + (np.datetime64('2025-01-01T00:00:00.000000000'),np.datetime64('2025-01-01T00:00:00.000000000')), + (1735689600000000000,1735689600000000000), + (None,None) + ] +) +def test_as_datetime(date,expected): + assert utils.as_datetime(date) == expected + +def test_as_datetime_fails(): + with pytest.raises(ValueError): + utils.as_datetime('not a date') + +@pytest.mark.parametrize('val',[1,3,5]) +@pytest.mark.parametrize('unit',['D','W']) +def test_as_timedelta(val,unit): + assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit) + +class TestAgeGrouping: + atime = np.arange('2024-08-15','2025-01-15',dtype='datetime64[D]',step=np.timedelta64(30,'D')) + + acq_date = '2025-02-01' + + delta_vals_days = [ + [30,60,90,180], + [0,15,45], + 365 + ] + + delta_vals_weeks = [ + [4,8,12,16], + [0,1,5,10], + 52 + ] + + expected_breakpoints_days = [ + [ + np.datetime64("2025-01-02T00:00:00.000000000"), + np.datetime64("2024-12-03T00:00:00.000000000"), + np.datetime64("2024-11-03T00:00:00.000000000"), + np.datetime64("2024-08-05T00:00:00.000000000"), + ], + [ + np.datetime64("2025-01-17T00:00:00.000000000"), + np.datetime64("2024-12-18T00:00:00.000000000"), + ], + [ + np.datetime64("2024-02-02T00:00:00.000000000") + ] + ] + + expected_labels_days = [ + [ + '>180D', + '90D-180D', + '60D-90D', + '30D-60D', + '<30D' + ], + [ + '>45D', + '15D-45D', + '<15D' + ], + [ + '>365D', + '<365D', + ] + ] + + @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_breakpoints_days))) + def test_create_timedelta_breakpoints_days(self,delta_vals,expected): + delta_unit = 'D' + assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected + + @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_labels_days))) + def test_create_timedelta_labels_days(self,delta_vals,expected): + delta_unit = "D" + assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected \ No newline at end of file -- GitLab From 2c213b87aee8ef1724e3b1bd05f5c9a9387245c1 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 1 May 2025 14:38:43 -0500 Subject: [PATCH 10/11] finish tests for age grouping. combine days and weeks params into single variables to reduce duplicate effort --- tests/test_utils.py | 128 ++++++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 33 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1433660..e19bb08 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -191,39 +191,46 @@ def test_as_timedelta(val,unit): assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit) class TestAgeGrouping: - atime = np.arange('2024-08-15','2025-01-15',dtype='datetime64[D]',step=np.timedelta64(30,'D')) + timestamps = pl.Series( + name="access", + values=[ + "2024-05-12", + "2025-02-21", + "2025-04-02", + "2025-04-17", + "2025-05-17", + ], + ).str.to_datetime(time_unit="ns") - acq_date = '2025-02-01' - - delta_vals_days = [ - [30,60,90,180], - [0,15,45], - 365 - ] + acq_date = '2025-06-01' - delta_vals_weeks = [ - [4,8,12,16], - [0,1,5,10], + delta_vals = [ + [-2,30,60,90,180], + 365, + [0,4,8,12,16], 52 ] - expected_breakpoints_days = [ + delta_unit = ['D','D','W','W'] + + expected_breakpoints = [ [ - np.datetime64("2025-01-02T00:00:00.000000000"), + np.datetime64("2025-05-02T00:00:00.000000000"), + np.datetime64("2025-04-02T00:00:00.000000000"), + np.datetime64("2025-03-03T00:00:00.000000000"), np.datetime64("2024-12-03T00:00:00.000000000"), - np.datetime64("2024-11-03T00:00:00.000000000"), - np.datetime64("2024-08-05T00:00:00.000000000"), ], + np.datetime64("2024-06-01T00:00:00.000000000"), [ - np.datetime64("2025-01-17T00:00:00.000000000"), - np.datetime64("2024-12-18T00:00:00.000000000"), + np.datetime64("2025-05-04T00:00:00.000000000"), + np.datetime64("2025-04-06T00:00:00.000000000"), + np.datetime64("2025-03-09T00:00:00.000000000"), + np.datetime64("2025-02-09T00:00:00.000000000"), ], - [ - np.datetime64("2024-02-02T00:00:00.000000000") - ] + np.datetime64("2024-06-02T00:00:00.000000000"), ] - expected_labels_days = [ + expected_labels = [ [ '>180D', '90D-180D', @@ -231,23 +238,78 @@ class TestAgeGrouping: '30D-60D', '<30D' ], - [ - '>45D', - '15D-45D', - '<15D' - ], [ '>365D', '<365D', + ], + [ + ">16W", + "12W-16W", + "8W-12W", + "4W-8W", + "<4W" + ], + [ + ">52W", + "<52W" ] ] - @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_breakpoints_days))) - def test_create_timedelta_breakpoints_days(self,delta_vals,expected): - delta_unit = 'D' + expected_age_groups_days = [ + pl.Series( + name="access", + values=[ + ">180D", + "90D-180D", + "60D-90D", + "30D-60D", + "<30D", + ], + dtype=pl.Enum([">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"]), + ), + pl.Series( + name="access", + values=[">365D", "<365D", "<365D", "<365D", "<365D"], + dtype=pl.Enum([">365D", "<365D"]), + ), + pl.Series( + name="access", + values=[">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"], + dtype=pl.Enum( + [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"], + ), + ), + pl.Series(name="access", values=[">52W", "<52W", "<52W", "<52W", "<52W"], dtype=pl.Enum([">52W","<52W"])), + ] + + @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_breakpoints))) + def test_create_timedelta_breakpoints_days(self,delta_vals,delta_unit,expected): assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected - @pytest.mark.parametrize('delta_vals,expected',list(zip(delta_vals_days,expected_labels_days))) - def test_create_timedelta_labels_days(self,delta_vals,expected): - delta_unit = "D" - assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected \ No newline at end of file + @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_labels))) + def test_create_timedelta_labels(self,delta_vals,delta_unit,expected): + assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected + + @pytest.mark.parametrize( + 'age_breakpoints,delta_unit,expected', + list( + zip( + delta_vals, + delta_unit, + list(zip( + expected_breakpoints, + expected_labels + ) + ) + ) + ) + ) + def test_prep_age_distribution_days(self,age_breakpoints,delta_unit,expected): + assert utils.prep_age_distribution(self.acq_date,age_breakpoints,delta_unit) == expected + + @pytest.mark.parametrize('age_breakpoints,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_age_groups_days))) + def test_calculate_age_distribution_days(self,age_breakpoints,delta_unit,expected): + assert_series_equal( + utils.calculate_age_distribution(self.timestamps,self.acq_date,age_breakpoints,delta_unit), + expected + ) \ No newline at end of file -- GitLab From 81ff7e0849caa228b352a144c1dbafd91fa49465 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Thu, 1 May 2025 14:40:31 -0500 Subject: [PATCH 11/11] apply ruff formatting --- tests/test_utils.py | 237 +++++++++++++++++++++++++------------------- 1 file changed, 134 insertions(+), 103 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e19bb08..dc1802e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,51 +7,55 @@ from polars.testing import assert_series_equal import polars as pl import numpy as np + ### General Purpose Utils @pytest.mark.parametrize("path", ["/data/rc/gpfs-policy", Path("/data/rc/gpfs-policy")]) def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]): p_path = utils.as_path(path) assert isinstance(p_path, Path) + @pytest.mark.parametrize( "value,unit,to_unit,use_binary,expected", [ - (1,"G","K",False,1000000), - (4,'K','base',True,4096), - ('100','base','T',False,1e-10) - ] + (1, "G", "K", False, 1000000), + (4, "K", "base", True, 4096), + ("100", "base", "T", False, 1e-10), + ], ) -def test_convert_si(value,unit,to_unit,use_binary,expected): - assert utils.convert_si(value,unit,to_unit,use_binary) == expected +def test_convert_si(value, unit, to_unit, use_binary, expected): + assert utils.convert_si(value, unit, to_unit, use_binary) == expected + ### Memory and File Size Utils @pytest.mark.parametrize( "val,default,expected", [ - ('1 kiB',None,1024), - ('1 kiB',(1024**4),1024), - ('10 TiB',None,10*(1024**4)), - (None,1024,1024), - (None,None,None), - ('1.5 MiB',None,1572864) - ] + ("1 kiB", None, 1024), + ("1 kiB", (1024**4), 1024), + ("10 TiB", None, 10 * (1024**4)), + (None, 1024, 1024), + (None, None, None), + ("1.5 MiB", None, 1572864), + ], ) -def test_as_bytes(val,default,expected): - assert utils.as_bytes(val,default) == expected +def test_as_bytes(val, default, expected): + assert utils.as_bytes(val, default) == expected + class TestSizeGrouping: input_sizes = pl.Series( name="size", - values = [ + values=[ 0, 2048, 4096, 1024**2, # 1 MiB 1024**3, # 1 GiB - 20*1024**3, # 20 GiB - 1024**5 # 1 PiB + 20 * 1024**3, # 20 GiB + 1024**5, # 1 PiB ], - dtype = pl.Int128() + dtype=pl.Int128(), ) expected_size_groups = [ @@ -137,58 +141,88 @@ class TestSizeGrouping: input_bins = [ ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"], [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], - ["10 GiB", 1024, 4096, "1 KiB",0], - "1 MiB" + ["10 GiB", 1024, 4096, "1 KiB", 0], + "1 MiB", ] expected_bins = [ [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776], [1024, 4096, 10737418240], - [1048576] + [1048576], ] expected_labels = [ - ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"], - ["0 B-4 KiB","4 KiB-4 MiB","4 MiB-1 GiB","1 GiB-10 GiB","10 GiB-100 GiB","100 GiB-1 TiB",">1 TiB"], - ["0 B-1 KiB","1 KiB-4 KiB","4 KiB-10 GiB",">10 GiB"], - ["0 B-1 MiB",">1 MiB"] + [ + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "1 GiB-10 GiB", + "10 GiB-100 GiB", + "100 GiB-1 TiB", + ">1 TiB", + ], + [ + "0 B-4 KiB", + "4 KiB-4 MiB", + "4 MiB-1 GiB", + "1 GiB-10 GiB", + "10 GiB-100 GiB", + "100 GiB-1 TiB", + ">1 TiB", + ], + ["0 B-1 KiB", "1 KiB-4 KiB", "4 KiB-10 GiB", ">10 GiB"], + ["0 B-1 MiB", ">1 MiB"], ] - @pytest.mark.parametrize("bins,expected",list(zip(expected_bins,expected_labels))) - def test_create_size_bin_labels(self,bins,expected): + @pytest.mark.parametrize("bins,expected", list(zip(expected_bins, expected_labels))) + def test_create_size_bin_labels(self, bins, expected): assert utils.create_size_bin_labels(bins) == expected - - @pytest.mark.parametrize("bins,expected",list(zip(input_bins,list(zip(expected_bins,expected_labels))))) - def test_prep_size_distribution(self,bins,expected): + + @pytest.mark.parametrize( + "bins,expected", + list(zip(input_bins, list(zip(expected_bins, expected_labels)))), + ) + def test_prep_size_distribution(self, bins, expected): assert utils.prep_size_distribution(size_bins=bins) == expected - - @pytest.mark.parametrize("bins,expected",list(zip(input_bins,expected_size_groups)),ids=[1,2,3,4]) + + @pytest.mark.parametrize( + "bins,expected", list(zip(input_bins, expected_size_groups)), ids=[1, 2, 3, 4] + ) def test_calculate_size_distribution(self, bins, expected): - assert_series_equal(utils.calculate_size_distribution(self.input_sizes, size_bins=bins),expected) + assert_series_equal( + utils.calculate_size_distribution(self.input_sizes, size_bins=bins), + expected, + ) ### DateTime and File Age Utils @pytest.mark.parametrize( - 'date,expected', - [ - ('2025-01-01',np.datetime64('2025-01-01T00:00:00.000000000')), - (np.datetime64('2025-01-01T00:00:00.000000000'),np.datetime64('2025-01-01T00:00:00.000000000')), - (1735689600000000000,1735689600000000000), - (None,None) - ] + "date,expected", + [ + ("2025-01-01", np.datetime64("2025-01-01T00:00:00.000000000")), + ( + np.datetime64("2025-01-01T00:00:00.000000000"), + np.datetime64("2025-01-01T00:00:00.000000000"), + ), + (1735689600000000000, 1735689600000000000), + (None, None), + ], ) -def test_as_datetime(date,expected): +def test_as_datetime(date, expected): assert utils.as_datetime(date) == expected + def test_as_datetime_fails(): with pytest.raises(ValueError): - utils.as_datetime('not a date') + utils.as_datetime("not a date") + + +@pytest.mark.parametrize("val", [1, 3, 5]) +@pytest.mark.parametrize("unit", ["D", "W"]) +def test_as_timedelta(val, unit): + assert utils.as_timedelta(val, unit) == np.timedelta64(val, unit) -@pytest.mark.parametrize('val',[1,3,5]) -@pytest.mark.parametrize('unit',['D','W']) -def test_as_timedelta(val,unit): - assert utils.as_timedelta(val,unit) == np.timedelta64(val,unit) class TestAgeGrouping: timestamps = pl.Series( @@ -201,18 +235,13 @@ class TestAgeGrouping: "2025-05-17", ], ).str.to_datetime(time_unit="ns") - - acq_date = '2025-06-01' - - delta_vals = [ - [-2,30,60,90,180], - 365, - [0,4,8,12,16], - 52 - ] - - delta_unit = ['D','D','W','W'] - + + acq_date = "2025-06-01" + + delta_vals = [[-2, 30, 60, 90, 180], 365, [0, 4, 8, 12, 16], 52] + + delta_unit = ["D", "D", "W", "W"] + expected_breakpoints = [ [ np.datetime64("2025-05-02T00:00:00.000000000"), @@ -231,28 +260,13 @@ class TestAgeGrouping: ] expected_labels = [ + [">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"], [ - '>180D', - '90D-180D', - '60D-90D', - '30D-60D', - '<30D' + ">365D", + "<365D", ], - [ - '>365D', - '<365D', - ], - [ - ">16W", - "12W-16W", - "8W-12W", - "4W-8W", - "<4W" - ], - [ - ">52W", - "<52W" - ] + [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"], + [">52W", "<52W"], ] expected_age_groups_days = [ @@ -279,37 +293,54 @@ class TestAgeGrouping: [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"], ), ), - pl.Series(name="access", values=[">52W", "<52W", "<52W", "<52W", "<52W"], dtype=pl.Enum([">52W","<52W"])), + pl.Series( + name="access", + values=[">52W", "<52W", "<52W", "<52W", "<52W"], + dtype=pl.Enum([">52W", "<52W"]), + ), ] - @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_breakpoints))) - def test_create_timedelta_breakpoints_days(self,delta_vals,delta_unit,expected): - assert utils.create_timedelta_breakpoints(self.acq_date,delta_vals,delta_unit) == expected + @pytest.mark.parametrize( + "delta_vals,delta_unit,expected", + list(zip(delta_vals, delta_unit, expected_breakpoints)), + ) + def test_create_timedelta_breakpoints_days(self, delta_vals, delta_unit, expected): + assert ( + utils.create_timedelta_breakpoints(self.acq_date, delta_vals, delta_unit) + == expected + ) + + @pytest.mark.parametrize( + "delta_vals,delta_unit,expected", + list(zip(delta_vals, delta_unit, expected_labels)), + ) + def test_create_timedelta_labels(self, delta_vals, delta_unit, expected): + assert utils.create_timedelta_labels(delta_vals, delta_unit) == expected - @pytest.mark.parametrize('delta_vals,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_labels))) - def test_create_timedelta_labels(self,delta_vals,delta_unit,expected): - assert utils.create_timedelta_labels(delta_vals,delta_unit) == expected - @pytest.mark.parametrize( - 'age_breakpoints,delta_unit,expected', + "age_breakpoints,delta_unit,expected", list( zip( - delta_vals, - delta_unit, - list(zip( - expected_breakpoints, - expected_labels - ) - ) + delta_vals, delta_unit, list(zip(expected_breakpoints, expected_labels)) ) - ) + ), ) - def test_prep_age_distribution_days(self,age_breakpoints,delta_unit,expected): - assert utils.prep_age_distribution(self.acq_date,age_breakpoints,delta_unit) == expected + def test_prep_age_distribution_days(self, age_breakpoints, delta_unit, expected): + assert ( + utils.prep_age_distribution(self.acq_date, age_breakpoints, delta_unit) + == expected + ) - @pytest.mark.parametrize('age_breakpoints,delta_unit,expected',list(zip(delta_vals,delta_unit,expected_age_groups_days))) - def test_calculate_age_distribution_days(self,age_breakpoints,delta_unit,expected): + @pytest.mark.parametrize( + "age_breakpoints,delta_unit,expected", + list(zip(delta_vals, delta_unit, expected_age_groups_days)), + ) + def test_calculate_age_distribution_days( + self, age_breakpoints, delta_unit, expected + ): assert_series_equal( - utils.calculate_age_distribution(self.timestamps,self.acq_date,age_breakpoints,delta_unit), - expected - ) \ No newline at end of file + utils.calculate_age_distribution( + self.timestamps, self.acq_date, age_breakpoints, delta_unit + ), + expected, + ) -- GitLab