Skip to content
Snippets Groups Projects
Commit 733c118c authored by Matthew K Defenderfer's avatar Matthew K Defenderfer
Browse files

Add initial test framework for utils functions

parent df8d1a68
No related branches found
No related tags found
2 merge requests: !76 "Patch v0.5.1", !72 "Add initial test framework for utils functions"
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
groups = ["main"]
markers = "sys_platform == \"win32\""
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "colormaps"
version = "0.4.2"
......@@ -82,6 +95,18 @@ files = [
docs = ["Sphinx", "furo"]
test = ["objgraph", "psutil"]
[[package]]
name = "iniconfig"
version = "2.1.0"
description = "brain-dead simple config-ini parsing"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
{file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
]
[[package]]
name = "numpy"
version = "2.2.5"
......@@ -175,6 +200,22 @@ files = [
packaging = "*"
tenacity = ">=6.2.0"
[[package]]
name = "pluggy"
version = "1.5.0"
description = "plugin and hook calling mechanisms for python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
{file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
]
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "polars"
version = "1.27.1"
......@@ -274,6 +315,42 @@ files = [
[package.extras]
test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
[[package]]
name = "pytest"
version = "8.3.5"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"},
{file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"},
]
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=1.5,<2"
[package.extras]
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
[[package]]
name = "pytest-datafiles"
version = "3.0.0"
description = "py.test plugin to create a 'tmp_path' containing predefined files/directories."
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "pytest-datafiles-3.0.0.tar.gz", hash = "sha256:a70c4c66a36d1cdcfc095607f04eee66eaef3fa64cbb62d60c47ce169901d1d4"},
{file = "pytest_datafiles-3.0.0-py2.py3-none-any.whl", hash = "sha256:2176e10d3f6e76f358925a897e21e2bcc5a0170b92fac4e66ed055eaa2ca6a22"},
]
[package.dependencies]
pytest = ">=3.6"
[[package]]
name = "sqlalchemy"
version = "2.0.40"
......@@ -419,5 +496,5 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.12"
content-hash = "b94c26465ac498c7656fafdf498f424379a1e882e5da6ca3a9dcac48b076b770"
python-versions = ">=3.12,<4.0"
content-hash = "b774e71210a804105de6c03976133f42953ebfc5be59ef0d2c6cc62c842a6372"
......@@ -11,7 +11,7 @@ maintainers = [
]
license = "AFL"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.12,<4.0"
keywords = ["GPFS", "policy", "aggregation", "reporting"]
dynamic = ["version","dependencies","classifiers"]
......@@ -33,12 +33,14 @@ classifiers = [
version = "0.0.0"
[tool.poetry.dependencies]
python = ">=3.12"
python = ">=3.12,<4.0"
colormaps = "*"
numpy = "*"
plotly = "^5.24.1"
polars = ">=1.27.0"
pyarrow = "^19.0.1"
pytest = "^8.3.5"
pytest-datafiles = "^3.0.0"
sqlalchemy = "*"
typeguard = "*"
......@@ -66,3 +68,10 @@ folders = [
requires = ["poetry-core>=2.0.0,<3.0.0","poetry-dynamic-versioning>=1.0.0,<2.0.0"]
build-backend = "poetry_dynamic_versioning.backend"
[tool.pytest.ini_options]
# Import test modules with importlib instead of inserting their directories
# into sys.path.
addopts = [
"--import-mode=importlib",
]
# Refuse to run unless the pytest-datafiles plugin (>=3.0.0) is installed.
required_plugins = ["pytest-datafiles>=3.0.0"]
# Directories searched when pytest is invoked without explicit paths.
testpaths = ["tests"]
\ No newline at end of file
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".gitlab-ci.yml",
".git-rewrite",
".hg",
".ipynb_checkpoints",
".mypy_cache",
".nox",
".pants.d",
".poetry",
".pyenv",
".pytest_cache",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
".vscode",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"site-packages",
"venv",
"poetry.toml",
"poetry.lock",
"test-data",
"legacy-scripts",
"extra",
"data"
]
# Same as Black.
line-length = 88
indent-width = 4
# Target the oldest interpreter the project supports: pyproject.toml declares
# `requires-python = ">=3.12,<4.0"`, and Ruff treats this setting as the
# minimum Python version.
target-version = "py312"
[lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
ignore = []
# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
[format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
# Enable auto-formatting of code examples in docstrings. Markdown,
# reStructuredText code/literal blocks and doctests are all supported.
#
# This is currently disabled by default, but it is planned for this
# to be opt-out in the future.
docstring-code-format = false
# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
docstring-code-line-length = "dynamic"
\ No newline at end of file
......@@ -8,7 +8,7 @@ import pyarrow.parquet as pq
import numpy as np
from .units import as_bytes, convert_si, create_size_bin_labels
from .datetime import *
from .datetime import as_datetime,create_timedelta_breakpoints,create_timedelta_labels
def parse_scontrol():
job_id = os.getenv('SLURM_JOB_ID')
......
import pytest
from typing import Literal
from pathlib import Path
from rc_gpfs import utils
from polars.testing import assert_series_equal
import polars as pl
import numpy as np
### General Purpose Utils
@pytest.mark.parametrize(
    "path",
    [
        "/data/rc/gpfs-policy",
        Path("/data/rc/gpfs-policy"),
    ],
)
def test_as_path_valid(path: Path | Literal["/data/rc/gpfs-policy"]):
    """`utils.as_path` must return a `pathlib.Path` for both `str` and `Path` input."""
    assert isinstance(utils.as_path(path), Path)
@pytest.mark.parametrize(
    "value,unit,to_unit,use_binary,expected",
    [
        (1, "G", "K", False, 1000000),
        (4, "K", "base", True, 4096),
        ("100", "base", "T", False, 1e-10),
    ],
)
def test_convert_si(value, unit, to_unit, use_binary, expected):
    """Check `utils.convert_si` against known unit conversions.

    The cases cover a decimal giga -> kilo conversion, a binary kilo -> base
    conversion, and a numeric string converted from the base unit to tera.
    """
    result = utils.convert_si(value, unit, to_unit, use_binary)
    # The 1e-10 case is a non-integral float, so exact `==` would depend on
    # whether the implementation divides or multiplies by a reciprocal.
    # A tight relative tolerance with no absolute slack keeps the integer
    # cases effectively exact while making the float case robust.
    assert result == pytest.approx(expected, rel=1e-9, abs=0.0)
### Memory and File Size Utils
@pytest.mark.parametrize(
    "val,default,expected",
    [
        ("1 kiB", None, 1024),
        ("1 kiB", 1024**4, 1024),
        ("10 TiB", None, 10 * 1024**4),
        (None, 1024, 1024),
        (None, None, None),
        ("1.5 MiB", None, 1572864),
    ],
)
def test_as_bytes(val, default, expected):
    """Check `utils.as_bytes` for size strings and for the `default` fallback.

    The table expects `default` to be returned when `val` is None and to be
    ignored when `val` is given.
    """
    converted = utils.as_bytes(val, default)
    assert converted == expected
class TestSizeGrouping:
    """Tests for the size-binning helpers exposed by `rc_gpfs.utils`.

    `input_bins`, `expected_bins`, `expected_labels` and
    `expected_size_groups` are index-aligned: entry ``i`` of each list belongs
    to the same scenario. They are combined with ``zip(..., strict=True)`` so
    that a length mismatch raises at collection time instead of silently
    dropping scenarios.
    """

    # File sizes in bytes that get assigned to a bin in every scenario.
    input_sizes = pl.Series(
        name="size",
        values=[
            0,
            2048,
            4096,
            1024**2,  # 1 MiB
            1024**3,  # 1 GiB
            20 * 1024**3,  # 20 GiB
            1024**5,  # 1 PiB
        ],
        dtype=pl.Int128(),
    )

    # Bin edges as passed by the caller: size strings, raw byte counts, an
    # unsorted mix containing a duplicate (1024 / "1 KiB") and a zero, and a
    # bare scalar.
    input_bins = [
        ["4 KiB", "4 MiB", "1 GiB", "10 GiB", "100 GiB", "1 TiB"],
        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
        ["10 GiB", 1024, 4096, "1 KiB", 0],
        "1 MiB",
    ]

    # Bin edges in bytes expected for each `input_bins` entry.
    expected_bins = [
        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
        [4096, 4194304, 1073741824, 10737418240, 107374182400, 1099511627776],
        [1024, 4096, 10737418240],
        [1048576],
    ]

    # Bin labels expected for each `expected_bins` entry.
    expected_labels = [
        [
            "0 B-4 KiB",
            "4 KiB-4 MiB",
            "4 MiB-1 GiB",
            "1 GiB-10 GiB",
            "10 GiB-100 GiB",
            "100 GiB-1 TiB",
            ">1 TiB",
        ],
        [
            "0 B-4 KiB",
            "4 KiB-4 MiB",
            "4 MiB-1 GiB",
            "1 GiB-10 GiB",
            "10 GiB-100 GiB",
            "100 GiB-1 TiB",
            ">1 TiB",
        ],
        ["0 B-1 KiB", "1 KiB-4 KiB", "4 KiB-10 GiB", ">10 GiB"],
        ["0 B-1 MiB", ">1 MiB"],
    ]

    # Expected label for every element of `input_sizes`, per scenario. The
    # Enum categories are taken from `expected_labels` so the two cannot drift.
    expected_size_groups = [
        pl.Series(
            name="size",
            values=[
                "0 B-4 KiB",
                "0 B-4 KiB",
                "0 B-4 KiB",
                "4 KiB-4 MiB",
                "4 MiB-1 GiB",
                "10 GiB-100 GiB",
                ">1 TiB",
            ],
            dtype=pl.Enum(expected_labels[0]),
        ),
        pl.Series(
            name="size",
            values=[
                "0 B-4 KiB",
                "0 B-4 KiB",
                "0 B-4 KiB",
                "4 KiB-4 MiB",
                "4 MiB-1 GiB",
                "10 GiB-100 GiB",
                ">1 TiB",
            ],
            dtype=pl.Enum(expected_labels[1]),
        ),
        pl.Series(
            name="size",
            values=[
                "0 B-1 KiB",
                "1 KiB-4 KiB",
                "1 KiB-4 KiB",
                "4 KiB-10 GiB",
                "4 KiB-10 GiB",
                ">10 GiB",
                ">10 GiB",
            ],
            dtype=pl.Enum(expected_labels[2]),
        ),
        pl.Series(
            name="size",
            values=[
                "0 B-1 MiB",
                "0 B-1 MiB",
                "0 B-1 MiB",
                "0 B-1 MiB",
                ">1 MiB",
                ">1 MiB",
                ">1 MiB",
            ],
            dtype=pl.Enum(expected_labels[3]),
        ),
    ]

    @pytest.mark.parametrize(
        "bins,expected",
        list(zip(expected_bins, expected_labels, strict=True)),
    )
    def test_create_size_bin_labels(self, bins, expected):
        """Byte-valued bin edges produce the expected range labels."""
        assert utils.create_size_bin_labels(bins) == expected

    @pytest.mark.parametrize(
        "bins,expected",
        list(
            zip(
                input_bins,
                zip(expected_bins, expected_labels, strict=True),
                strict=True,
            )
        ),
    )
    def test_prep_size_distribution(self, bins, expected):
        """Raw bin specifications yield the expected `(bins, labels)` pair."""
        assert utils.prep_size_distribution(size_bins=bins) == expected

    @pytest.mark.parametrize(
        "bins,expected",
        list(zip(input_bins, expected_size_groups, strict=True)),
        ids=[1, 2, 3, 4],
    )
    def test_calculate_size_distribution(self, bins, expected):
        """Every value of `input_sizes` is assigned the expected size label."""
        assert_series_equal(
            utils.calculate_size_distribution(self.input_sizes, size_bins=bins),
            expected,
        )
### DateTime and File Age Utils
@pytest.mark.parametrize(
    "date,expected",
    [
        ("2025-01-01", np.datetime64("2025-01-01T00:00:00.000000000")),
        (
            np.datetime64("2025-01-01T00:00:00.000000000"),
            np.datetime64("2025-01-01T00:00:00.000000000"),
        ),
        (1735689600000000000, 1735689600000000000),
        (None, None),
    ],
)
def test_as_datetime(date, expected):
    """Check `utils.as_datetime` for string, datetime64, integer and None input.

    The table expects a date string to become a nanosecond-precision
    `np.datetime64`, and the other three inputs to compare equal to themselves.
    """
    parsed = utils.as_datetime(date)
    assert parsed == expected
def test_as_datetime_fails():
    """A string that is not a date must make `utils.as_datetime` raise `ValueError`."""
    unparseable = "not a date"
    with pytest.raises(ValueError):
        utils.as_datetime(unparseable)
@pytest.mark.parametrize("val", [1, 3, 5])
@pytest.mark.parametrize("unit", ["D", "W"])
def test_as_timedelta(val, unit):
    """`utils.as_timedelta` must equal `np.timedelta64` for each value/unit pair."""
    wanted = np.timedelta64(val, unit)
    assert utils.as_timedelta(val, unit) == wanted
class TestAgeGrouping:
    """Tests for the age-binning helpers exposed by `rc_gpfs.utils`.

    `delta_vals`, `delta_unit`, `expected_breakpoints`, `expected_labels` and
    `expected_age_groups_days` are index-aligned: entry ``i`` of each list
    belongs to the same scenario. They are combined with
    ``zip(..., strict=True)`` so that a length mismatch raises at collection
    time instead of silently dropping scenarios.
    """

    # Access timestamps to be binned, parsed to nanosecond precision.
    timestamps = pl.Series(
        name="access",
        values=[
            "2024-05-12",
            "2025-02-21",
            "2025-04-02",
            "2025-04-17",
            "2025-05-17",
        ],
    ).str.to_datetime(time_unit="ns")

    # Reference date that the age deltas are counted back from.
    acq_date = "2025-06-01"

    # Age deltas per scenario (list or scalar) and their matching unit.
    # The -2 and 0 entries have no counterpart in `expected_breakpoints`.
    delta_vals = [[-2, 30, 60, 90, 180], 365, [0, 4, 8, 12, 16], 52]
    delta_unit = ["D", "D", "W", "W"]

    # `acq_date` minus each positive delta, per scenario.
    expected_breakpoints = [
        [
            np.datetime64("2025-05-02T00:00:00.000000000"),
            np.datetime64("2025-04-02T00:00:00.000000000"),
            np.datetime64("2025-03-03T00:00:00.000000000"),
            np.datetime64("2024-12-03T00:00:00.000000000"),
        ],
        np.datetime64("2024-06-01T00:00:00.000000000"),
        [
            np.datetime64("2025-05-04T00:00:00.000000000"),
            np.datetime64("2025-04-06T00:00:00.000000000"),
            np.datetime64("2025-03-09T00:00:00.000000000"),
            np.datetime64("2025-02-09T00:00:00.000000000"),
        ],
        np.datetime64("2024-06-02T00:00:00.000000000"),
    ]

    # Age labels per scenario, ordered from oldest to newest.
    expected_labels = [
        [">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"],
        [">365D", "<365D"],
        [">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
        [">52W", "<52W"],
    ]

    # Expected label for every element of `timestamps`, per scenario. The Enum
    # categories are taken from `expected_labels` so the two cannot drift.
    expected_age_groups_days = [
        pl.Series(
            name="access",
            values=[">180D", "90D-180D", "60D-90D", "30D-60D", "<30D"],
            dtype=pl.Enum(expected_labels[0]),
        ),
        pl.Series(
            name="access",
            values=[">365D", "<365D", "<365D", "<365D", "<365D"],
            dtype=pl.Enum(expected_labels[1]),
        ),
        pl.Series(
            name="access",
            values=[">16W", "12W-16W", "8W-12W", "4W-8W", "<4W"],
            dtype=pl.Enum(expected_labels[2]),
        ),
        pl.Series(
            name="access",
            values=[">52W", "<52W", "<52W", "<52W", "<52W"],
            dtype=pl.Enum(expected_labels[3]),
        ),
    ]

    @pytest.mark.parametrize(
        "delta_vals,delta_unit,expected",
        list(zip(delta_vals, delta_unit, expected_breakpoints, strict=True)),
    )
    def test_create_timedelta_breakpoints_days(self, delta_vals, delta_unit, expected):
        """Deltas counted back from `acq_date` give the expected breakpoints."""
        assert (
            utils.create_timedelta_breakpoints(self.acq_date, delta_vals, delta_unit)
            == expected
        )

    @pytest.mark.parametrize(
        "delta_vals,delta_unit,expected",
        list(zip(delta_vals, delta_unit, expected_labels, strict=True)),
    )
    def test_create_timedelta_labels(self, delta_vals, delta_unit, expected):
        """Deltas and their unit give the expected age-range labels."""
        assert utils.create_timedelta_labels(delta_vals, delta_unit) == expected

    @pytest.mark.parametrize(
        "age_breakpoints,delta_unit,expected",
        list(
            zip(
                delta_vals,
                delta_unit,
                zip(expected_breakpoints, expected_labels, strict=True),
                strict=True,
            )
        ),
    )
    def test_prep_age_distribution_days(self, age_breakpoints, delta_unit, expected):
        """Raw deltas yield the expected `(breakpoints, labels)` pair."""
        assert (
            utils.prep_age_distribution(self.acq_date, age_breakpoints, delta_unit)
            == expected
        )

    @pytest.mark.parametrize(
        "age_breakpoints,delta_unit,expected",
        list(zip(delta_vals, delta_unit, expected_age_groups_days, strict=True)),
    )
    def test_calculate_age_distribution_days(
        self, age_breakpoints, delta_unit, expected
    ):
        """Every value of `timestamps` is assigned the expected age label."""
        assert_series_equal(
            utils.calculate_age_distribution(
                self.timestamps, self.acq_date, age_breakpoints, delta_unit
            ),
            expected,
        )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment