From 1967660c7d17e666cad290a1712f2298b215e592 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Tue, 28 Jan 2025 13:11:11 -0600
Subject: [PATCH] Bugfix: Added numpy datetime as a valid type for run_date

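The cutoff calculation in Aggregator annotated run_date as pd.Timestamp
only, but a plain numpy datetime64 behaves the same way in the timedelta
arithmetic, so the hint now accepts either type.

A minimal sketch of the equivalence (pd.Timedelta stands in here for the
module's as_timedelta helper; the variable names are illustrative):

    import numpy as np
    import pandas as pd

    # delta values converted to timedeltas, as in Aggregator
    deltas = pd.Series([pd.Timedelta(days=d) for d in (30, 60, 90)])

    # both forms of run_date produce the same datetime cutoffs
    ts_cutoffs = pd.to_datetime(pd.Timestamp("2025-01-21") - deltas)
    np_cutoffs = pd.to_datetime(np.datetime64("2025-01-21") - deltas)
    assert ts_cutoffs.equals(np_cutoffs)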
---
 example-job-scripts/convert-to-hive.sh | 9 +++++----
 src/rc_gpfs/process/factory.py         | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/example-job-scripts/convert-to-hive.sh b/example-job-scripts/convert-to-hive.sh
index c7be080..1d6333d 100644
--- a/example-job-scripts/convert-to-hive.sh
+++ b/example-job-scripts/convert-to-hive.sh
@@ -4,18 +4,19 @@
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=16
 #SBATCH --mem=90G
-#SBATCH --partition=amperenodes
+#SBATCH --partition=amperenodes-reserve
 #SBATCH --time=02:00:00
 #SBATCH --reservation=rc-gpfs
 #SBATCH --gres=gpu:1
 #SBATCH --output=out/hive-setup-%A-%a.out
 #SBATCH --error=out/hive-setup-%A-%a.err
-#SBATCH --array=0-49
+#SBATCH --array=0
 
 module load Anaconda3
 conda activate gpfs
 
-parquets=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-project_list-path-external_slurm-*/parquet"))
+device="data-project" # data-project, data-user, or scratch
+parquets=($(find /data/rc/gpfs-policy/data -path "*/list-policy_${device}_list-path-external_slurm-*2025-01-21*/parquet"))
 pq=${parquets[${SLURM_ARRAY_TASK_ID}]}
 
 convert-to-hive --batch \
@@ -23,4 +24,4 @@ convert-to-hive --batch \
     --partition=amperenodes-reserve \
     --mem=120G \
     ${pq} \
-    /data/rc/gpfs-policy/data/gpfs-hive/data-project
+    /data/rc/gpfs-policy/data/gpfs-hive/${device}
diff --git a/src/rc_gpfs/process/factory.py b/src/rc_gpfs/process/factory.py
index 2eae018..545062e 100644
--- a/src/rc_gpfs/process/factory.py
+++ b/src/rc_gpfs/process/factory.py
@@ -48,7 +48,7 @@ class Aggregator:
         self,
         delta_vals: int | List[int],
         delta_unit: Literal['D','W','M','Y'],
-        run_date: pd.Timestamp
+        run_date: pd.Timestamp | np.datetime64
     ) -> List[int | pd.Timestamp]:
         deltas = pd.Series([as_timedelta(c,delta_unit) for c in delta_vals])
         cutoffs = pd.to_datetime(run_date - deltas)
-- 
GitLab