Skip to content
Snippets Groups Projects
Commit 1967660c authored by Matthew K Defenderfer
Browse files

Bugfix: Added numpy datetime as valid type for run_date

parent 2856ed5c
No related branches found
No related tags found
1 merge request: !46 Bugfix: Added numpy datetime as valid type for run_date
...@@ -4,18 +4,19 @@ ...@@ -4,18 +4,19 @@
#SBATCH --ntasks=1 #SBATCH --ntasks=1
#SBATCH --cpus-per-task=16 #SBATCH --cpus-per-task=16
#SBATCH --mem=90G #SBATCH --mem=90G
#SBATCH --partition=amperenodes #SBATCH --partition=amperenodes-reserve
#SBATCH --time=02:00:00 #SBATCH --time=02:00:00
#SBATCH --reservation=rc-gpfs #SBATCH --reservation=rc-gpfs
#SBATCH --gres=gpu:1 #SBATCH --gres=gpu:1
#SBATCH --output=out/hive-setup-%A-%a.out #SBATCH --output=out/hive-setup-%A-%a.out
#SBATCH --error=out/hive-setup-%A-%a.err #SBATCH --error=out/hive-setup-%A-%a.err
#SBATCH --array=0-49 #SBATCH --array=0
module load Anaconda3 module load Anaconda3
conda activate gpfs conda activate gpfs
parquets=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-project_list-path-external_slurm-*/parquet")) device="data-project" # data-project, data-user, or scratch
parquets=($(find /data/rc/gpfs-policy/data -path "*/list-policy_${device}_list-path-external_slurm-*2025-01-21*/parquet"))
pq=${parquets[${SLURM_ARRAY_TASK_ID}]} pq=${parquets[${SLURM_ARRAY_TASK_ID}]}
convert-to-hive --batch \ convert-to-hive --batch \
...@@ -23,4 +24,4 @@ convert-to-hive --batch \ ...@@ -23,4 +24,4 @@ convert-to-hive --batch \
--partition=amperenodes-reserve \ --partition=amperenodes-reserve \
--mem=120G \ --mem=120G \
${pq} \ ${pq} \
/data/rc/gpfs-policy/data/gpfs-hive/data-project /data/rc/gpfs-policy/data/gpfs-hive/${device}
...@@ -48,7 +48,7 @@ class Aggregator: ...@@ -48,7 +48,7 @@ class Aggregator:
self, self,
delta_vals: int | List[int], delta_vals: int | List[int],
delta_unit: Literal['D','W','M','Y'], delta_unit: Literal['D','W','M','Y'],
run_date: pd.Timestamp run_date: pd.Timestamp | np.datetime64
) -> List[int | pd.Timestamp]: ) -> List[int | pd.Timestamp]:
deltas = pd.Series([as_timedelta(c,delta_unit) for c in delta_vals]) deltas = pd.Series([as_timedelta(c,delta_unit) for c in delta_vals])
cutoffs = pd.to_datetime(run_date - deltas) cutoffs = pd.to_datetime(run_date - deltas)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment