From 612008aed1bf22eba688e107d866057daf6b4099 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Fri, 7 Feb 2025 14:21:51 -0600 Subject: [PATCH] Add run order to example script file names --- .../{split-logs.sh => 00-split-logs.sh} | 2 +- .../{convert-logs.sh => 10-convert-logs.sh} | 0 .../{convert-to-hive.sh => 20-convert-to-hive.sh} | 0 .../{calculate-churn.sh => 30-calculate-churn.sh} | 15 +++++++++++---- 4 files changed, 12 insertions(+), 5 deletions(-) rename example-job-scripts/{split-logs.sh => 00-split-logs.sh} (89%) rename example-job-scripts/{convert-logs.sh => 10-convert-logs.sh} (100%) rename example-job-scripts/{convert-to-hive.sh => 20-convert-to-hive.sh} (100%) rename example-job-scripts/{calculate-churn.sh => 30-calculate-churn.sh} (64%) diff --git a/example-job-scripts/split-logs.sh b/example-job-scripts/00-split-logs.sh similarity index 89% rename from example-job-scripts/split-logs.sh rename to example-job-scripts/00-split-logs.sh index 8ac746f..07c4daf 100644 --- a/example-job-scripts/split-logs.sh +++ b/example-job-scripts/00-split-logs.sh @@ -13,7 +13,7 @@ module load Anaconda3 conda activate gpfs -logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-project_list-path-external_slurm-*/raw/*.gz")) +logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz")) log=${logs[${SLURM_ARRAY_TASK_ID}]} split-log --no-clobber ${log} \ No newline at end of file diff --git a/example-job-scripts/convert-logs.sh b/example-job-scripts/10-convert-logs.sh similarity index 100% rename from example-job-scripts/convert-logs.sh rename to example-job-scripts/10-convert-logs.sh diff --git a/example-job-scripts/convert-to-hive.sh b/example-job-scripts/20-convert-to-hive.sh similarity index 100% rename from example-job-scripts/convert-to-hive.sh rename to example-job-scripts/20-convert-to-hive.sh diff --git a/example-job-scripts/calculate-churn.sh b/example-job-scripts/30-calculate-churn.sh similarity index 64% rename from example-job-scripts/calculate-churn.sh rename to example-job-scripts/30-calculate-churn.sh index 56858a8..2656f18 100644 --- a/example-job-scripts/calculate-churn.sh +++ b/example-job-scripts/30-calculate-churn.sh @@ -10,10 +10,10 @@ #SBATCH --gres=gpu:1 #SBATCH --output=out/churn-%A-%a.out #SBATCH --error=out/churn-%A-%a.err -#SBATCH --array=1-162 +#SBATCH --array=0-166 module load Anaconda3 -conda activate gpfs-dev +conda activate gpfs hive="/data/rc/gpfs-policy/data/gpfs-hive/data-project/" tlds=($(find ${hive} -name "tld=*" -type d | sed -n "s/.*tld=//p")) @@ -23,13 +23,20 @@ echo "TLD: ${tld}" python << END from pathlib import Path +from datetime import datetime +from dateutil.rrule import rrule, DAILY from rc_gpfs.process.process import calculate_churn hive_path = Path("${hive}") tld = "${tld}" -acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*") -acq_dates = [p.name.removeprefix("acq=") for p in acqs] +#acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*") +#acq_dates = [p.name.removeprefix("acq=") for p in acqs] + +start_date = datetime(2025, 1, 20) +end_date = datetime(2025, 2, 4) + +acq_dates = [datetime.strftime(d,'%Y-%m-%d') for d in list(rrule(DAILY, dtstart=start_date, until=end_date))] with_cuda=True -- GitLab