diff --git a/example-job-scripts/split-logs.sh b/example-job-scripts/00-split-logs.sh
similarity index 89%
rename from example-job-scripts/split-logs.sh
rename to example-job-scripts/00-split-logs.sh
index 8ac746fca4ddf4607551bc23664e1e410a8b14a4..07c4daf3b7b3d7ecbb7dd92de88b3c50463144e8 100644
--- a/example-job-scripts/split-logs.sh
+++ b/example-job-scripts/00-split-logs.sh
@@ -13,7 +13,7 @@
 
 module load Anaconda3
 conda activate gpfs
 
-logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-project_list-path-external_slurm-*/raw/*.gz"))
+logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz"))
 log=${logs[${SLURM_ARRAY_TASK_ID}]}
 split-log --no-clobber ${log}
\ No newline at end of file
diff --git a/example-job-scripts/convert-logs.sh b/example-job-scripts/10-convert-logs.sh
similarity index 100%
rename from example-job-scripts/convert-logs.sh
rename to example-job-scripts/10-convert-logs.sh
diff --git a/example-job-scripts/convert-to-hive.sh b/example-job-scripts/20-convert-to-hive.sh
similarity index 100%
rename from example-job-scripts/convert-to-hive.sh
rename to example-job-scripts/20-convert-to-hive.sh
diff --git a/example-job-scripts/calculate-churn.sh b/example-job-scripts/30-calculate-churn.sh
similarity index 64%
rename from example-job-scripts/calculate-churn.sh
rename to example-job-scripts/30-calculate-churn.sh
index 56858a8733dc09f3aefe25ec666eeb81a4a7fa6c..2656f1840ce1e7d72e0c2be7c12061641df4a5c6 100644
--- a/example-job-scripts/calculate-churn.sh
+++ b/example-job-scripts/30-calculate-churn.sh
@@ -10,10 +10,10 @@
 #SBATCH --gres=gpu:1
 #SBATCH --output=out/churn-%A-%a.out
 #SBATCH --error=out/churn-%A-%a.err
-#SBATCH --array=1-162
+#SBATCH --array=0-166
 
 module load Anaconda3
-conda activate gpfs-dev
+conda activate gpfs
 
 hive="/data/rc/gpfs-policy/data/gpfs-hive/data-project/"
 tlds=($(find ${hive} -name "tld=*" -type d | sed -n "s/.*tld=//p"))
@@ -23,13 +23,20 @@ echo "TLD: ${tld}"
 
 python << END
 from pathlib import Path
+from datetime import datetime
+from dateutil.rrule import rrule, DAILY
 from rc_gpfs.process.process import calculate_churn
 
 hive_path = Path("${hive}")
 tld = "${tld}"
-acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*")
-acq_dates = [p.name.removeprefix("acq=") for p in acqs]
+#acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*")
+#acq_dates = [p.name.removeprefix("acq=") for p in acqs]
+
+start_date = datetime(2025, 1, 20)
+end_date = datetime(2025, 2, 4)
+
+acq_dates = [datetime.strftime(d,'%Y-%m-%d') for d in list(rrule(DAILY, dtstart=start_date, until=end_date))]
 
 with_cuda=True