#!/bin/bash
#
#SBATCH --job-name=calculate-churn
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem=90G
#SBATCH --partition=amperenodes-reserve
#SBATCH --time=12:00:00
#SBATCH --reservation=rc-gpfs
#SBATCH --gres=gpu:1
#SBATCH --output=out/churn-%A-%a.out
#SBATCH --error=out/churn-%A-%a.err
#SBATCH --array=0-166

module load Anaconda3
conda activate gpfs

hive="/data/rc/gpfs-policy/data/gpfs-hive/data-project/"

# Collect every top-level directory (tld=*) partition in the hive, then pick
# the one assigned to this array task.
tlds=($(find "${hive}" -name "tld=*" -type d | sed -n "s/.*tld=//p"))
tld=${tlds[${SLURM_ARRAY_TASK_ID}]}

echo "TLD: ${tld}"

# The heredoc is unquoted on purpose: the shell expands hive and tld into the
# Python source before it runs.
python << END
from pathlib import Path
from datetime import datetime

from dateutil.rrule import rrule, DAILY

from rc_gpfs.process.process import calculate_churn

hive_path = Path("${hive}")
tld = "${tld}"

# Alternative: derive the acquisition dates from the existing acq=* partitions
# for this tld instead of using a fixed window.
#acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*")
#acq_dates = [p.name.removeprefix("acq=") for p in acqs]

# Build one acquisition date string per day in the target window, inclusive.
start_date = datetime(2025, 1, 20)
end_date = datetime(2025, 2, 4)
acq_dates = [d.strftime('%Y-%m-%d') for d in rrule(DAILY, dtstart=start_date, until=end_date)]

with_cuda = True
churn = calculate_churn(hive_path, tld, acq_dates, with_cuda)
END
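
# Usage note, a sketch rather than part of the job itself: the hardcoded
# --array=0-166 bound only lines up if exactly 167 tld directories exist under
# the hive at submission time. One way to derive the bound instead of
# hardcoding it (the filename calculate-churn.sh is an assumption, not from
# the source; sbatch command-line options override in-script #SBATCH
# directives):
#
#   hive="/data/rc/gpfs-policy/data/gpfs-hive/data-project/"
#   n=$(find "${hive}" -name "tld=*" -type d | wc -l)
#   sbatch --array=0-$((n-1)) calculate-churn.sh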