#!/bin/bash
#
#SBATCH --job-name=calculate-churn
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem=90G
#SBATCH --partition=amperenodes-reserve
#SBATCH --time=12:00:00
#SBATCH --reservation=rc-gpfs
#SBATCH --gres=gpu:1
#SBATCH --output=out/churn-%A-%a.out
#SBATCH --error=out/churn-%A-%a.err
#SBATCH --array=1-162
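
# Set up the Python environment that provides the rc_gpfs package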
module load Anaconda3
conda activate gpfs-dev
hive="/data/rc/gpfs-policy/data/gpfs-hive/data-project/"
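
# Enumerate every top-level directory partition (tld=*) in the hive dataset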
tlds=($(find "${hive}" -name "tld=*" -type d | sed -n "s/.*tld=//p"))

# Bash arrays are 0-indexed while the array task IDs above start at 1
tld=${tlds[$((SLURM_ARRAY_TASK_ID - 1))]}
echo "TLD: ${tld}"
python << END
from pathlib import Path
from rc_gpfs.process.process import calculate_churn
hive_path = Path("${hive}")
tld = "${tld}"

# Collect every acquisition date recorded for this tld in the hive dataset
acqs = hive_path.joinpath(f"tld={tld}").glob("acq=*")
acq_dates = [p.name.removeprefix("acq=") for p in acqs]

# Compute churn across all acquisition dates, using the GPU requested above
with_cuda = True
churn = calculate_churn(hive_path, tld, acq_dates, with_cuda)
END