#!/bin/bash
#
# write-meta: SLURM array job that, for one "tld=*" directory per task,
# reads every "acq=*" parquet dataset with cuDF on a GPU and writes
# dataset-level metadata via rc_gpfs.policy.convert.write_dataset_metadata.
#
#SBATCH --job-name=write-meta
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem=90G
#SBATCH --partition=amperenodes-reserve
#SBATCH --time=12:00:00
#SBATCH --reservation=rc-gpfs
#SBATCH --gres=gpu:1
#SBATCH --output=out/metadata-%A-%a.out
#SBATCH --error=out/metadata-%A-%a.err
#SBATCH --array=0-162

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

module load Anaconda3
conda activate gpfs-dev

# Enumerate all tld=* partition directories. mapfile avoids the
# word-splitting/globbing pitfalls of tlds=($(find ...)) (ShellCheck SC2207).
mapfile -t tlds < <(find /data/rc/gpfs-policy/data/gpfs-hive/data-project/ -name "tld=*" -type d)

# Pick this array task's directory. ':?' aborts with a message if the index
# is out of range (e.g. the directory count no longer matches --array=0-162).
tld=${tlds[${SLURM_ARRAY_TASK_ID}]:?no tld directory for task ${SLURM_ARRAY_TASK_ID}}

# ${tld} is interpolated into the heredoc by the shell before Python runs
# (the END delimiter is intentionally unquoted).
python << END
import cudf
from rc_gpfs.policy.convert import write_dataset_metadata
import rmm
from pathlib import Path

# Managed-memory RMM pool: lets allocations spill past device memory,
# sized under the 90G host allocation requested above.
rmm.reinitialize(
    pool_allocator=True,
    managed_memory=True,
    initial_pool_size='70GiB'
)

acqs = Path("${tld}").glob("acq=*")
for i in acqs:
    df = cudf.read_parquet(i)
    write_dataset_metadata(df, i)
    del df  # release GPU memory before loading the next acquisition
END