# NOTE(review): the lines that were here ("Something went wrong on our end",
# author banner, filename/size) are web-page scrape artifacts, not script
# content — kept only as this comment. In the real file the shebang below
# must be line 1.
# write-metadata.sh (850 B) — authored by Matthew K Defenderfer, commit be759db8
#! /bin/bash
#
# Slurm batch script: for each array task, write parquet dataset metadata
# for one "tld=*" partition of the GPFS policy dataset (see logic below).
#
#SBATCH --job-name=write-meta
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
# 90G host RAM; the RMM managed pool in the Python step is sized at 70GiB.
#SBATCH --mem=90G
#SBATCH --partition=amperenodes-reserve
#SBATCH --time=12:00:00
#SBATCH --reservation=rc-gpfs
# One GPU per task — cudf/RMM below require a CUDA device.
#SBATCH --gres=gpu:1
# %A = array master job ID, %a = array task index.
#SBATCH --output=out/metadata-%A-%a.out
#SBATCH --error=out/metadata-%A-%a.err
# NOTE(review): range 0-162 presumably matches the number of tld=* dirs at
# submission time — confirm it stays in sync with the find below.
#SBATCH --array=0-162
# Activate the conda env that provides cudf, rmm, and the rc_gpfs package.
module load Anaconda3
conda activate gpfs-dev

# Collect every top-level-directory partition (tld=*) of the dataset.
# NUL-delimited find + mapfile is safe even if a path contains whitespace
# (the original tlds=($(find ...)) would word-split such paths).
mapfile -d '' -t tlds < <(find /data/rc/gpfs-policy/data/gpfs-hive/data-project/ -name "tld=*" -type d -print0)

# Fail loudly if the array index exceeds the number of partitions found —
# otherwise tld would be empty and the Python step would silently do nothing.
if (( SLURM_ARRAY_TASK_ID >= ${#tlds[@]} )); then
    echo "SLURM_ARRAY_TASK_ID=${SLURM_ARRAY_TASK_ID} out of range: only ${#tlds[@]} tld dirs found" >&2
    exit 1
fi

# Each array task processes exactly one tld partition.
tld=${tlds[${SLURM_ARRAY_TASK_ID}]}
# Unquoted heredoc: ${tld} is interpolated by the shell before python runs.
python << END
import cudf
from rc_gpfs.policy.convert import write_dataset_metadata
import rmm
from pathlib import Path

# Pre-allocate a large managed-memory pool so cudf allocations do not
# thrash the default allocator while looping over acquisition dirs.
rmm.reinitialize(
    pool_allocator=True,
    managed_memory=True,
    initial_pool_size='70GiB'
)

# One metadata write per acq=* partition under this task's tld directory.
for acq_dir in Path("${tld}").glob("acq=*"):
    frame = cudf.read_parquet(acq_dir)
    write_dataset_metadata(frame, acq_dir)
    # Drop the frame before the next read to release GPU pool memory early.
    del frame
END