Newer
Older
#! /bin/bash
#
#SBATCH --job-name=convert
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH --partition=amd-hdr100,intel-dcb,express
#SBATCH --time=02:00:00
#SBATCH --output=out/convert-%A-%a.out
#SBATCH --error=out/convert-%A-%a.err
#SBATCH --array=0-49
#######################################
# Print the datasets to process, whitespace/newline separated.
#
# The listcmd env var sets the command used to enumerate datasets. It can be
# supplied at submit time, e.g.
#   listcmd="cat split-list" sbatch <thisscript>
# and is split into words the same way an unquoted expansion would be.
# When listcmd is unset or empty, a built-in find is used instead.
#
# Globals:   listcmd (read)
# Outputs:   dataset paths on stdout
# Returns:   exit status of the listing command
#######################################
list_datasets() {
  local -a cmd
  if [[ -n "${listcmd:-}" ]]; then
    # Caller-supplied command string: word splitting is intentional here so
    # that existing invocations keep working unchanged.
    # shellcheck disable=SC2206
    cmd=(${listcmd})
  else
    # The default is kept as an array so the -path pattern reaches find
    # literally; stored in a plain string it would be glob-expanded against
    # the submit directory before find ever saw it.
    # note: maxdepth speeds execution of find by avoiding deep dirs
    cmd=(find /data/rc/gpfs-policy/data -maxdepth 2
      -path "*/list-policy_data-project_list-path-external_slurm-*/chunks")
  fi
  "${cmd[@]}"
}

#######################################
# Convert the dataset whose position in the listing equals this array task's
# index. Tasks whose index is past the end of the listing do nothing, which
# allows submitting a fixed --array range larger than the dataset count.
#
# Globals:   SLURM_ARRAY_TASK_ID (read), listcmd (read via list_datasets)
# Returns:   2 if SLURM_ARRAY_TASK_ID is missing or not a non-negative integer,
#            0 if there is no dataset for this index,
#            otherwise the exit status of convert-to-parquet
#######################################
main() {
  module load Anaconda3

  local task_id=${SLURM_ARRAY_TASK_ID:-}
  if [[ ! "${task_id}" =~ ^[0-9]+$ ]]; then
    printf 'error: SLURM_ARRAY_TASK_ID must be a non-negative integer (got "%s"); run as an array job\n' \
      "${task_id}" >&2
    return 2
  fi
  # Force base 10 so an index with leading zeros is not parsed as octal.
  local -i idx=$((10#${task_id}))

  local listing
  if ! listing=$(list_datasets); then
    # Not fatal: the command may still have printed usable entries.
    printf 'warning: dataset listing command exited non-zero\n' >&2
  fi

  # Split on whitespace as before, but without pathname expansion, so glob
  # characters in a listed path are never expanded against the filesystem.
  # read returns non-zero at end of input when no delimiter is found.
  local -a logs=()
  read -r -d '' -a logs <<<"${listing}" || true

  # for lazy submit. only do work if there is work to do
  if ((idx >= ${#logs[@]})); then
    return 0
  fi

  convert-to-parquet --batch --no-clobber --partition=amd-hdr100,express,intel-dcb "${logs[idx]}"
}

main "$@"