From 517774cf92b80b781eaa8a7ba959b7b8a522dcef Mon Sep 17 00:00:00 2001
From: John-Paul Robinson <jpr@uab.edu>
Date: Mon, 17 Feb 2025 12:15:18 -0600
Subject: [PATCH] Add listcmd param to split script

This lets caller control the list of directories to split without having
to edit the script.

Added array task protections for lazy callers who don't care if array size
aligns with work tasks.
---
 example-job-scripts/00-split-logs.sh | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 example-job-scripts/00-split-logs.sh

diff --git a/example-job-scripts/00-split-logs.sh b/example-job-scripts/00-split-logs.sh
old mode 100644
new mode 100755
index 07c4daf..f152f1a
--- a/example-job-scripts/00-split-logs.sh
+++ b/example-job-scripts/00-split-logs.sh
@@ -13,7 +13,17 @@
 module load Anaconda3
 conda activate gpfs
 
-logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz"))
+# listcmd env var sets the command to enumerate datasets to process
+# supports passing args during sbatch, e.g. listcmd="cat split-list" sbatch <thisscript>
+# note: maxdepth speeds execution of find by avoiding deep dirs
+listcmd=${listcmd:-find /data/rc/gpfs-policy/data -maxdepth 3 -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz"}
+# noglob (set -f) keeps the shell from expanding the find pattern; mapfile keeps one path per line
+mapfile -t logs < <(set -f; ${listcmd})
 log=${logs[${SLURM_ARRAY_TASK_ID}]}
 
-split-log --no-clobber ${log}
\ No newline at end of file
+
+# for lazy submit. only do work if there is work to do
+if [ "${SLURM_ARRAY_TASK_ID}" -lt "${#logs[@]}" ]
+then
+  split-log --no-clobber "${log}"
+fi
-- 
GitLab