diff --git a/example-job-scripts/00-split-logs.sh b/example-job-scripts/00-split-logs.sh
old mode 100644
new mode 100755
index 07c4daf3b7b3d7ecbb7dd92de88b3c50463144e8..f152f1a737699fe0511e2ff455a638b526717030
--- a/example-job-scripts/00-split-logs.sh
+++ b/example-job-scripts/00-split-logs.sh
@@ -13,7 +13,23 @@
 module load Anaconda3
 conda activate gpfs
 
-logs=($(find /data/rc/gpfs-policy/data -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz"))
+# listcmd env var sets the command to enumerate datasets to process
+# supports passing args during sbatch, e.g. listcmd="cat split-list" sbatch <thisscript>
+# note: maxdepth speeds execution of find by avoiding deep dirs
+if [ -n "${listcmd:-}" ]
+then
+  # caller-supplied command: expanded unquoted on purpose so "cmd arg ..." is split into words
+  logs=($($listcmd))
+else
+  # default: run find directly so the -path pattern stays quoted and is never globbed by the shell
+  logs=($(find /data/rc/gpfs-policy/data -maxdepth 3 -path "*/list-policy_data-user_list-path-external_slurm-31[35]*/raw/*.gz"))
+fi
 log=${logs[${SLURM_ARRAY_TASK_ID}]}
 
-split-log --no-clobber ${log}
\ No newline at end of file
+
+# for lazy submit. only do work if there is work to do
+if [ "${SLURM_ARRAY_TASK_ID}" -lt "${#logs[@]}" ]
+then
+  # NOTE(review): this only prints the command (dry run); drop the leading "echo" to actually run split-log -- confirm intent
+  echo split-log --no-clobber "${log}"
+fi