From 326da6fe2d5301b0c31f3ceb301f2577850d663e Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Sat, 26 Apr 2025 22:28:59 -0500
Subject: [PATCH] fix bash math

---
 src/rc_gpfs/cli/convert_to_parquet.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/rc_gpfs/cli/convert_to_parquet.py b/src/rc_gpfs/cli/convert_to_parquet.py
index 9abec14..ac84448 100644
--- a/src/rc_gpfs/cli/convert_to_parquet.py
+++ b/src/rc_gpfs/cli/convert_to_parquet.py
@@ -23,7 +23,7 @@ Local Parallel Processing:
     If processing is done via a local parallel pool, the requested cores need to be accessible by the invoking Python process. When run in a Slurm job context where the number of cores were only specified with the --ntasks property, only 1 core will be available to the Python process regardless of the number of cores requested by the job. Instead, use the --cpus-per-task property to set the number of cores paired with --ntasks=1. This will correctly allow the parallel pool to utilize all cores assigned to the job.
 """
 
-def parse_args():
+def parse_args(arg_str=None):
     parser = argparse.ArgumentParser(
         description=DESCRIPTION,
         parents=[batch_parser(partition='amd-hdr100,express',time='02:00:00',mem='16G',cpus_per_task=1)],
@@ -41,7 +41,7 @@ def parse_args():
                         help="Number of cores to include in the pool for local parallel processing. If None, will default to all cores available to the invoking Python process")
     parser.add_argument('--no-clobber', action='store_true',default=False,
                         help='When set, skips any log chunks that already have corresponding parquet files. Chunks without a parquet file are processed as normal.')
-    args = parser.parse_args()
+    args = parser.parse_args(arg_str)
     return vars(args)
 
 BATCH_SCRIPT = """\
@@ -58,7 +58,9 @@ BATCH_SCRIPT = """\
 
 {env_cmd}
 
-log=$(ls {input}/*.gz | sort | awk "NR==${{SLURM_ARRAY_TASK_ID+1}} {{ print $1 }}")
+idx=$((${{SLURM_ARRAY_TASK_ID}}+1))
+
+log=$(ls {input}/*.gz | sort | awk "NR==${{idx}} {{ print $1 }}")
 
 convert-to-parquet {no_clobber_opt} -o {output_dir} ${{log}}
 """
-- 
GitLab