From 3910955e03eace48db2fbb831e45926c31fd476e Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Sat, 26 Apr 2025 13:07:18 -0500
Subject: [PATCH 1/2] add time delay to sbatch command

---
 src/rc_gpfs/cli/convert_flat_to_hive.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/rc_gpfs/cli/convert_flat_to_hive.py b/src/rc_gpfs/cli/convert_flat_to_hive.py
index 660f072..89d5bb3 100644
--- a/src/rc_gpfs/cli/convert_flat_to_hive.py
+++ b/src/rc_gpfs/cli/convert_flat_to_hive.py
@@ -1,5 +1,7 @@
 import argparse
 import subprocess
+import time
+import random
 from pathlib import Path
 import polars as pl
 
@@ -76,6 +78,12 @@ def submit_batch(**kwargs):
 
     script = f"#!/bin/bash\n#\n{slurm_opts}\n{BATCH_CMDS.format(**kwargs)}"
 
+    # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in
+    # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs
+    # failing to be submitted because the scheduler is overwhelmed by simultaneous requests. The random delay staggers
+    # the submissions so they no longer arrive all at once.
+    time.sleep(random.uniform(1,5))
+
     subprocess.run(['sbatch'],input=script,shell=True,text=True)
     pass
 
-- 
GitLab


From a73e9eb1ab5fbe96a799186890d7e2868e970191 Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Sat, 26 Apr 2025 13:08:10 -0500
Subject: [PATCH 2/2] add time delay to sbatch command

---
 src/rc_gpfs/cli/convert_to_parquet.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/rc_gpfs/cli/convert_to_parquet.py b/src/rc_gpfs/cli/convert_to_parquet.py
index 0a0f78a..ccd96ee 100644
--- a/src/rc_gpfs/cli/convert_to_parquet.py
+++ b/src/rc_gpfs/cli/convert_to_parquet.py
@@ -1,5 +1,6 @@
 import argparse
-import re
+import time
+import random
 import subprocess
 from pathlib import Path
 import multiprocessing
@@ -71,6 +72,12 @@ def submit_batch(**kwargs):
     
     script = BATCH_SCRIPT.format(**kwargs)
 
+    # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in
+    # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs
+    # failing to be submitted because the scheduler is overwhelmed by simultaneous requests. The random delay staggers
+    # the submissions so they no longer arrive all at once.
+    time.sleep(random.uniform(1, 5))
+
     subprocess.run(['sbatch'],input=script,shell=True,text=True)
     pass
 
-- 
GitLab