From 21d0f5f1a00b4685deb05ce128bba986c4cbd281 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Sat, 26 Apr 2025 13:09:51 -0500 Subject: [PATCH] Add random delay to nested sbatch submissions --- src/rc_gpfs/cli/convert_flat_to_hive.py | 8 ++++++++ src/rc_gpfs/cli/convert_to_parquet.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/rc_gpfs/cli/convert_flat_to_hive.py b/src/rc_gpfs/cli/convert_flat_to_hive.py index 660f072..89d5bb3 100644 --- a/src/rc_gpfs/cli/convert_flat_to_hive.py +++ b/src/rc_gpfs/cli/convert_flat_to_hive.py @@ -1,5 +1,7 @@ import argparse import subprocess +import time +import random from pathlib import Path import polars as pl @@ -76,6 +78,12 @@ def submit_batch(**kwargs): script = f"#!/bin/bash\n#\n{slurm_opts}\n{BATCH_CMDS.format(**kwargs)}" + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. Adding a random delay should + # fix that + time.sleep(random.uniform(1,5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass diff --git a/src/rc_gpfs/cli/convert_to_parquet.py b/src/rc_gpfs/cli/convert_to_parquet.py index 0a0f78a..ccd96ee 100644 --- a/src/rc_gpfs/cli/convert_to_parquet.py +++ b/src/rc_gpfs/cli/convert_to_parquet.py @@ -1,5 +1,6 @@ import argparse -import re +import time +import random import subprocess from pathlib import Path import multiprocessing @@ -71,6 +72,12 @@ def submit_batch(**kwargs): script = BATCH_SCRIPT.format(**kwargs) + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. Adding a random delay should + # fix that + time.sleep(random.uniform(1, 5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass -- GitLab