From 3910955e03eace48db2fbb831e45926c31fd476e Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Sat, 26 Apr 2025 13:07:18 -0500 Subject: [PATCH 1/2] add time delay to sbatch command --- src/rc_gpfs/cli/convert_flat_to_hive.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rc_gpfs/cli/convert_flat_to_hive.py b/src/rc_gpfs/cli/convert_flat_to_hive.py index 660f072..89d5bb3 100644 --- a/src/rc_gpfs/cli/convert_flat_to_hive.py +++ b/src/rc_gpfs/cli/convert_flat_to_hive.py @@ -1,5 +1,7 @@ import argparse import subprocess +import time +import random from pathlib import Path import polars as pl @@ -76,6 +78,12 @@ def submit_batch(**kwargs): script = f"#!/bin/bash\n#\n{slurm_opts}\n{BATCH_CMDS.format(**kwargs)}" + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. 
Adding a random delay should + # stagger the submissions and fix that. + time.sleep(random.uniform(1,5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass -- GitLab From a73e9eb1ab5fbe96a799186890d7e2868e970191 Mon Sep 17 00:00:00 2001 From: Matthew K Defenderfer <mdefende@uab.edu> Date: Sat, 26 Apr 2025 13:08:10 -0500 Subject: [PATCH 2/2] add time delay to sbatch command --- src/rc_gpfs/cli/convert_to_parquet.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rc_gpfs/cli/convert_to_parquet.py b/src/rc_gpfs/cli/convert_to_parquet.py index 0a0f78a..ccd96ee 100644 --- a/src/rc_gpfs/cli/convert_to_parquet.py +++ b/src/rc_gpfs/cli/convert_to_parquet.py @@ -1,5 +1,6 @@ import argparse -import re +import time +import random import subprocess from pathlib import Path import multiprocessing @@ -71,6 +72,12 @@ def submit_batch(**kwargs): script = BATCH_SCRIPT.format(**kwargs) + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. Adding a random delay should + # stagger the submissions and fix that. + time.sleep(random.uniform(1, 5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass -- GitLab