diff --git a/src/rc_gpfs/cli/convert_flat_to_hive.py b/src/rc_gpfs/cli/convert_flat_to_hive.py index 660f072b250c77fb4bf2d9324ad83a4660adb807..89d5bb30cbe79bc1962faf40f63456c716047c92 100644 --- a/src/rc_gpfs/cli/convert_flat_to_hive.py +++ b/src/rc_gpfs/cli/convert_flat_to_hive.py @@ -1,5 +1,7 @@ import argparse import subprocess +import time +import random from pathlib import Path import polars as pl @@ -76,6 +78,12 @@ def submit_batch(**kwargs): script = f"#!/bin/bash\n#\n{slurm_opts}\n{BATCH_CMDS.format(**kwargs)}" + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. Adding a random delay should + # fix that + time.sleep(random.uniform(1,5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass diff --git a/src/rc_gpfs/cli/convert_to_parquet.py b/src/rc_gpfs/cli/convert_to_parquet.py index 0a0f78a0b578b2b77cf1799657292c35174b1783..ccd96ee9fb5cc04587817489236d8ce50baddcad 100644 --- a/src/rc_gpfs/cli/convert_to_parquet.py +++ b/src/rc_gpfs/cli/convert_to_parquet.py @@ -1,5 +1,6 @@ import argparse -import re +import time +import random import subprocess from pathlib import Path import multiprocessing @@ -71,6 +72,12 @@ def submit_batch(**kwargs): script = BATCH_SCRIPT.format(**kwargs) + # Wait between 1 and 5 seconds before batch submission. This helps avoid a situation where this setup is running in + # a batch array job and all of the array tasks submit their child array jobs at the same time. That results in jobs + # failing to be submitted due to overwhelming the scheduler with simultaneous requests. Adding a random delay should + # fix that + time.sleep(random.uniform(1, 5)) + subprocess.run(['sbatch'],input=script,shell=True,text=True) pass