From 8713a5eb63245a4cfb24c9e9b2105bd7be4bfc13 Mon Sep 17 00:00:00 2001
From: John-Paul Robinson <jpr@uab.edu>
Date: Fri, 26 Jul 2024 11:41:19 -0500
Subject: [PATCH] Add script to split output from policy run

Parameters are hard-coded to split at 5,000,000 entries (lines) per file, which
provides reasonable parallel read efficiency for downstream tasks.
Splits are written to a .d subdirectory named after the input file.
---
 split-info-file | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100755 split-info-file

diff --git a/split-info-file b/split-info-file
new file mode 100755
index 0000000..331b00f
--- /dev/null
+++ b/split-info-file
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Usage: split-info-file <info-file>
+# Split an info file into list files of 5,000,000 lines each, written to <info-file>.d/
+# as list-000, list-001, ... so that searching them with an array job is fast.
+file=${1:?usage: split-info-file <info-file>}
+dirname=${file}.d
+prefix=${dirname}/list-
+# Paths are quoted so names with spaces or glob characters survive; stop if mkdir fails.
+mkdir -p -- "${dirname}" || exit 1
+srun -p amd-hdr100 --time 06:00:00 --mem 4G split -a 3 -d -l 5000000 -- "${file}" "${prefix}"
-- 
GitLab