From 8713a5eb63245a4cfb24c9e9b2105bd7be4bfc13 Mon Sep 17 00:00:00 2001
From: John-Paul Robinson <jpr@uab.edu>
Date: Fri, 26 Jul 2024 11:41:19 -0500
Subject: [PATCH] Add script to split output from policy run

Parameters hard coded to split on 50k entries per file which
provides reasonable parallel read efficiency for downstream tasks.

Put splits in a .d subdir of file name.
---
 split-info-file | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100755 split-info-file

diff --git a/split-info-file b/split-info-file
new file mode 100755
index 0000000..331b00f
--- /dev/null
+++ b/split-info-file
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Split an info file into individual list files so that searching with an
+# array job is fast. The pieces are written to FILE.d/list-NNN.
+#
+# Usage: split-info-file FILE
+
+set -euo pipefail
+
+# Refuse to run without an input file; otherwise a stray ".d" directory
+# would be created and split would be handed the prefix as its input.
+if [[ $# -lt 1 || -z "${1:-}" ]]; then
+  printf 'Usage: %s FILE\n' "${0##*/}" >&2
+  exit 2
+fi
+
+file=$1
+dirname=${file}.d
+prefix=${dirname}/list-
+
+# NOTE(review): the commit message mentions 50k entries per file, but the
+# value used here is 5000000 lines -- confirm which one is intended.
+readonly lines_per_file=5000000
+
+if [[ ! -r "${file}" ]]; then
+  printf '%s: cannot read input file: %s\n' "${0##*/}" "${file}" >&2
+  exit 1
+fi
+
+mkdir -p -- "${dirname}"
+
+# Run split through srun; -a 3 -d gives three-digit numeric suffixes.
+srun -p amd-hdr100 --time 06:00:00 --mem 4G \
+  split -a 3 -d -l "${lines_per_file}" -- "${file}" "${prefix}"
-- 
GitLab