diff --git a/src/split-info-file.sh b/src/split-info-file.sh index 871813aa9ea1641be7fc610888c3f17f55660c67..56572e16f28139c999956a810361026932cd13ea 100755 --- a/src/split-info-file.sh +++ b/src/split-info-file.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -euo pipefail +set -euxo pipefail ############################################################ # Default Values # @@ -11,6 +11,7 @@ mem="16G" time="12:00:00" partition="amd-hdr100" lines=5000000 +outdir="" ############################################################ # Help # @@ -18,9 +19,11 @@ lines=5000000 usage() { >&2 cat << EOF -Usage: $0 [ -h ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] - [ -l | --lines ] log +Usage: $0 [ -h ] [ -l | --lines ] [ -o | --outdir ] + [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] + log EOF +exit 0 } help() @@ -28,18 +31,22 @@ help() # Display Help >&2 cat << EOF Splits a GPFS policy log into multiple parts for batch array processing -Usage: $0 [ -h ] [ -n | --ntasks ] [ -p | --partition ] [ -t | --time ] [ -m | --mem ] - [ -l | --lines ] log +Usage: $0 [ -h ] [ -l | --lines ] [ -o | --outdir ] + [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] + log General: - -h|--help Print this Help. + -h|--help Print this help. Required: log Path to the log file to split -File Partitioning: +Split Parameters: -l|--lines Max number of records to save in each split (default: 5000000) +File Parameters: + -o|--outdir Directory path to store split files in. Defaults to log.d in log's parent directory. + Job Parameters: -n|--ntasks Number of job tasks (default: 4) -p|--partition Partition to submit tasks to (default: amd-hdr100) @@ -49,7 +56,7 @@ EOF exit 0 } -args=$(getopt -a -o hn:p:t:m:l: --long help,ntasks:,partition:,time:,mem:,lines: -- "$@") +args=$(getopt -a -o hl:o:n:p:t:m: --long help,lines:,outdir:,ntasks:,partition:,time:,mem: -- "$@") if [[ $? -gt 0 ]]; then usage fi @@ -60,11 +67,12 @@ while : do case $1 in -h | --help) help ;; + -l | --lines) lines=$2 ; shift 2 ;; + -o | --outdir) outdir=$2 ; shift 2 ;; -n | --ntasks) ntasks=$2 ; shift 2 ;; -p | --partition) partition=$2 ; shift 2 ;; -t | --time) time=$2 ; shift 2 ;; -m | --mem) mem=$2 ; shift 2 ;; - -l | --lines) lines=$2 ; shift 2 ;; --) shift; break ;; *) >&2 echo Unsupported option: $1 usage ;; @@ -76,8 +84,17 @@ if [[ $# -eq 0 ]]; then fi log=$1 -dirname="$(basename ${log} .gz).d" -prefix=${dirname}/list- + +if [[ -z "${log}" ]]; then + echo "Log path is required" + usage +fi + +if [[ -z "${outdir}" ]]; then + outdir="$(readlink -f ${log}).d" +fi + +prefix=${outdir}/list- split_cmd="cat ${log} | split -a 3 -d -l ${lines} - ${prefix}" zip_cmd="ls ${prefix}* | xargs -i -P 0 bash -c 'gzip {} && echo {} done'" @@ -89,6 +106,7 @@ fi >&2 cat << EOF -------------------------------------------------------------------------------- GPFS log: ${log} +Output Directory ${outdir} Lines per File: ${lines} ntasks: ${ntasks} @@ -101,7 +119,7 @@ zip cmd: ${zip_cmd} -------------------------------------------------------------------------------- EOF -mkdir -p ${dirname} +mkdir -p ${outdir} mkdir -p out mkdir -p err