Skip to content
Snippets Groups Projects

Set default location for split-file-info output

1 file
+ 30
12
Compare changes
  • Side-by-side
  • Inline
+ 30
12
#!/bin/bash
# Strict mode: exit on error (-e), treat unset variables as errors (-u),
# and fail a pipeline if any stage fails (pipefail).
set -euo pipefail
# Same options plus -x, which traces each command as it is executed.
set -euxo pipefail
############################################################
# Default Values #
@@ -11,6 +11,7 @@ mem="16G"
# NOTE(review): presumably the job wall-time limit (overridden by -t|--time) — confirm
time="12:00:00"
# Partition to submit tasks to (overridden by -p|--partition)
partition="amd-hdr100"
# Max number of records to save in each split (overridden by -l|--lines)
lines=5000000
# Directory for the split files (overridden by -o|--outdir);
# left empty here so a default can be derived from the log path later
outdir=""
############################################################
# Help #
@@ -18,9 +19,11 @@ lines=5000000
#######################################
# Print the command synopsis to stderr and terminate the script.
# Invoked on argument errors (getopt failure, unsupported option, missing
# log path), so it exits non-zero to make the failure visible to callers.
# Arguments: none
# Outputs:   synopsis on stderr
# Returns:   never returns; exits with status 1
#######################################
usage()
{
  # Only the current synopsis is printed; the older one omitted -o|--outdir.
  >&2 cat << EOF
Usage: $0 [ -h ] [ -l | --lines ] [ -o | --outdir ]
[ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ]
log
EOF
  exit 1
}
help()
@@ -28,18 +31,22 @@ help()
# Display Help
>&2 cat << EOF
Splits a GPFS policy log into multiple parts for batch array processing
Usage: $0 [ -h ] [ -n | --ntasks ] [ -p | --partition ] [ -t | --time ] [ -m | --mem ]
[ -l | --lines ] log
Usage: $0 [ -h ] [ -l | --lines ] [ -o | --outdir ]
[ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ]
log
General:
-h|--help Print this Help.
-h|--help Print this help.
Required:
log Path to the log file to split
File Partitioning:
Split Parameters:
-l|--lines Max number of records to save in each split (default: 5000000)
File Parameters:
-o|--outdir Directory path to store split files in. Defaults to log.d in log's parent directory.
Job Parameters:
-n|--ntasks Number of job tasks (default: 4)
-p|--partition Partition to submit tasks to (default: amd-hdr100)
@@ -49,7 +56,7 @@ EOF
exit 0
}
# Parse the command line with util-linux getopt (-a also accepts long options
# written with a single dash). The option spec lists every flag handled by the
# case statement that follows, including -o/--outdir.
# The call is the condition of the `if` so that, under `set -e`, a parse
# failure reaches usage instead of aborting the script on the assignment.
if ! args=$(getopt -a -o hl:o:n:p:t:m: --long help,lines:,outdir:,ntasks:,partition:,time:,mem: -- "$@"); then
  usage
fi
@@ -60,11 +67,12 @@ while :
do
case $1 in
-h | --help) help ;;
-l | --lines) lines=$2 ; shift 2 ;;
-o | --outdir) outdir=$2 ; shift 2 ;;
-n | --ntasks) ntasks=$2 ; shift 2 ;;
-p | --partition) partition=$2 ; shift 2 ;;
-t | --time) time=$2 ; shift 2 ;;
-m | --mem) mem=$2 ; shift 2 ;;
-l | --lines) lines=$2 ; shift 2 ;;
--) shift; break ;;
*) >&2 echo Unsupported option: $1
usage ;;
@@ -76,8 +84,17 @@ if [[ $# -eq 0 ]]; then
fi
# Resolve the input log and derive where the split files are written.
log=$1

# Reject an empty log path before it is used; diagnostics go to stderr.
if [[ -z "${log}" ]]; then
  >&2 echo "Log path is required"
  usage
fi

# Relative directory name "<log basename without .gz>.d"; still read by the
# directory-creation step later in the script.
dirname="$(basename -- "${log}" .gz).d"

# Without -o/--outdir, default to "<resolved absolute log path>.d".
if [[ -z "${outdir}" ]]; then
  outdir="$(readlink -f -- "${log}").d"
fi
prefix=${outdir}/list-

# Command strings kept byte-identical to the original.
# NOTE(review): ${log} and ${prefix} are interpolated unquoted, so paths with
# whitespace may still break when these strings are executed — confirm how
# they are run before changing them.
split_cmd="cat ${log} | split -a 3 -d -l ${lines} - ${prefix}"
zip_cmd="ls ${prefix}* | xargs -i -P 0 bash -c 'gzip {} && echo {} done'"
@@ -89,6 +106,7 @@ fi
>&2 cat << EOF
--------------------------------------------------------------------------------
GPFS log: ${log}
Output Directory ${outdir}
Lines per File: ${lines}
ntasks: ${ntasks}
@@ -101,7 +119,7 @@ zip cmd: ${zip_cmd}
--------------------------------------------------------------------------------
EOF
# Create the directories for the split files. Expansions are quoted and
# preceded by `--` so paths with spaces or a leading dash are handled.
mkdir -p -- "${dirname}"
mkdir -p -- "${outdir}"
# NOTE(review): presumably hold job stdout/stderr logs — confirm against the
# submission command.
mkdir -p out
mkdir -p err
Loading