#!/bin/bash
#
# Splits a GPFS policy log into fixed-size parts and gzips each part.
#
# The work is not done locally: this script parses its options, builds the
# split and gzip command lines, and submits them to Slurm as a single batch
# job via sbatch. Job stdout/stderr are written to ./out and ./err relative
# to the directory the script is run from.
#
# Run with -h for the full option list.

set -euxo pipefail

############################################################
# Default Values                                           #
############################################################

ntasks=4
mem="16G"
time="12:00:00"
partition="amd-hdr100"
lines=5000000
outdir=""

############################################################
# Help                                                     #
############################################################

# Prints the short synopsis to stderr and exits non-zero. Only called on
# invalid invocations, so it must not report success to the caller.
usage() {
  >&2 cat << EOF
Usage: $0 [ -h ]
          [ -l | --lines ] [ -o | --outdir ]
          [ -n | --ntasks ] [ -p | --partition]
          [ -t | --time ] [ -m | --mem ]
          log
EOF
  exit 1
}

# Prints the full help text to stderr and exits successfully.
help() {
  # Display Help
  >&2 cat << EOF
Splits a GPFS policy log into multiple parts for batch array processing
Usage: $0 [ -h ]
          [ -l | --lines ] [ -o | --outdir ]
          [ -n | --ntasks ] [ -p | --partition]
          [ -t | --time ] [ -m | --mem ]
          log

General:
    -h|--help           Print this help.

Required:
    log                 Path to the log file to split

Split Parameters:
    -l|--lines          Max number of records to save in each split (default: 5000000)

File Parameters:
    -o|--outdir         Directory path to store split files in. Defaults to log.d in log's parent directory.

Job Parameters:
    -n|--ntasks         Number of job tasks (default: 4)
    -p|--partition      Partition to submit tasks to (default: amd-hdr100)
    -t|--time           Max walltime (default: 12:00:00)
    -m|--mem            Memory (default: 16G)
EOF
  exit 0
}

############################################################
# Argument Parsing                                         #
############################################################

# The getopt call is tested directly in the `if`: under `set -e` a bare
# `args=$(getopt ...)` would abort the script before a later `$?` check could
# run, so usage would never be shown for a bad option.
if ! args=$(getopt -a -o hl:o:n:p:t:m: \
  --long help,lines:,outdir:,ntasks:,partition:,time:,mem: -- "$@"); then
  usage
fi

# getopt emits a shell-quoted string; it has to be eval'ed *quoted* so that
# option values containing whitespace or glob characters survive intact.
eval set -- "${args}"

while :; do
  case "$1" in
    -h | --help)
      help
      ;;
    -l | --lines)
      lines="$2"
      shift 2
      ;;
    -o | --outdir)
      outdir="$2"
      shift 2
      ;;
    -n | --ntasks)
      ntasks="$2"
      shift 2
      ;;
    -p | --partition)
      partition="$2"
      shift 2
      ;;
    -t | --time)
      time="$2"
      shift 2
      ;;
    -m | --mem)
      mem="$2"
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      >&2 echo "Unsupported option: $1"
      usage
      ;;
  esac
done

if [[ $# -eq 0 ]]; then
  usage
fi

log="$1"

if [[ -z "${log}" ]]; then
  >&2 echo "Log path is required"
  usage
fi

# Fail before submitting anything: otherwise a bad path is only discovered
# once the job has been queued and started.
if [[ ! -r "${log}" ]]; then
  >&2 echo "Log file not found or not readable: ${log}"
  exit 1
fi

if [[ -z "${outdir}" ]]; then
  outdir="$(readlink -f -- "${log}").d"
fi

############################################################
# Build Job Commands                                       #
############################################################

prefix="${outdir}/list-"

# The commands below are pasted as text into the job script, so every
# user-supplied value is shell-escaped first.
printf -v log_q '%q' "${log}"
printf -v prefix_q '%q' "${prefix}"
printf -v lines_q '%q' "${lines}"

# -L follows symlinks so that a symlink to a gzip file is detected as gzip
# rather than as a symlink.
reader="cat"
if [[ "$(file -bL --mime-type -- "${log}")" == *'gzip'* ]]; then
  reader="zcat"
fi

split_cmd="${reader} -- ${log_q} | split -a 3 -d -l ${lines_q} - ${prefix_q}"

# File names are handed to gzip NUL-delimited and as positional arguments
# instead of being parsed from `ls` output and substituted into the command
# string. The escaped \$1 stays literal here and is expanded by the inner bash.
zip_cmd="printf '%s\\0' ${prefix_q}* | xargs -0 -n 1 -P 0 bash -c 'gzip -- \"\$1\" && echo \"\$1 done\"' _"

>&2 cat << EOF
--------------------------------------------------------------------------------
GPFS log:           ${log}
Output Directory    ${outdir}
Lines per File:     ${lines}

ntasks:             ${ntasks}
partition:          ${partition}
time:               ${time}
mem:                ${mem}

split cmd:          ${split_cmd}
zip cmd:            ${zip_cmd}
--------------------------------------------------------------------------------
EOF

mkdir -p -- "${outdir}"
mkdir -p out
mkdir -p err

############################################################
# Create and Submit Job Script                             #
############################################################

# sbatch reads the job script from stdin when no script file is given. The
# heredoc delimiter is unquoted on purpose: the variables are expanded now,
# at submission time, producing a self-contained job script.
sbatch << EOF
#!/bin/bash
#
#SBATCH --job-name=split-gpfs-log
#SBATCH --ntasks=${ntasks}
#SBATCH --partition=${partition}
#SBATCH --time=${time}
#SBATCH --mem=${mem}
#SBATCH --output=out/%A.out
#SBATCH --error=err/%A.err

# Stop at the first failure so a broken split is not followed by gzipping
# partial output, and so the job exits non-zero.
set -euo pipefail

${split_cmd}
${zip_cmd}
EOF