Skip to content
Snippets Groups Projects
Commit c89f475e authored by Matthew K Defenderfer
Browse files

expand documentation, provide default options, add automatic zipping of log parts

parent 91e8f412
No related branches found
No related tags found
1 merge request: !10 Enhance raw GPFS log splitting and compression script
#!/bin/bash
# Split an info file into individual list files so that searching the parts
# with an array job is fast.
#
# Usage: <script> FILE
#   FILE  path of the info file; parts are written as FILE.d/list-NNN
#         (3-digit numeric suffix, 5000000 lines per part).

# Abort with a message when FILE is missing instead of creating ".d" and
# running split on an empty file name.
file=${1:?usage: $0 FILE}
dirname=${file}.d
prefix=${dirname}/list-

# Every expansion is quoted so that a path containing whitespace or glob
# characters reaches mkdir/split as exactly one argument.
mkdir -p -- "${dirname}"
srun -p amd-hdr100 --time 06:00:00 --mem 4G split -a 3 -d -l 5000000 "${file}" "${prefix}"
#!/bin/bash
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ----------------------------------------------------------
# Default values; each one can be replaced by the matching
# command-line option handled further down.
# ----------------------------------------------------------
lines=5000000           # -l | --lines
partition='amd-hdr100'  # -p | --partition
time='12:00:00'         # -t | --time
mem='16G'               # -m | --mem
ntasks=4                # -n | --ntasks
############################################################
# Help #
############################################################
# usage: print the short synopsis to stderr and terminate with status 2.
#
# Every call site invokes this on an invalid invocation and expects the
# script to stop there. Without the exit, the option loop's default branch
# would spin forever (it never shifts) and the missing-argument check would
# fall through to an unbound "$1".
usage()
{
>&2 cat << EOF
Usage: $0 [ -h ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ]
[ -l | --lines ] log
EOF
exit 2
}
# help: write the full option reference to stderr, then end the script
# successfully (status 0). Shadows the bash builtin of the same name.
help() {
  cat >&2 << EOF
Splits a GPFS policy log into multiple parts for batch array processing
Usage: $0 [ -h ] [ -n | --ntasks ] [ -p | --partition ] [ -t | --time ] [ -m | --mem ]
[ -l | --lines ] log
General:
-h|--help Print this Help.
Required:
log Path to the log file to split
File Partitioning:
-l|--lines Max number of records to save in each split (default: 5000000)
Job Parameters:
-n|--ntasks Number of job tasks (default: 4)
-p|--partition Partition to submit tasks to (default: amd-hdr100)
-t|--time Max walltime (default: 12:00:00)
-m|--mem Memory (default: 16G)
EOF
  exit 0
}
# Parse command-line options with enhanced getopt(1).
#   -a      accept long options written with a single leading dash
#   -o/--long  every option except -h/--help takes a mandatory argument
#
# getopt is run inside the `if` condition: as a bare assignment its failure
# would trip `set -e` and abort before the synopsis could be shown.
if ! args=$(getopt -a -o hn:p:t:m:l: --long help,ntasks:,partition:,time:,mem:,lines: -- "$@"); then
  usage
  exit 2
fi

# getopt emits a shell-quoted argument list; the expansion must be quoted so
# eval sees that quoting intact (unquoted, arguments containing whitespace or
# glob characters are mangled before eval re-parses them).
eval set -- "${args}"

# Consume options until the "--" separator; afterwards only positional
# arguments remain in "$@".
while :
do
  case "$1" in
    -h | --help) help ;;
    -n | --ntasks) ntasks=$2 ; shift 2 ;;
    -p | --partition) partition=$2 ; shift 2 ;;
    -t | --time) time=$2 ; shift 2 ;;
    -m | --mem) mem=$2 ; shift 2 ;;
    -l | --lines) lines=$2 ; shift 2 ;;
    --) shift; break ;;
    *)
      >&2 printf 'Unsupported option: %s\n' "$1"
      usage
      # Explicit exit: this branch does not shift, so returning to the loop
      # would repeat it forever.
      exit 2
      ;;
  esac
done
# The log path is the one required positional argument.
if [[ $# -eq 0 ]]; then
  usage
  # Explicit exit so execution never reaches the unbound "$1" below.
  exit 2
fi
log=$1

# Output directory: the log's base name (minus a trailing .gz) plus ".d",
# relative to the current working directory. Parts are named list-NNN.
dirname="$(basename -- "${log}" .gz).d"
prefix=${dirname}/list-

# The two commands are pasted as text into the generated job script, so the
# paths are shell-escaped first. For ordinary paths %q changes nothing.
printf -v log_q '%q' "${log}"
printf -v prefix_q '%q' "${prefix}"

# Split the log from stdin into parts of ${lines} lines with 3-digit numeric
# suffixes.
split_cmd="cat ${log_q} | split -a 3 -d -l ${lines} - ${prefix_q}"

# Gzip every part in parallel (-P 0: as many processes as possible) and
# print "<part> done" after each one. Names are passed NUL-delimited and as a
# positional parameter instead of parsing ls output and splicing the name
# into the bash -c string.
zip_cmd="printf '%s\\0' ${prefix_q}* | xargs -0 -P 0 -I {} bash -c 'gzip -- \"\$1\" && echo \"\$1\" done' _ {}"

# A gzip-compressed log is read with zcat: prefixing "z" turns the leading
# "cat" of the command into "zcat".
if [[ $(file -b --mime-type -- "${log}") == *'gzip'* ]]; then
  split_cmd="z${split_cmd}"
fi
# Report the effective settings and the exact commands the job will run.
# Written to stderr so stdout carries only sbatch's own output.
>&2 cat << EOF
--------------------------------------------------------------------------------
GPFS log: ${log}
Lines per File: ${lines}
ntasks: ${ntasks}
partition: ${partition}
time: ${time}
mem: ${mem}
split cmd: ${split_cmd}
zip cmd: ${zip_cmd}
--------------------------------------------------------------------------------
EOF

# Create the directory for the split parts plus the out/ and err/ directories
# that the job's --output/--error paths point into. The expansion is quoted so
# a log name containing whitespace yields one directory, not several.
mkdir -p -- "${dirname}"
mkdir -p out
mkdir -p err
############################################################
# Create and Submit Job Script #
############################################################
# The job script is generated inline and handed to sbatch on stdin (no
# intermediate cat needed). The heredoc delimiter is unquoted on purpose:
# the ${...} values are expanded now, at submission time.
#
# The job runs in strict mode so that a failed split aborts the job with a
# non-zero status instead of going on to gzip a partial set of parts and
# reporting success.
sbatch << EOF
#!/bin/bash
#
#SBATCH --job-name=split-gpfs-log
#SBATCH --ntasks=${ntasks}
#SBATCH --partition=${partition}
#SBATCH --time=${time}
#SBATCH --mem=${mem}
#SBATCH --output=out/%A.out
#SBATCH --error=err/%A.err
set -euo pipefail
${split_cmd}
${zip_cmd}
EOF
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment