Skip to content
Snippets Groups Projects
Commit ca9f5e7b authored by Matthew K Defenderfer's avatar Matthew K Defenderfer
Browse files

Merge branch 'bugfix-change-log-ownership-after-policy-run' into 'rel_v0.2.0'

Refactoring run-mmpol scripts

See merge request !20
parents bbf19903 bd52e387
No related branches found
No related tags found
1 merge request!20Refactoring run-mmpol scripts
......@@ -4,43 +4,227 @@ set -euxo pipefail
# run an mmapply policy across the cluster via slurm
# gather info to map mmapplypolicy to runtime configuration
# arguments passed via job env and runtime context
############################################################
# Default Values #
############################################################
filesystem=${FILESYSTEM:-scratch}
policyfile=$POLICYFILE
tmpglobal=$DIR/slurm-tmp-${SLURM_JOBID}
tmpscratch=$DIR/slurm-tmp-${SLURM_JOBID}
# Defaults; each may be overridden from the command line.
outdir="/data/rc/gpfs-policy/data"
policy_file="./policy-def/list-path-external"
device="scratch"
output_log_prefix=""
# Empty means "run for real". It must be initialised here: the script runs
# under `set -u` and dry_run is otherwise only assigned when --dry-run is
# passed, so the later dry-run test would abort with an unbound variable.
dry_run=""
############################################################
# Help #
############################################################
# Write the one-line synopsis to stderr, then terminate with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [-P | --policy-file] device" >&2
  exit 1
}
# Print the full help text (description, synopsis and option reference) to
# stderr, then exit with status 0.
# NOTE(review): a second definition of help() appears later in this file and
# additionally documents --dry-run.
help()
{
>&2 cat << EOF
Runs mmapplypolicy on the specified device/fileset. The policy file dictates the actions performed including list, delete, add, etc. This is most often called by the submit-pol-job wrapper instead of invoked directly
Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] device
options:
-h|--help Print this Help.
Required:
device GPFS fileset/directory apply the policy to. Can be
specified as either the name of the fileset or the
full path to the directory
(Examples: scratch, /data/user/[username])
Path:
-o|--outdir Parent directory to save policy output to
(default: /data/rc/gpfs-policy/data)
-f|--output-prefix Prefix of the policy output file. Appended with a metadata string containing the policy name,
job ID, and date
Policy Options:
-P|--policy-file Path to policy file to apply to the given GPFS device
EOF
exit 0
}
# Parse command-line options with getopt(1).
# --dry-run is accepted here as well: this parser runs before the later one
# that knows the flag, and rejecting it would stop the script at this point.
# `|| usage` replaces a separate `$?` test, which is never reached under
# `set -e` because a failing command substitution already aborts the script.
args=$(getopt -a -o ho:f:P: --long help,outdir:,output-prefix:,policy-file:,dry-run -- "$@") || usage

# getopt emits a shell-quoted string. It must be passed to eval quoted,
# otherwise word splitting collapses whitespace inside arguments.
eval set -- "${args}"
while :
do
  case "$1" in
    -h | --help) help ;;
    -o | --outdir) outdir=$2 ; shift 2 ;;
    -f | --output-prefix) output_log_prefix=$2 ; shift 2 ;;
    -P | --policy-file) policy_file=$2 ; shift 2 ;;
    --dry-run) dry_run=true ; shift 1 ;;
    --) shift; break ;;
    *) >&2 echo "Unsupported option: $1"
       usage ;;
  esac
done
# A positional device argument is mandatory; without one, show the synopsis.
(( $# > 0 )) || usage
device="$1"
# An explicitly empty device argument is rejected as well.
if [[ "${device}" == "" ]]; then
  echo "Error: Specify either the name of a fileset or a directory path"
  usage
fi
# Create a default output_log_prefix if none was given in the arguments.
# Note the space before `]]`: without it bash reports a syntax error and the
# script cannot be parsed at all.
if [[ -z "${output_log_prefix}" ]]; then
  # Drop one leading and one trailing slash, then turn the remaining slashes
  # into dashes, e.g. /data/user/jdoe -> data-user-jdoe.
  modified_device=$(echo "$device" | sed -e 's|^/||' -e 's|/$||' -e 's|/|-|g')
  output_log_prefix="list-policy_${modified_device}"
fi
# Temporary working directory for list aggregation; the same path is used for
# both the global (-g) and the scratch (-s) work directory.
tmpglobal="${outdir}/slurm-tmp-${SLURM_JOBID}"
tmpscratch="${outdir}/slurm-tmp-${SLURM_JOBID}"
# The help text promises that --dry-run does not run mkdir, so only create
# the directory on a real run. ${dry_run:-} keeps this safe under `set -u`.
if [[ -z "${dry_run:-}" ]]; then
  mkdir -p -- "${tmpglobal}"
fi
# Comma-separated list of the hosts allocated to this job.
nodes=$(scontrol show hostnames "${SLURM_JOB_NODELIST}" | tr '\n' ',' | sed -e 's/,$//')
############################################################
# Help #
############################################################
# Print the one-line synopsis to stderr and exit with status 1.
# The synopsis lists --dry-run, which the option parser accepts, and uses the
# same bracket spacing for every option.
usage()
{
  >&2 cat << EOF
Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] [ --dry-run ] device
EOF
  exit 1
}
# Print the full help text (description, synopsis and option reference) to
# stderr, then exit with status 0.
# The Usage line includes --dry-run so it agrees with the option list below it.
help()
{
  >&2 cat << EOF
Runs mmapplypolicy on the specified device/fileset. The policy file dictates the actions performed including list, delete, add, etc. This is most often called by the submit-pol-job wrapper instead of invoked directly
Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] [ --dry-run ] device
options:
-h|--help Print this Help.
--dry-run Do not run the policy command or any command that modifies any files or directories such as
mkdir
Required:
device GPFS fileset/directory apply the policy to. Can be
specified as either the name of the fileset or the
full path to the directory
(Examples: scratch, /data/user/[username])
Path:
-o|--outdir Parent directory to save policy output to
(default: /data/rc/gpfs-policy/data)
-f|--output-prefix Prefix of the policy output file. Appended with a metadata string containing the policy name,
job ID, and date
Policy Options:
-P|--policy-file Path to policy file to apply to the given GPFS device
EOF
  exit 0
}
# Parse command-line options with getopt(1).
# `|| usage` replaces a separate `$?` test, which is never reached under
# `set -e` because a failing command substitution already aborts the script.
args=$(getopt -a -o ho:f:P: --long help,outdir:,output-prefix:,policy-file:,dry-run -- "$@") || usage

# getopt emits a shell-quoted string. It must be passed to eval quoted,
# otherwise word splitting collapses whitespace inside arguments.
eval set -- "${args}"
while :
do
  case "$1" in
    -h | --help) help ;;
    -o | --outdir) outdir=$2 ; shift 2 ;;
    -f | --output-prefix) output_log_prefix=$2 ; shift 2 ;;
    -P | --policy-file) policy_file=$2 ; shift 2 ;;
    --dry-run) dry_run=true ; shift 1 ;;
    --) shift; break ;;
    *) >&2 echo "Unsupported option: $1"
       usage ;;
  esac
done
# The device is the first remaining positional argument; show the synopsis
# when none is left after option parsing.
(( $# > 0 )) || usage
device="$1"
# Guard against an explicitly empty device argument.
if [[ "${device}" == "" ]]; then
  echo "Error: Specify either the name of a fileset or a directory path"
  usage
fi
# Fall back to a prefix derived from the device when none was supplied: one
# leading and one trailing slash are dropped and the remaining slashes become
# dashes.
[[ -n "${output_log_prefix}" ]] || {
  modified_device=$(echo "${device}" | sed 's|^/||; s|/$||; s|/|-|g')
  output_log_prefix="list-policy_${modified_device}"
}
# Paths of the temporary working directory for list aggregation (the directory
# itself is not created here); the global and scratch work dirs share one path.
tmpglobal="${outdir}/slurm-tmp-${SLURM_JOBID}"
tmpscratch="${tmpglobal}"
# Hosts allocated to the job, one per line from scontrol, joined with commas.
nodes=$(
  scontrol show hostnames "${SLURM_JOB_NODELIST}" \
    | tr '\n' ',' \
    | sed -e 's/,$//'
)
cores="${SLURM_CPUS_PER_TASK}"
DATESTR=`date +'%Y-%m-%d-%H:%M:%S'`
DATESTR=$(date +'%Y-%m-%dT%H:%M:%S')
policy=`basename $policyfile`
policy=$(basename ${policy_file})
filetag="${policy}_slurm-${SLURM_JOBID}_${DATESTR}"
cmd="mmapplypolicy ${filesystem} -I defer \
-P $policyfile \
-g $tmpglobal \
-s $tmpscratch \
-f ${DIR}/list-${SLURM_JOBID} \
-M FILEPATH=${filesystem} \
cmd="mmapplypolicy ${device} -I defer \
-P ${policy_file} \
-g ${tmpglobal} \
-s ${tmpscratch} \
-f ${outdir}/list-${SLURM_JOBID} \
-M FILEPATH=${device} \
-M JOBID=${SLURM_JOBID} \
-M LIST_OUTPUT_FILE=${OUTFILE:-/tmp/gpfs-list-policy}
-M LIST_OUTPUT_FILE=${output_log_prefix} \
-N ${nodes} -n ${cores} -m ${cores}"
# report final command in job log
echo $cmd
if [[ ! ${dry_run} ]]; then
mkdir -p ${tmpglobal}
# run policy command
${cmd}
log_name="${output_log_prefix}_${filetag}"
log_dir="${outdir}/${log_name}"
mkdir -p ${log_dir}/raw
chmod 1770 ${log_dir}
# run policy command
$cmd
# tag output file with run metadata
raw_log_file=$(find ${outdir} -maxdepth 1 -name "list-${SLURM_JOBID}*" -type f | head -1)
if [[ "$raw_log_file" != "" ]]; then
mv -n ${raw_log_file} ${log_dir}/raw/${log_name}
gzip ${log_dir}/raw/${log_name}
# tag output file with run metadata
outfile=`ls -t $tmpglobal | head -1`
if [[ "$outfile" != "" ]]
then
mv -n $tmpglobal/$outfile $tmpglobal/../${outfile}_$filetag
chmod 440 ${log_dir}/raw/${log_name}.gz
chmod 550 ${log_dir}/raw
fi
chown -R ${USER}:atlab ${log_dir}
rmdir ${tmpglobal}
fi
rmdir $tmpglobal
......@@ -36,6 +36,10 @@ def parse_args():
help='Time limit for job formatted as [D-]HH:MM:SS')
sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G',
help='Amount of RAM to allocate per core')
# Adjacent string literals are concatenated verbatim, so the first one has to
# end with a space; otherwise the help reads "directories.Used for testing".
parser.add_argument('--dry-run', action='store_true',
                    help="Do not submit any jobs, run any policies, or create or remove any files or directories. "
                         "Used for testing")
parser.add_argument('device',type=str,
help="GPFS fileset/directory apply the policy to. Can be specified as either the name of the"
......@@ -106,11 +110,8 @@ def validate_output_directory(outdir):
return p
def create_default_log_prefix(device):
    """Build the default log prefix for a device.

    The device is converted to a string, leading and trailing slashes are
    stripped and the remaining slashes are replaced with dashes, e.g.
    ``/data/user/jdoe`` -> ``list-policy_data-user-jdoe``.

    Args:
        device: Fileset name or directory path; anything ``str()`` accepts.

    Returns:
        The prefix string ``list-policy_<modified device>``.
    """
    # The earlier '/data/user' special case is dropped: it returned first and
    # made the path-derived prefix below unreachable.
    mod_device = str(device).strip('/').replace('/', '-')
    return f"list-policy_{mod_device}"
def main():
args = parse_args()
......@@ -135,7 +136,10 @@ def main():
else:
args['policy'] = './policy-def/list-path-external'
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args)
if args['dry_run']:
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} --dry-run {device}".format(**args)
else:
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args)
print(f"Command: {cmd}")
subprocess.run(cmd,shell=True)
......
......@@ -14,6 +14,7 @@ partition="amd-hdr100,medium"
outdir="/data/rc/gpfs-policy/data"
policy="./policy-def/list-path-external"
outfile=""
dry_run=""
############################################################
# Help #
......@@ -23,7 +24,7 @@ usage()
# Synopsis printed to stderr. The dry-run flag is spelled --dry-run, matching
# the long option the getopt call in this script accepts.
>&2 cat << EOF
Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --outfile ] [ --with-dirs ]
[ -N | --nodes ] [ -c | --cores ] [ -p | --partition]
[ -t | --time ] [ -m | --mem-per-cpu ]
[ -t | --time ] [ -m | --mem-per-cpu ] [ --dry-run ]
device
EOF
exit 1
......@@ -38,11 +39,13 @@ as root or via the run-submit-pol-job.py script. The default policy file is
Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --outfile ] [ -P | --policy ]
[ -N | --nodes ] [ -c | --cores ] [ -p | --partition ]
[ -t | --time ] [ -m | --mem ]
[ -t | --time ] [ -m | --mem ] [ --dry-run ]
device
options:
-h|--help Print this Help.
-h|--help Print this Help.
--dry-run Do not submit a Slurm job running the policy. Instead, pass --dry-run to run-mmpol.sh and call
it normally to just print the output to STDOUT
Required:
device GPFS fileset/directory apply the policy to. Can be
......@@ -69,7 +72,8 @@ EOF
exit 0
}
args=$(getopt -a -o ho:f:P:N:c:p:t:m: --long help,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem: -- "$@")
args=$(getopt -a -o ho:f:P:N:c:p:t:m: \
--long help,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem:,dry-run -- "$@")
if [[ $? -gt 0 ]]; then
usage
......@@ -89,6 +93,7 @@ do
-p | --partition) partition=$2 ; shift 2 ;;
-t | --time) time=$2 ; shift 2 ;;
-m | --mem-per-cpu) mem_per_cpu=$2 ; shift 2 ;;
--dry-run) dry_run=true ; shift 1 ;;
--) shift; break ;;
*) >&2 echo Unsupported option: $1
usage ;;
......@@ -108,15 +113,23 @@ if [[ -z "$device" ]]; then
fi
slurm_out="out/pol-%A-$(basename ${policy})-$(basename ${device}).out"
mkdir -p out
DIR=$outdir POLICYFILE=$policy FILESYSTEM=${device} OUTFILE=${outfile} && \
DIR=$DIR POLICYFILE=$POLICYFILE FILESYSTEM=${FILESYSTEM} OUTFILE=${OUTFILE} \
sbatch \
-N $nodes \
-c $cores \
-t $time \
--mem-per-cpu=$mem_per_cpu \
-p $partition \
-o ${slurm_out} \
./run-mmpol.sh
# Build the run-mmpol.sh command line. -f is only added when an output prefix
# was given: with an empty ${outfile} the string would contain "-f -P ...",
# and a getopt-based parser takes "-P" as the value of -f, shifting every
# later argument.
run_mmpol_cmd_base="./run-mmpol.sh -o ${outdir}"
if [[ -n "${outfile}" ]]; then
  run_mmpol_cmd_base="${run_mmpol_cmd_base} -f ${outfile}"
fi
run_mmpol_cmd_base="${run_mmpol_cmd_base} -P ${policy}"

if [[ -z "${dry_run}" ]]; then
  # Real run: submit the policy job to Slurm; job logs go to ./out.
  mkdir -p out
  run_mmpol_cmd="${run_mmpol_cmd_base} ${device}"
  sbatch \
    -N "${nodes}" \
    -c "${cores}" \
    -t "${time}" \
    --mem-per-cpu="${mem_per_cpu}" \
    -p "${partition}" \
    -o "${slurm_out}" \
    --wrap "${run_mmpol_cmd}"
else
  # Dry run: call run-mmpol.sh directly with --dry-run instead of submitting.
  run_mmpol_cmd="${run_mmpol_cmd_base} --dry-run ${device}"
  # Deliberately unquoted so the string splits into command and arguments.
  ${run_mmpol_cmd}
fi
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment