run-policy: add a --dry-run mode and command-line options to the policy scripts

Files touched:
  src/run-policy/run-mmpol.sh           takes -o/-f/-P/--dry-run plus a device argument
                                        instead of the DIR/POLICYFILE/FILESYSTEM/OUTFILE
                                        environment variables
  src/run-policy/run-submit-pol-job.py  forwards --dry-run to submit-pol-job
  src/run-policy/submit-pol-job         submits run-mmpol.sh through "sbatch --wrap", or
                                        runs it directly when --dry-run is given

Review fixes folded into the added lines:
  * run-mmpol.sh: test getopt's status inside the "if" (a bare assignment aborts
    under "set -e" before usage can be printed); quote the eval of getopt output
  * run-mmpol.sh: substitute placeholders for the SLURM_* variables in dry-run mode,
    where the script is not started by sbatch and "set -u" would otherwise abort
  * run-mmpol.sh: build the mmapplypolicy command as an array, report it before
    running, quote path expansions, replace "find | head -1" with "find -print -quit"
  * submit-pol-job: forward -f only when an output prefix was given (an empty value
    made "-f" consume "-P"), fix the "--dry_run" typo in the usage text, declare the
    long option as mem-per-cpu so it matches the case arm ("--mem" still works as an
    abbreviation)
  * run-submit-pol-job.py: add the missing space in the --dry-run help text and build
    the submit command once

Notes for whoever applies this:
  * Line breaks and spacing were rebuilt from a whitespace-collapsed copy of the
    patch, so indentation in context and removed lines is a best effort. Use
    "git apply --ignore-whitespace" if context fails to match.
  * The index lines keep the blob ids of the original patch; the post-image ids
    predate the review fixes listed above.

diff --git a/src/run-policy/run-mmpol.sh b/src/run-policy/run-mmpol.sh
index 7eaed0d9378821f8672fc7d545272742ec318049..b20175e52ad6f1e6d4ba0fb71898a2f4e1cfdfb0 100755
--- a/src/run-policy/run-mmpol.sh
+++ b/src/run-policy/run-mmpol.sh
@@ -4,43 +4,166 @@ set -euxo pipefail
 
 # run an mmapply policy across the cluster via slurm
 
-# gather info to map mmapplypolicy to runtime configuration
-# arguments passed via job env and runtime context
+############################################################
+# Default Values                                           #
+############################################################
 
-filesystem=${FILESYSTEM:-scratch}
-policyfile=$POLICYFILE
-tmpglobal=$DIR/slurm-tmp-${SLURM_JOBID}
-tmpscratch=$DIR/slurm-tmp-${SLURM_JOBID}
-mkdir -p $tmpglobal
+outdir="/data/rc/gpfs-policy/data"
+policy_file="./policy-def/list-path-external"
+output_log_prefix=""
+dry_run=""
 
-nodes=`scontrol show hostnames "${SLURM_JOB_NODELIST}" | tr '\n' ',' | sed -e 's/,$//'`
+############################################################
+# Help                                                     #
+############################################################
+
+usage()
+{
+>&2 cat << EOF
+Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] [ --dry-run ] device
+EOF
+exit 1
+}
+
+help()
+{
+>&2 cat << EOF
+Runs mmapplypolicy on the specified device/fileset. The policy file dictates the actions performed including list, delete, add, etc. This is most often called by the submit-pol-job wrapper instead of invoked directly
+
+Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] [ --dry-run ] device
+
+options:
+    -h|--help           Print this Help.
+    --dry-run           Do not run the policy command or any command that modifies any files or directories such as
+                        mkdir
+
+Required:
+    device              GPFS fileset/directory apply the policy to. Can be
+                        specified as either the name of the fileset or the
+                        full path to the directory
+                        (Examples: scratch, /data/user/[username])
+
+Path:
+    -o|--outdir         Parent directory to save policy output to
+                        (default: /data/rc/gpfs-policy/data)
+    -f|--output-prefix  Prefix of the policy output file. Appended with a metadata string containing the policy name,
+                        job ID, and date
+
+Policy Options:
+    -P|--policy-file    Path to policy file to apply to the given GPFS device
+EOF
+exit 0
+}
+
+# Under `set -e` a failed bare assignment aborts the script before a separate
+# `$?` check can run, so getopt's status is tested as part of the assignment.
+if ! args=$(getopt -a -o ho:f:P: --long help,outdir:,output-prefix:,policy-file:,dry-run -- "$@"); then
+  usage
+fi
+
+# getopt emits a shell-quoted string; keep it quoted for eval so option values
+# containing whitespace are restored as single arguments.
+eval set -- "${args}"
+
+while :
+do
+  case "$1" in
+    -h | --help)            help ;;
+    -o | --outdir)          outdir=$2 ; shift 2 ;;
+    -f | --output-prefix)   output_log_prefix=$2 ; shift 2 ;;
+    -P | --policy-file)     policy_file=$2 ; shift 2 ;;
+    --dry-run)              dry_run=true ; shift 1 ;;
+    --) shift; break ;;
+    *) >&2 echo "Unsupported option: $1"
+       usage ;;
+  esac
+done
+
+if [[ $# -eq 0 ]]; then
+  usage
+fi
+
+device="$1"
+
+# Ensure device is specified
+if [[ -z "${device}" ]]; then
+  >&2 echo "Error: Specify either the name of a fileset or a directory path"
+  usage
+fi
+
+# set default output_log_prefix if not specified in the arguments
+if [[ -z "${output_log_prefix}" ]]; then
+  modified_device=$(echo "${device}" | sed -e 's|^/||' -e 's|/$||' -e 's|/|-|g')
+  output_log_prefix="list-policy_${modified_device}"
+fi
+
+# A dry run is started directly by submit-pol-job rather than through sbatch,
+# so the SLURM_* variables may be unset. Substitute placeholders in that case
+# only; a real run outside of Slurm still aborts through `set -u`.
+if [[ -n "${dry_run}" ]]; then
+  SLURM_JOBID="${SLURM_JOBID:-dry-run}"
+  SLURM_JOB_NODELIST="${SLURM_JOB_NODELIST:-}"
+  SLURM_CPUS_PER_TASK="${SLURM_CPUS_PER_TASK:-1}"
+fi
+
+# create temporary working directory for list aggregation
+tmpglobal="${outdir}/slurm-tmp-${SLURM_JOBID}"
+tmpscratch="${outdir}/slurm-tmp-${SLURM_JOBID}"
+
+if [[ -n "${SLURM_JOB_NODELIST}" ]]; then
+  nodes=$(scontrol show hostnames "${SLURM_JOB_NODELIST}" | tr '\n' ',' | sed -e 's/,$//')
+else
+  # empty node list: only reachable in a dry run without an allocation
+  nodes="${HOSTNAME%%.*}"
+fi
 cores="${SLURM_CPUS_PER_TASK}"
 
-DATESTR=`date +'%Y-%m-%d-%H:%M:%S'`
+DATESTR=$(date +'%Y-%m-%dT%H:%M:%S')
 
-policy=`basename $policyfile`
+policy=$(basename "${policy_file}")
 filetag="${policy}_slurm-${SLURM_JOBID}_${DATESTR}"
 
-cmd="mmapplypolicy ${filesystem} -I defer \
-  -P $policyfile \
-  -g $tmpglobal \
-  -s $tmpscratch \
-  -f ${DIR}/list-${SLURM_JOBID} \
-  -M FILEPATH=${filesystem} \
-  -M JOBID=${SLURM_JOBID} \
-  -M LIST_OUTPUT_FILE=${OUTFILE:-/tmp/gpfs-list-policy}
-  -N ${nodes} -n ${cores} -m ${cores}"
+# Build the command as an array so that paths containing whitespace stay single
+# arguments instead of being re-split when the command is executed.
+cmd=(mmapplypolicy "${device}" -I defer
+  -P "${policy_file}"
+  -g "${tmpglobal}"
+  -s "${tmpscratch}"
+  -f "${outdir}/list-${SLURM_JOBID}"
+  -M "FILEPATH=${device}"
+  -M "JOBID=${SLURM_JOBID}"
+  -M "LIST_OUTPUT_FILE=${output_log_prefix}"
+  -N "${nodes}" -n "${cores}" -m "${cores}")
 
-# report final command in job log
-echo $cmd
+# report final command in job log (this is all a dry run does)
+printf '%q ' "${cmd[@]}"
+printf '\n'
 
-# run policy command
-$cmd
+if [[ -z "${dry_run}" ]]; then
+  mkdir -p "${tmpglobal}"
+
+  # run policy command
+  "${cmd[@]}"
+
+  log_name="${output_log_prefix}_${filetag}"
+  log_dir="${outdir}/${log_name}"
+
+  mkdir -p "${log_dir}/raw"
+  chmod 1770 "${log_dir}"
 
-# tag output file with run metadata
-outfile=`ls -t $tmpglobal | head -1`
-if [[ "$outfile" != "" ]]
-then
-  mv -n $tmpglobal/$outfile $tmpglobal/../${outfile}_$filetag
+  # tag output file with run metadata
+  # -print -quit stops find at the first match, so there is no `| head -1`
+  # whose early exit could fail the pipeline under `set -o pipefail`.
+  raw_log_file=$(find "${outdir}" -maxdepth 1 -name "list-${SLURM_JOBID}*" -type f -print -quit)
+  if [[ -n "${raw_log_file}" ]]; then
+    mv -n "${raw_log_file}" "${log_dir}/raw/${log_name}"
+    gzip "${log_dir}/raw/${log_name}"
+
+    chmod 440 "${log_dir}/raw/${log_name}.gz"
+    chmod 550 "${log_dir}/raw"
+  fi
+
+  chown -R "${USER}:atlab" "${log_dir}"
+
+  rmdir "${tmpglobal}"
 fi
-rmdir $tmpglobal
diff --git a/src/run-policy/run-submit-pol-job.py b/src/run-policy/run-submit-pol-job.py
index c69cd951123224510017ce76888da37574d895c7..064edc054cecc401396aeff0bce7764bdaed3b2d 100755
--- a/src/run-policy/run-submit-pol-job.py
+++ b/src/run-policy/run-submit-pol-job.py
@@ -36,6 +36,10 @@ def parse_args():
                         help='Time limit for job formatted as [D-]HH:MM:SS')
     sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G',
                         help='Amount of RAM to allocate per core')
+
+    parser.add_argument('--dry-run', action='store_true',
+                        help="Do not submit any jobs, run any policies, or create or remove any files or directories. "
+                             "Used for testing")
 
     parser.add_argument('device',type=str,
                         help="GPFS fileset/directory apply the policy to. Can be specified as either the name of the"
@@ -106,11 +110,9 @@ def validate_output_directory(outdir):
     return p
 
 def create_default_log_prefix(device):
-    if device.match('/data/user'):
-        log_prefix = 'list-policy_data_user'
-    else:
-        log_prefix = f'list-policy_{device.stem}'
-    return log_prefix
+    """Return 'list-policy_' plus the device with outer slashes stripped and inner slashes turned into dashes."""
+    mod_device = str(device).strip('/').replace('/','-')
+    return f"list-policy_{mod_device}"
 
 def main():
     args = parse_args()
@@ -135,7 +137,11 @@ def main():
     else:
         args['policy'] = './policy-def/list-path-external'
 
-    cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args)
+    # Build the command once; the --dry-run flag is the only part that varies.
+    dry_run_flag = '--dry-run ' if args['dry_run'] else ''
+    cmd = ("./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} "
+           "-p {partition} -t {time} -m {mem_per_cpu} {dry_run_flag}{device}"
+           ).format(dry_run_flag=dry_run_flag, **args)
 
     print(f"Command: {cmd}")
     subprocess.run(cmd,shell=True)
diff --git a/src/run-policy/submit-pol-job b/src/run-policy/submit-pol-job
index 1589c248ab8f129a99be859047ac8ca9eb731502..0bad4ea89c85365d73a25b98437cd0827fe97c76 100755
--- a/src/run-policy/submit-pol-job
+++ b/src/run-policy/submit-pol-job
@@ -14,6 +14,7 @@ partition="amd-hdr100,medium"
 outdir="/data/rc/gpfs-policy/data"
 policy="./policy-def/list-path-external"
 outfile=""
+dry_run=""
 
 ############################################################
 # Help                                                     #
@@ -23,7 +24,7 @@ usage()
 >&2 cat << EOF
 Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --outfile ] [ --with-dirs ]
           [ -N | --nodes ] [ -c | --cores ] [ -p | --partition]
-          [ -t | --time ] [ -m | --mem-per-cpu ]
+          [ -t | --time ] [ -m | --mem-per-cpu ] [ --dry-run ]
           device
 EOF
 exit 1
@@ -38,11 +39,13 @@ as root or via the run-submit-pol-job.py script. The default policy file is
 
 Usage: $0 [ -h ] [ -o | --outdir ] [ -f | --outfile ] [ -P | --policy ]
           [ -N | --nodes ] [ -c | --cores ] [ -p | --partition ]
-          [ -t | --time ] [ -m | --mem ]
+          [ -t | --time ] [ -m | --mem ] [ --dry-run ]
           device
 
 options:
-    -h|--help       Print this Help.
+    -h|--help           Print this Help.
+    --dry-run           Do not submit a Slurm job running the policy. Instead, pass --dry-run to run-mmpol.sh and call
+                        it normally to just print the output to STDOUT
 
 Required:
     device              GPFS fileset/directory apply the policy to. Can be
@@ -69,7 +72,8 @@ EOF
 exit 0
 }
 
-args=$(getopt -a -o ho:f:P:N:c:p:t:m: --long help,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem: -- "$@")
+args=$(getopt -a -o ho:f:P:N:c:p:t:m: \
+              --long help,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem-per-cpu:,dry-run -- "$@")
 
 if [[ $? -gt 0 ]]; then
   usage
@@ -89,6 +93,7 @@ do
     -p | --partition)   partition=$2 ; shift 2 ;;
     -t | --time)        time=$2 ; shift 2 ;;
     -m | --mem-per-cpu) mem_per_cpu=$2 ; shift 2 ;;
+    --dry-run)          dry_run=true ; shift 1 ;;
     --) shift; break ;;
     *) >&2 echo Unsupported option: $1
        usage ;;
@@ -108,15 +113,29 @@ if [[ -z "$device" ]]; then
 fi
 
 slurm_out="out/pol-%A-$(basename ${policy})-$(basename ${device}).out"
-mkdir -p out
-
-DIR=$outdir POLICYFILE=$policy FILESYSTEM=${device} OUTFILE=${outfile} && \
-DIR=$DIR POLICYFILE=$POLICYFILE FILESYSTEM=${FILESYSTEM} OUTFILE=${OUTFILE} \
-sbatch \
-  -N $nodes \
-  -c $cores \
-  -t $time \
-  --mem-per-cpu=$mem_per_cpu \
-  -p $partition \
-  -o ${slurm_out} \
-  ./run-mmpol.sh
+
+# Arguments forwarded to run-mmpol.sh. -f is passed only when an output prefix
+# was given: an empty value would leave a bare "-f" that swallows the next
+# option ("-P") as its argument.
+run_mmpol_args=(-o "${outdir}" -P "${policy}")
+if [[ -n "${outfile}" ]]; then
+  run_mmpol_args+=(-f "${outfile}")
+fi
+
+if [[ -z "${dry_run}" ]]; then
+  mkdir -p out
+
+  # --wrap takes a single command string, so shell-quote every argument
+  run_mmpol_cmd=$(printf '%q ' ./run-mmpol.sh "${run_mmpol_args[@]}" "${device}")
+
+  sbatch \
+    -N "${nodes}" \
+    -c "${cores}" \
+    -t "${time}" \
+    --mem-per-cpu="${mem_per_cpu}" \
+    -p "${partition}" \
+    -o "${slurm_out}" \
+    --wrap "${run_mmpol_cmd}"
+else
+  ./run-mmpol.sh "${run_mmpol_args[@]}" --dry-run "${device}"
+fi