From 8684bd4e770aa730c3f83405fd23bf7681ac5aac Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Fri, 30 Aug 2024 16:57:44 -0500
Subject: [PATCH] add python control script for submit-pol-job

---
Reviewer note: the blob ids on the "index" lines below are carried over from
the original patch and do not correspond to the revised file contents.

 src/exceptions.py         |  31 +++++++
 src/run-submit-pol-job.py | 171 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100644 src/exceptions.py
 create mode 100755 src/run-submit-pol-job.py

diff --git a/src/exceptions.py b/src/exceptions.py
new file mode 100644
index 0000000..857be66
--- /dev/null
+++ b/src/exceptions.py
@@ -0,0 +1,31 @@
+# exceptions.py
+
+import sys
+
+
+class CustomException(Exception):
+    """Base class for the script's fatal errors.
+
+    Constructing an instance prints the message and exits with status 1,
+    so a ``raise`` of any subclass ends the program immediately.
+    """
+
+    def __init__(self, message):
+        super().__init__(message)
+        self.handle_exception(message)
+
+    def handle_exception(self, message):
+        """Print the error message and exit with status 1."""
+        print(f"Error: {message}")
+        sys.exit(1)
+
+
+class InvalidDeviceError(CustomException):
+    """Exception raised when an improper fileset or path is given as the device"""
+    pass
+
+
+# NOTE: importing this class shadows the builtin ValueError in the importing module
+class ValueError(CustomException):
+    """Overloaded exception to exit when improper value is given"""
+    pass
diff --git a/src/run-submit-pol-job.py b/src/run-submit-pol-job.py
new file mode 100755
index 0000000..be824cc
--- /dev/null
+++ b/src/run-submit-pol-job.py
@@ -0,0 +1,171 @@
+#!/bin/python3
+
+import argparse
+from pathlib import Path
+import subprocess
+import re
+from exceptions import InvalidDeviceError, ValueError
+
+description = """
+Interface for non-privileged users to execute the run-mmpol.sh script with elevated permissions. Calls the
+submit-pol-job wrapper
+"""
+
+# Keys of the parsed arguments that main() hands to validate_sbatch_options()
+SBATCH_KEYS = ('nodes', 'ntasks', 'partition', 'time', 'mem_per_cpu')
+
+
+def parse_args():
+    """Build the command-line parser and return the parsed arguments."""
+    parser = argparse.ArgumentParser(description=description,
+                                     formatter_class=argparse.RawTextHelpFormatter
+                                     )
+    parser.add_argument('-o','--outdir', type=str,
+                        help="Directory to store the policy output in",
+                        default='/data/rc/gpfs-policy/data')
+    # Adjacent string literals are joined verbatim, so each fragment must end with a space
+    parser.add_argument('-f','--outfile', type=str,
+                        help="Base name of the output file. Defaults to 'list-policy_[device-id]' where 'device-id' "
+                             "is the device stem when device is a path or just device when it is a fileset. The final "
+                             "file name will have the policy type, the job ID, and the run date tagged on the end")
+    parser.add_argument('--with-dirs', action='store_true',
+                        help="Include directories as entries in the policy output (Default: false)")
+    sbatch = parser.add_argument_group('sbatch parameters')
+    sbatch.add_argument('-N','--nodes',type=int,default=1,
+                        help='Number of nodes to run job across')
+    sbatch.add_argument('-n','--ntasks',type=int,default=16,
+                        help='Number of cores to request')
+    sbatch.add_argument('-p','--partition',type=str,default='amd-hdr100,medium',
+                        help='Partition to submit job to. Can be a comma-separated list of multiple partitions')
+    sbatch.add_argument('-t','--time',type=str,default='24:00:00',
+                        help='Time limit for job formatted as [D-]HH:MM:SS')
+    sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G',
+                        help='Amount of RAM to allocate per core')
+    parser.add_argument('device',type=str,
+                        help="GPFS fileset/directory to apply the policy to. Can be specified as either the name of the "
+                             "fileset or the full path to the directory. (Examples: scratch, /data/user/[username])")
+    args = parser.parse_args()
+    return args
+
+
+# Validate that the string supplied to 'device' is either the name of a predefined fileset ('scratch' or 'home')
+# or an existing path located under /data, /home, or /scratch. Will not accept 'data' alone.
+def validate_device(device):
+    """Return the resolved Path for device; an unacceptable device ends the program via InvalidDeviceError."""
+    device = device.strip()
+    if device in ['data','/data']:
+        raise InvalidDeviceError("A policy run cannot be performed on the full 'data' fileset. Choose a valid "
+                                 "subdirectory such as '/data/user' or '/data/project'")
+
+    if device in ['scratch','home']:
+        return Path('/').joinpath(device).resolve()
+
+    if device in ['/scratch','/home','/data/user','/data/project']:
+        return Path(device).resolve()
+
+    p = Path(device).resolve() # resolve given path into absolute path
+
+    # check if p is a valid path and is located in /data, /home, or /scratch. If not, raise an exception
+    valid_parents = [Path(parent).resolve() for parent in ['/data','/home','/scratch']]
+    if p.exists() and any(parent in p.parents for parent in valid_parents):
+        return p
+    else:
+        raise InvalidDeviceError('The path or fileset must exist and be within /data, /home, or /scratch')
+
+
+def validate_time(time):
+    """End the program via ValueError unless time has the form [[H]H]H:MM:SS or D-HH:MM:SS."""
+    if not re.match(r'^(?:[0-6]-\d{2}|\d{1,3}):\d{2}:\d{2}$',time):
+        raise ValueError("Time must have format [[H]H]H:MM:SS or D-HH:MM:SS")
+
+
+def validate_mem(mem):
+    """End the program via ValueError unless mem is an integer optionally followed by M or G."""
+    # fullmatch rejects trailing text such as '8GB', which an unanchored re.match lets through
+    if not re.fullmatch(r'\d+[GM]?', mem):
+        raise ValueError("Mem per CPU must be an integer. May be followed by M or G to denote units")
+
+
+def validate_partition(partition):
+    """Return partition with all whitespace removed, after checking each name against the sinfo output."""
+    # split() with no argument splits on whitespace; split('\s') only splits on a literal backslash-s
+    if len(partition.split()) > 1 and ',' not in partition:
+        raise ValueError("Multiple partitions should be given as a comma-separated list")
+
+    partition = re.sub(r'\s',r'',partition)
+    partitions = partition.split(',')
+    cmd=r'sinfo -h -o "%R"'
+    avail_partitions=subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True).stdout.splitlines()
+
+    incorrect = [p for p in partitions if p not in avail_partitions]
+    if incorrect:
+        raise ValueError(f"The following partition(s) do not exist: {', '.join(incorrect)}. "
+                         "To see a list of valid partitions, visit https://rc.uab.edu/")
+
+    return partition
+
+
+def validate_nodes(n):
+    """End the program via ValueError unless n is an integer between 1 and 4."""
+    # 'not' has to cover the whole test; 'not isinstance(n,int) and ...' is never true for an int
+    if not (isinstance(n,int) and 1 <= n <= 4):
+        raise ValueError('Nodes must be an integer between 1 and 4')
+
+
+def validate_ntasks(n):
+    """End the program via ValueError unless n is an integer between 1 and 48."""
+    if not (isinstance(n,int) and 1 <= n <= 48):
+        raise ValueError('Ntasks must be an integer between 1 and 48')
+
+
+def validate_sbatch_options(args):
+    """Validate the sbatch options held in the dict args and return it with the cleaned partition string."""
+    validate_nodes(args['nodes'])
+    validate_ntasks(args['ntasks'])
+    validate_mem(args['mem_per_cpu'])
+    validate_time(args['time'])
+    args['partition'] = validate_partition(args['partition'])
+    return args
+
+
+# Need to validate that the output directory exists. This will not create a directory that does not already exist.
+def validate_output_directory(outdir):
+    """Return the resolved output directory; a path that is not a directory ends the program via ValueError."""
+    p = Path(outdir).resolve()
+
+    if not p.is_dir():
+        raise ValueError(f"{p} is not a valid output directory")
+    return p
+
+
+def create_default_outfile(device):
+    """Return the default output base name for device, which must be a Path."""
+    if device.match('/data/user'):
+        outfile = 'list-policy_data_user'
+    else:
+        outfile = f'list-policy_{device.stem}'
+    return outfile
+
+
+def main():
+    """Parse the command line and validate the device, sbatch options, and output location."""
+    # argparse.Namespace is not subscriptable, so use the dict view for all item access
+    args = vars(parse_args())
+
+    args['device'] = validate_device(args['device'])
+
+    sbatch_args = {k: v for k,v in args.items() if k in SBATCH_KEYS and v is not None}
+    sbatch_args = validate_sbatch_options(sbatch_args)
+
+    args['outdir'] = validate_output_directory(args['outdir'])
+
+    # argparse always defines 'outfile' (None when the option is omitted), so test the value, not the key
+    if args['outfile'] is None:
+        args['outfile'] = create_default_outfile(args['device'])
+
+    # TODO: the submit-pol-job command line is not assembled or executed yet
+    cmd = "./submit-pol-job "
+
+
+if __name__ == '__main__':
+    main()
-- 
GitLab