From 8684bd4e770aa730c3f83405fd23bf7681ac5aac Mon Sep 17 00:00:00 2001
From: Matthew K Defenderfer <mdefende@uab.edu>
Date: Fri, 30 Aug 2024 16:57:44 -0500
Subject: [PATCH] add python control script for submit-pol-job

---
 src/exceptions.py         |  21 ++++++
 src/run-submit-pol-job.py | 140 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+)
 create mode 100644 src/exceptions.py
 create mode 100755 src/run-submit-pol-job.py

diff --git a/src/exceptions.py b/src/exceptions.py
new file mode 100644
index 0000000..857be66
--- /dev/null
+++ b/src/exceptions.py
@@ -0,0 +1,21 @@
+# exceptions.py
+
+import sys
+
+class CustomException(Exception):
+    """Base class for fatal errors: constructing one prints the message to stderr and exits with status 1."""
+    def __init__(self, message):
+        super().__init__(message)
+        self.handle_exception(message)
+
+    def handle_exception(self, message):
+        print(f"Error: {message}", file=sys.stderr)  # diagnostics belong on stderr, not stdout
+        sys.exit(1)
+
+class InvalidDeviceError(CustomException):
+    """Raised when an improper fileset or path is given as the device; exits on construction via CustomException."""
+    pass
+
+class ValueError(CustomException):
+    """Replacement for the built-in ValueError (shadows it where imported) that exits when an improper value is given."""
+    pass
\ No newline at end of file
diff --git a/src/run-submit-pol-job.py b/src/run-submit-pol-job.py
new file mode 100755
index 0000000..be824cc
--- /dev/null
+++ b/src/run-submit-pol-job.py
@@ -0,0 +1,140 @@
+#!/bin/python3
+
+import argparse
+from pathlib import Path
+import subprocess
+import re
+from exceptions import InvalidDeviceError, ValueError
+
+description = """
+Interface for non-privileged users to execute the run-mmpol.sh script with elevated permissions. Calls the 
+submit-pol-job wrapper
+"""
+
+def parse_args(argv=None):
+    """Build the CLI parser and return the parsed Namespace; argv=None makes argparse read sys.argv[1:]."""
+    parser = argparse.ArgumentParser(description=description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('-o','--outdir', type=str,
+                        help="Directory to store the policy output in",
+                        default='/data/rc/gpfs-policy/data')
+    # Adjacent literals are concatenated as-is, so each piece ends with a space to keep the words apart
+    parser.add_argument('-f','--outfile', type=str,
+                        help="Base name of the output file. Defaults to 'list-policy_[device-id]' where 'device-id' "
+                             "is the device stem when device is a path or just device when it is a fileset. The final "
+                             "file name will have the policy type, the job ID, and the run date tagged on the end")
+    parser.add_argument('--with-dirs', action='store_true',
+                        help="Include directories as entries in the policy output (Default: false)")
+    sbatch = parser.add_argument_group('sbatch parameters')
+    sbatch.add_argument('-N','--nodes',type=int,default=1,
+                        help='Number of nodes to run job across')
+    sbatch.add_argument('-n','--ntasks',type=int,default=16,
+                        help='Number of cores to request')
+    sbatch.add_argument('-p','--partition',type=str,default='amd-hdr100,medium',
+                        help='Partition to submit job to. Can be a comma-separated list of multiple partitions')
+    sbatch.add_argument('-t','--time',type=str,default='24:00:00',
+                        help='Time limit for job formatted as [D-]HH:MM:SS')
+    sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G',
+                        help='Amount of RAM to allocate per core')
+    parser.add_argument('device',type=str,
+                        help="GPFS fileset/directory to apply the policy to. Can be specified as either the name of the "
+                             "fileset or the full path to the directory. (Examples: scratch, /data/user/[username])")
+    return parser.parse_args(argv)
+
+# Validate that the string supplied to 'device' is either the name of a predefined fileset ('scratch' or 'home')
+# or an existing path under /data, /home, or /scratch. 'data' and '/data' alone are rejected; choose a subdirectory
+# such as /data/user or /data/project instead.
+def validate_device(device):
+    """Return device as a resolved Path, or raise InvalidDeviceError if it is not an allowed fileset or path."""
+    device = device.strip()
+    if device in ['data','/data']:
+        raise InvalidDeviceError("A policy run cannot be performed on the full 'data' fileset. Choose a valid subdirectory such as '/data/user' or '/data/project'")
+
+    if device in ['scratch','home']:
+        return Path('/').joinpath(device).resolve()
+
+    if device in ['/scratch','/home','/data/user','/data/project']:
+        return Path(device).resolve()
+
+    p = Path(device).resolve() # resolve given path into absolute path
+
+    # p must exist and sit below /data, /home, or /scratch (both sides compared after resolve())
+    valid_parents = [Path(parent).resolve() for parent in ['/data','/home','/scratch']]
+    if p.exists() and any(parent in p.parents for parent in valid_parents):
+        return p
+    raise InvalidDeviceError('The path or fileset must be within /data, /home, or /scratch')
+
+def validate_time(time):  # accepts [[H]H]H:MM:SS or D-HH:MM:SS (D in 0-6); anything else raises ValueError
+    if not re.fullmatch(r'(?:[0-6]-\d{2}|\d{1,3}):\d{2}:\d{2}', time):  # fullmatch also rejects a trailing newline
+        raise ValueError("Time must have format [[H]H]H:MM:SS or D-HH:MM:SS")
+    
+def validate_mem(mem):  # accepts an integer with an optional M or G suffix; anything else raises ValueError
+    if not re.fullmatch(r'\d+[GM]?', mem):  # fullmatch: re.match let trailing junk such as '8GB' through
+        raise ValueError("Mem per CPU must be an integer. May be followed by M or G to denote units")
+
+def validate_partition(partition):
+    """Return partition with all whitespace removed; raise ValueError if it is malformed or names a partition sinfo does not list."""
+    if len(partition.split()) > 1 and ',' not in partition:
+        raise ValueError("Multiple partitions should be given as a comma-separated list")
+
+    partition = re.sub(r'\s', '', partition)
+    # argv list, no shell needed; sinfo '%R' prints one bare partition name per line
+    result = subprocess.run(['sinfo', '-h', '-o', '%R'], stdout=subprocess.PIPE, text=True)
+    avail_partitions = set(result.stdout.split())
+    incorrect = [p for p in partition.split(',') if p not in avail_partitions]
+    if incorrect:
+        raise ValueError(f"The following partition(s) do not exist: {', '.join(incorrect)}. To see a list of valid partitions, visit https://rc.uab.edu/")
+
+    return partition
+
+def validate_nodes(n):  # raises ValueError unless n is an int in 1..4
+    if not (isinstance(n, int) and 1 <= n <= 4):  # parenthesised: 'not' used to bind to isinstance() only
+        raise ValueError('Nodes must be an integer between 1 and 4')
+
+def validate_ntasks(n):  # raises ValueError unless n is an int in 1..48
+    if not (isinstance(n, int) and 1 <= n <= 48):  # parenthesised: 'not' used to bind to isinstance() only
+        raise ValueError('Ntasks must be an integer between 1 and 48')
+
+def validate_sbatch_options(args):
+    """Run each sbatch option in the args dict through its validator; return args with 'partition' replaced by the validated value."""
+    for check, key in ((validate_nodes, 'nodes'), (validate_ntasks, 'ntasks'),
+                       (validate_mem, 'mem_per_cpu'), (validate_time, 'time')):
+        check(args[key])
+    args['partition'] = validate_partition(args['partition'])
+    return args
+
+# Need to validate that the output directory exists. This will not create a directory that does not already exist.
+def validate_output_directory(outdir):
+    """Return outdir as a resolved Path; raise ValueError when it is not an existing directory."""
+    resolved = Path(outdir).resolve()
+    if resolved.is_dir():
+        return resolved
+    raise ValueError(f"{resolved} is not a valid output directory")
+
+def create_default_outfile(device):
+    """Return the default output base name for device (an object offering pathlib's match() and stem)."""
+    # /data/user gets a fixed name instead of one derived from its stem
+    if not device.match('/data/user'):
+        return f'list-policy_{device.stem}'
+    return 'list-policy_data_user'
+
+def main():
+    """Parse and validate the command-line options; building and running the submit-pol-job command is still a stub."""
+    args = vars(parse_args())  # plain dict view: an argparse Namespace cannot be indexed with []
+
+    args['device'] = validate_device(args['device'])
+
+    sbatch_keys = ['nodes','ntasks','partition','time','mem_per_cpu']
+    sbatch_args = {k: v for k, v in args.items() if k in sbatch_keys and v is not None}  # dict, not a set of pairs
+    args.update(validate_sbatch_options(sbatch_args))  # keep the validated partition string
+
+    args['outdir'] = validate_output_directory(args['outdir'])
+
+    if args['outfile'] is None:  # argparse always creates the key; None means -f was not given
+        args['outfile'] = create_default_outfile(args['device'])
+
+    cmd = "./submit-pol-job "  # TODO: append the validated options and execute; nothing is submitted yet
+    raise SystemExit(0)  # same exit status as the site-provided exit(), without depending on the site module
+
+if __name__ == '__main__':  # only run main() when executed as a script, not when imported
+    main()
\ No newline at end of file
-- 
GitLab