diff --git a/src/run-submit-pol-job.py b/src/run-submit-pol-job.py index be824cc8d760f1a31015446e1b7b2cfae44a7019..21be17f3ea8f0eb406f2d5f1883e570850efb231 100755 --- a/src/run-submit-pol-job.py +++ b/src/run-submit-pol-job.py @@ -15,6 +15,7 @@ def parse_args(): parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter ) + parser.add_argument('-o','--outdir', type=str, help="Directory to store the policy output in", default='/data/rc/gpfs-policy/data') @@ -24,6 +25,7 @@ def parse_args(): "file name will have the policy type, the job ID, and the run date tagged on the end") parser.add_argument('--with-dirs', action='store_true', help="Include directories as entries in the policy output (Default: false)") + sbatch = parser.add_argument_group('sbatch parameters') sbatch.add_argument('-N','--nodes',type=int,default=1, help='Number of nodes to run job across') @@ -35,10 +37,11 @@ def parse_args(): help='Time limit for job formatted as [D-]HH:MM:SS') sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G', help='Amount of RAM to allocate per core') + parser.add_argument('device',type=str, help="GPFS fileset/directory apply the policy to. Can be specified as either the name of the" "fileset or the full path to the directory. (Examples: scratch, /data/user/[username])')") - args = parser.parse_args() + args = vars(parser.parse_args()) return args # Validate that the string supplied to 'device' is either the name of a valid, predefined fileset (only 'scratch' @@ -62,14 +65,14 @@ def validate_device(device): if p.exists() and any([parent in p.parents for parent in valid_parents]): return p else: - raise InvalidDeviceError('The path or fileset must be within /data or /scratch') + raise InvalidDeviceError(f'The path or fileset {device} does not exist within /data or /scratch') def validate_time(time): if not re.match(r'^(?:[0-6]-\d{2}|\d{1,3}):\d{2}:\d{2}$',time): raise ValueError("Time must have format [[H]H]H:MM:SS or D-HH:MM:SS") def validate_mem(mem): - if not re.match(r'[\d]+[GM]?', mem): + if not re.fullmatch(r'^[\d]+[GM]?$', mem): raise ValueError("Mem per CPU must be an integer. May be followed by M or G to denote units") def validate_partition(partition): @@ -77,11 +80,11 @@ def validate_partition(partition): raise ValueError("Multiple partitions should be given as a comma-separated list") partition = re.sub(r'\s',r'',partition) - partitions = [partition.split(r',')] + partitions = partition.split(r',') cmd=r'sinfo -h -o "%R"' - avail_partitions=subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True).stdout.splitlines() + avail_partitions=subprocess.run(cmd, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8').splitlines() - if ([p for p in partitions if p not in avail_partitions]): + if any([p for p in partitions if p not in avail_partitions]): incorrect = [p for p in partitions if p not in avail_partitions] raise ValueError(f"The following partition(s) do not exist: {', '.join(incorrect)}. To see a list of valid partitions, visit https://rc.uab.edu/") @@ -95,14 +98,6 @@ def validate_ntasks(n): if not isinstance(n,int) and n >= 1 and n <= 48: raise ValueError('Ntasks must be an integer between 1 and 48') -def validate_sbatch_options(args): - validate_nodes(args['nodes']) - validate_ntasks(args['ntasks']) - validate_mem(args['mem_per_cpu']) - validate_time(args['time']) - args['partition'] = validate_partition(args['partition']) - return args - # Need to validate that the output directory exists. This will not create a directory that does not already exist. def validate_output_directory(outdir): p = Path(outdir).resolve() @@ -120,20 +115,24 @@ def create_default_outfile(device): def main(): args = parse_args() - args_dict = vars(args) - args['device'] = validate_device(args['device']) - sbatch_args = {(k,v) for k,v in args_dict.items() if k in ['nodes','ntasks','partition','time','mem_per_cpu'] and v is not None} - sbatch_args = validate_sbatch_options(sbatch_args) + validate_nodes(args['nodes']) + validate_ntasks(args['ntasks']) + validate_mem(args['mem_per_cpu']) + validate_time(args['time']) + args['partition'] = validate_partition(args['partition']) args['outdir'] = validate_output_directory(args['outdir']) - if 'outfile' not in args.keys(): + if args['outfile'] is None: args['outfile'] = create_default_outfile(args['device']) - cmd = f"./submit-pol-job " + cmd = "./submit-pol-job -o {outdir} -f {outfile} -N {nodes} -n {ntasks} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args) + if args['with_dirs']: + cmd = f'{cmd} --with-dirs' + print(cmd) exit() if __name__ == '__main__':