Skip to content
Snippets Groups Projects

Add reservation option for policy run

2 files
+ 62
14
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -36,6 +36,8 @@ def parse_args():
help='Time limit for job formatted as [D-]HH:MM:SS')
sbatch.add_argument('-m','--mem-per-cpu',type=str,default='8G',
help='Amount of RAM to allocate per core')
sbatch.add_argument('-r','--reservation',type=str,default=None,
help='Reservation to submit the job to, if one exists')
parser.add_argument('--dry-run', action='store_true',
help="Do not submit any jobs, run any policies, or create or remove any files or directories."
@@ -100,6 +102,42 @@ def validate_nodes(n):
def validate_cores(n):
    """Validate the requested number of cores.

    Args:
        n: Requested core count.

    Raises:
        ValueError: If ``n`` is not an integer between 1 and 48 inclusive.
    """
    # The whole "is an int AND is in range" test must be negated. The earlier
    # form `not isinstance(n, int) and n >= 1 and n <= 48` only fired for
    # non-integers that happened to lie inside the range, so out-of-range
    # integers such as 0 or 49 were accepted silently.
    if not (isinstance(n, int) and 1 <= n <= 48):
        raise ValueError('Cores must be an integer between 1 and 48')
def _parse_reservations(output):
    """Parse one-line-per-record reservation output into ``{name: {key: value}}``."""
    reservations = {}
    for line in output.splitlines():
        name_match = re.match(r"^ReservationName=(\S+)", line)
        if name_match is None:
            # Not a reservation record (e.g. empty output); skip it instead of
            # crashing on `None.group(1)`.
            continue
        fields = {}
        for token in line[name_match.end():].split():
            # Split on the FIRST '=' so values that themselves contain '='
            # stay intact; tokens without '=' carry no key and are ignored.
            key, sep, value = token.partition('=')
            if sep:
                fields[key] = value
        reservations[name_match.group(1)] = fields
    return reservations


def validate_reservation(res,nodes,cores):
    """Check that a reservation exists and is large enough for the job.

    Runs ``scontrol show -o reservation``, looks up ``res`` among the
    reported reservations and compares the request against the reservation's
    ``NodeCnt`` and ``CoreCnt`` fields.

    Args:
        res: Name of the reservation to submit to.
        nodes: Number of nodes requested.
        cores: Number of cores requested per node.

    Returns:
        The option string ``--reservation=<res>`` to pass to the submit script.

    Raises:
        ValueError: If the reservation is not listed, its node/core counts
            cannot be read, or the request exceeds the reservation's size.
    """
    cmd = 'scontrol show -o reservation'
    proc = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    reservations = _parse_reservations(proc.stdout.decode('utf-8'))

    if res not in reservations:
        raise ValueError(f"Reservation {res} does not exist. Please create the reservation before submitting the job")

    details = reservations[res]
    try:
        node_cnt = int(details.get('NodeCnt'))
        core_cnt = int(details.get('CoreCnt'))
    except (TypeError, ValueError) as err:
        raise ValueError(f"Could not read NodeCnt/CoreCnt for reservation {res} from scontrol output") from err

    if nodes > node_cnt:
        msg = f"Number of requested nodes ({nodes}) exceeds the number of nodes in the reservation ({node_cnt}). Resubmit the job with a node count equal to or less than the reservation."
        raise ValueError(msg)

    total_cores = nodes * cores
    if total_cores > core_cnt:
        # Report the reservation's core count here (the message previously
        # printed the node count by mistake).
        msg = f"Number of requested cores ({total_cores}) exceeds the number of cores in the reservation ({core_cnt}). The --cores option sets the number of cores per node. Reduce the number of cores and resubmit the job"
        raise ValueError(msg)

    return f"--reservation={res}"
# Need to validate that the output directory exists. This will not create a directory that does not already exist.
def validate_output_directory(outdir):
@@ -121,6 +159,14 @@ def main():
validate_cores(args['cores'])
validate_mem(args['mem_per_cpu'])
validate_time(args['time'])
if args['reservation'] is not None:
res_str = validate_reservation(args['reservation'],args['nodes'],args['cores'])
args.update({'res_str': res_str})
else:
res_str = ''
args.update({'res_str': res_str})
args['partition'] = validate_partition(args['partition'])
args['outdir'] = validate_output_directory(args['outdir'])
@@ -137,9 +183,9 @@ def main():
args['policy'] = './policy-def/list-path-external'
if args['dry_run']:
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} --dry-run {device}".format(**args)
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} {res_str} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} --dry-run {device}".format(**args)
else:
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args)
cmd = "./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} {res_str} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}".format(**args)
print(f"Command: {cmd}")
subprocess.run(cmd,shell=True)
Loading