Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
G
gpfs-policy
Manage
Activity
Members
Labels
Plan
Issues
14
Issue boards
Milestones
Wiki
Code
Merge requests
4
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Package Registry
Operate
Terraform modules
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rc
gpfs-policy
Commits
b4079acc
Commit
b4079acc
authored
6 months ago
by
Matthew K Defenderfer
Browse files
Options
Downloads
Plain Diff
Merge branch 'main' into 'prod'
Release v0.1.1-2 See merge request
!24
parents
bbf19903
18b49a7c
No related branches found
Tags
v0.2.0
1 merge request
!24
Release v0.2.0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/run-policy/run-mmpol.sh
+127
-27
127 additions, 27 deletions
src/run-policy/run-mmpol.sh
src/run-policy/run-submit-pol-job.py
+10
-6
10 additions, 6 deletions
src/run-policy/run-submit-pol-job.py
src/run-policy/submit-pol-job
+29
-16
29 additions, 16 deletions
src/run-policy/submit-pol-job
with
166 additions
and
49 deletions
src/run-policy/run-mmpol.sh
+
127
−
27
View file @
b4079acc
...
...
@@ -4,43 +4,143 @@ set -euxo pipefail
# run an mmapply policy across the cluster via slurm
# gather info to map mmapplypolicy to runtime configuration
# arguments passed via job env and runtime context
############################################################
# Default Values #
############################################################
filesystem
=
${
FILESYSTEM
:-
scratch
}
policyfile
=
$POLICYFILE
tmpglobal
=
$DIR
/slurm-tmp-
${
SLURM_JOBID
}
tmpscratch
=
$DIR
/slurm-tmp-
${
SLURM_JOBID
}
mkdir
-p
$tmpglobal
outdir
=
"/data/rc/gpfs-policy/data"
policy_file
=
"./policy-def/list-path-external"
output_log_prefix
=
""
dry_run
=
""
nodes
=
`
scontrol show hostnames
"
${
SLURM_JOB_NODELIST
}
"
|
tr
'\n'
','
|
sed
-e
's/,$//'
`
############################################################
# Help #
############################################################
usage
()
{
>
&2
cat
<<
EOF
Usage:
$0
[ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file] device
EOF
exit
1
}
help
()
{
>
&2
cat
<<
EOF
Runs mmapplypolicy on the specified device/fileset. The policy file dictates the actions performed including list, delete, add, etc. This is most often called by the submit-pol-job wrapper instead of invoked directly
Usage:
$0
[ -h ] [ -o | --outdir ] [ -f | --output-prefix ] [ -P | --policy-file ] device
options:
-h|--help Print this Help.
--dry-run Do not run the policy command or any command that modifies any files or directories such as
mkdir
Required:
device GPFS fileset/directory apply the policy to. Can be
specified as either the name of the fileset or the
full path to the directory
(Examples: scratch, /data/user/[username])
Path:
-o|--outdir Parent directory to save policy output to
(default: /data/rc/gpfs-policy/data)
-f|--output-prefix Prefix of the policy output file. Appended with a metadata string containing the policy name,
job ID, and date
Policy Options:
-P|--policy-file Path to policy file to apply to the given GPFS device
EOF
exit
0
}
args
=
$(
getopt
-a
-o
ho:f:P:
--long
help
,outdir:,output-prefix:,policy-file:,dry-run
--
"
$@
"
)
if
[[
$?
-gt
0
]]
;
then
usage
fi
eval set
--
${
args
}
while
:
do
case
$1
in
-h
|
--help
)
help
;;
-o
|
--outdir
)
outdir
=
$2
;
shift
2
;;
-f
|
--output-prefix
)
output_log_prefix
=
$2
;
shift
2
;;
-P
|
--policy-file
)
policy_file
=
$2
;
shift
2
;;
--dry-run
)
dry_run
=
true
;
shift
1
;;
--
)
shift
;
break
;;
*
)
>
&2
echo
Unsupported option:
$1
usage
;;
esac
done
if
[[
$#
-eq
0
]]
;
then
usage
fi
device
=
"
$1
"
# Ensure device is specified
if
[[
-z
"
${
device
}
"
]]
;
then
echo
"Error: Specify either the name of a fileset or a directory path"
usage
fi
# set default output_log_prefix if not specified in the arguments
if
[[
-z
"
${
output_log_prefix
}
"
]]
;
then
modified_device
=
$(
echo
"
${
device
}
"
|
sed
-e
's|^/||'
-e
's|/$||'
-e
's|/|-|g'
)
output_log_prefix
=
"list-policy_
${
modified_device
}
"
fi
# create temporary working directory for list aggregation
tmpglobal
=
"
${
outdir
}
/slurm-tmp-
${
SLURM_JOBID
}
"
tmpscratch
=
"
${
outdir
}
/slurm-tmp-
${
SLURM_JOBID
}
"
nodes
=
$(
scontrol show hostnames
"
${
SLURM_JOB_NODELIST
}
"
|
tr
'\n'
','
|
sed
-e
's/,$//'
)
cores
=
"
${
SLURM_CPUS_PER_TASK
}
"
DATESTR
=
`
date
+
'%Y-%m-%d
-
%H:%M:%S'
`
DATESTR
=
$(
date
+
'%Y-%m-%d
T
%H:%M:%S'
)
policy
=
`
basename
$policyfile
`
policy
=
$(
basename
$
{
policy
_
file
}
)
filetag
=
"
${
policy
}
_slurm-
${
SLURM_JOBID
}
_
${
DATESTR
}
"
cmd
=
"mmapplypolicy
${
filesystem
}
-I defer
\
-P
$policyfile
\
-g
$tmpglobal
\
-s
$tmpscratch
\
-f
${
DIR
}
/list-
${
SLURM_JOBID
}
\
-M FILEPATH=
${
filesystem
}
\
cmd
=
"mmapplypolicy
${
device
}
-I defer
\
-P
$
{
policy
_
file
}
\
-g
$
{
tmpglobal
}
\
-s
$
{
tmpscratch
}
\
-f
${
outdir
}
/list-
${
SLURM_JOBID
}
\
-M FILEPATH=
${
device
}
\
-M JOBID=
${
SLURM_JOBID
}
\
-M LIST_OUTPUT_FILE=
${
OUTFILE
:-
/tmp/gpfs-list-policy
}
-M LIST_OUTPUT_FILE=
${
output_log_prefix
}
\
-N
${
nodes
}
-n
${
cores
}
-m
${
cores
}
"
# report final command in job log
echo
$cmd
if
[[
!
${
dry_run
}
]]
;
then
mkdir
-p
${
tmpglobal
}
# run policy command
${
cmd
}
log_name
=
"
${
output_log_prefix
}
_
${
filetag
}
"
log_dir
=
"
${
outdir
}
/
${
log_name
}
"
# run policy command
$cmd
mkdir
-p
${
log_dir
}
/raw
chmod
2770
${
log_dir
}
# tag output file with run metadata
outfile
=
`
ls
-t
$tmpglobal
|
head
-1
`
if
[[
"
$outfile
"
!=
""
]]
then
mv
-n
$tmpglobal
/
$outfile
$tmpglobal
/../
${
outfile
}
_
$filetag
# tag output file with run metadata
raw_log_file
=
$(
find
${
outdir
}
-maxdepth
1
-name
"list-
${
SLURM_JOBID
}
*"
-type
f |
head
-1
)
if
[[
"
$raw_log_file
"
!=
""
]]
;
then
mv
-n
${
raw_log_file
}
${
log_dir
}
/raw/
${
log_name
}
gzip
${
log_dir
}
/raw/
${
log_name
}
chmod
440
${
log_dir
}
/raw/
${
log_name
}
.gz
chmod
1550
${
log_dir
}
/raw
fi
chown
-R
${
USER
}
:atlab
${
log_dir
}
rmdir
${
tmpglobal
}
fi
rmdir
$tmpglobal
This diff is collapsed.
Click to expand it.
src/run-policy/run-submit-pol-job.py
+
10
−
6
View file @
b4079acc
...
...
@@ -36,6 +36,10 @@ def parse_args():
help
=
'
Time limit for job formatted as [D-]HH:MM:SS
'
)
sbatch
.
add_argument
(
'
-m
'
,
'
--mem-per-cpu
'
,
type
=
str
,
default
=
'
8G
'
,
help
=
'
Amount of RAM to allocate per core
'
)
parser
.
add_argument
(
'
--dry-run
'
,
action
=
'
store_true
'
,
help
=
"
Do not submit any jobs, run any policies, or create or remove any files or directories.
"
"
Used for testing
"
)
parser
.
add_argument
(
'
device
'
,
type
=
str
,
help
=
"
GPFS fileset/directory apply the policy to. Can be specified as either the name of the
"
...
...
@@ -106,11 +110,8 @@ def validate_output_directory(outdir):
return
p
def
create_default_log_prefix
(
device
):
if
device
.
match
(
'
/data/user
'
):
log_prefix
=
'
list-policy_data_user
'
else
:
log_prefix
=
f
'
list-policy_
{
device
.
stem
}
'
return
log_prefix
mod_device
=
str
(
device
).
strip
(
'
/
'
).
replace
(
'
/
'
,
'
-
'
)
return
f
"
list-policy_
{
mod_device
}
"
def
main
():
args
=
parse_args
()
...
...
@@ -135,7 +136,10 @@ def main():
else
:
args
[
'
policy
'
]
=
'
./policy-def/list-path-external
'
cmd
=
"
./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}
"
.
format
(
**
args
)
if
args
[
'
dry_run
'
]:
cmd
=
"
./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} --dry-run {device}
"
.
format
(
**
args
)
else
:
cmd
=
"
./submit-pol-job -o {outdir} -f {log_prefix} -P {policy} -N {nodes} -c {cores} -p {partition} -t {time} -m {mem_per_cpu} {device}
"
.
format
(
**
args
)
print
(
f
"
Command:
{
cmd
}
"
)
subprocess
.
run
(
cmd
,
shell
=
True
)
...
...
This diff is collapsed.
Click to expand it.
src/run-policy/submit-pol-job
+
29
−
16
View file @
b4079acc
...
...
@@ -14,6 +14,7 @@ partition="amd-hdr100,medium"
outdir
=
"/data/rc/gpfs-policy/data"
policy
=
"./policy-def/list-path-external"
outfile
=
""
dry_run
=
""
############################################################
# Help #
...
...
@@ -23,7 +24,7 @@ usage()
>
&2
cat
<<
EOF
Usage:
$0
[ -h ] [ -o | --outdir ] [ -f | --outfile ] [ --with-dirs ]
[ -N | --nodes ] [ -c | --cores ] [ -p | --partition]
[ -t | --time ] [ -m | --mem-per-cpu ]
[ -t | --time ] [ -m | --mem-per-cpu ]
[ --dry_run ]
device
EOF
exit
1
...
...
@@ -38,11 +39,13 @@ as root or via the run-submit-pol-job.py script. The default policy file is
Usage:
$0
[ -h ] [ -o | --outdir ] [ -f | --outfile ] [ -P | --policy ]
[ -N | --nodes ] [ -c | --cores ] [ -p | --partition ]
[ -t | --time ] [ -m | --mem ]
[ -t | --time ] [ -m | --mem ]
[ --dry-run ]
device
options:
-h|--help Print this Help.
-h|--help Print this Help.
--dry-run Do not submit a Slurm job running the policy. Instead, pass --dry-run to run-mmpol.sh and call
it normally to just print the output to STDOUT
Required:
device GPFS fileset/directory apply the policy to. Can be
...
...
@@ -69,7 +72,8 @@ EOF
exit
0
}
args
=
$(
getopt
-a
-o
ho:f:P:N:c:p:t:m:
--long
help
,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem:
--
"
$@
"
)
args
=
$(
getopt
-a
-o
ho:f:P:N:c:p:t:m:
\
--long
help
,outdir:,outfile:,policy:,nodes:,cores:,partition:,time:,mem:,dry-run
--
"
$@
"
)
if
[[
$?
-gt
0
]]
;
then
usage
...
...
@@ -89,6 +93,7 @@ do
-p
|
--partition
)
partition
=
$2
;
shift
2
;;
-t
|
--time
)
time
=
$2
;
shift
2
;;
-m
|
--mem-per-cpu
)
mem_per_cpu
=
$2
;
shift
2
;;
--dry-run
)
dry_run
=
true
;
shift
1
;;
--
)
shift
;
break
;;
*
)
>
&2
echo
Unsupported option:
$1
usage
;;
...
...
@@ -108,15 +113,23 @@ if [[ -z "$device" ]]; then
fi
slurm_out
=
"out/pol-%A-
$(
basename
${
policy
}
)
-
$(
basename
${
device
}
)
.out"
mkdir
-p
out
DIR
=
$outdir
POLICYFILE
=
$policy
FILESYSTEM
=
${
device
}
OUTFILE
=
${
outfile
}
&&
\
DIR
=
$DIR
POLICYFILE
=
$POLICYFILE
FILESYSTEM
=
${
FILESYSTEM
}
OUTFILE
=
${
OUTFILE
}
\
sbatch
\
-N
$nodes
\
-c
$cores
\
-t
$time
\
--mem-per-cpu
=
$mem_per_cpu
\
-p
$partition
\
-o
${
slurm_out
}
\
./run-mmpol.sh
run_mmpol_cmd_base
=
"./run-mmpol.sh -o
${
outdir
}
-f
${
outfile
}
-P
${
policy
}
"
if
[[
-z
"
${
dry_run
}
"
]]
;
then
mkdir
-p
out
run_mmpol_cmd
=
"
${
run_mmpol_cmd_base
}
${
device
}
"
sbatch
\
-N
$nodes
\
-c
$cores
\
-t
$time
\
--mem-per-cpu
=
$mem_per_cpu
\
-p
$partition
\
-o
${
slurm_out
}
\
--wrap
"
${
run_mmpol_cmd
}
"
else
run_mmpol_cmd
=
"
${
run_mmpol_cmd_base
}
--dry-run
${
device
}
"
${
run_mmpol_cmd
}
fi
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment