Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
G
gpfs-policy
Manage
Activity
Members
Labels
Plan
Issues
14
Issue boards
Milestones
Wiki
Code
Merge requests
4
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Package Registry
Operate
Terraform modules
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rc
gpfs-policy
Commits
8dd5cb12
Commit
8dd5cb12
authored
9 months ago
by
Matthew K Defenderfer
Browse files
Options
Downloads
Patches
Plain Diff
Remove SIF option for now. Only use the latest repo container.
parent
1a9c7b10
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!8
Automate conversion of GPFS policy outputs to parquet without Jupyter
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
convert-to-parquet/run-convert-to-parquet.sh
+45
-42
45 additions, 42 deletions
convert-to-parquet/run-convert-to-parquet.sh
with
45 additions
and
42 deletions
convert-to-parquet/run-convert-to-parquet.sh
+
45
−
42
View file @
8dd5cb12
...
...
@@ -11,8 +11,7 @@ mem="16G"
time
=
"02:00:00"
partition
=
"amd-hdr100"
outdir
=
""
sif
=
""
default_sif_image
=
"daskdev/dask:2024.8.0-py3.12"
sif
=
"gitlab.rc.uab.edu:4567/mdefende/gpfs-policy:latest"
############################################################
# Help #
...
...
@@ -20,37 +19,36 @@ default_sif_image="daskdev/dask:2024.8.0-py3.12"
usage
()
{
>
&2
cat
<<
EOF
Usage:
$0
[ -h ] [
-s | --sif ] [
-o | --outdir ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ]
<
gpfs_logdir
>
"
Usage:
$0
[ -h ] [ -o | --outdir ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] gpfs_logdir"
EOF
exit
1
}
help
()
{
# Display Help
echo
"Submits an array job to convert parts of a GPFS log to parquet format"
echo
echo
"Syntax:
$0
[ -h ] [ -s | --sif ] [ -o | --outdir ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] <gpfs_logdir>"
echo
"options:"
echo
"-h|--help Print this Help."
echo
echo
"Required:"
echo
" gpfs_log_dir Directory containing GPFS log outputs"
echo
echo
"Path:"
echo
" -s|--sif Path to SIF containing dask for processing"
echo
" -o|--outdir Directory to save parquet files to"
echo
echo
"sbatch options:"
echo
" -n|--ntasks Number of tasks for each array index (default: 1)"
echo
" -p|--partition Partition to submit tasks to (default: amd-hdr100)"
echo
" -t|--time Max walltime (default: 02:00:00)"
echo
" -m|--mem Memory for each task (default: 16G)"
echo
exit
1
>
&2
cat
<<
EOF
Submits an array job to convert parts of a GPFS log to parquet format
Syntax:
$0
[ -h ] [ -o | --outdir ] [ -n | --ntasks ] [ -p | --partition] [ -t | --time ] [ -m | --mem ] gpfs_logdir
options:
-h|--help Print this Help.
Required:
gpfs_log_dir Directory containing GPFS log outputs
Path:
-o|--outdir Directory to save parquet files to
sbatch options:
-n|--ntasks Number of tasks for each array index (default: 1)
-p|--partition Partition to submit tasks to (default: amd-hdr100)
-t|--time Max walltime (default: 02:00:00)
-m|--mem Memory for each task (default: 16G)
EOF
exit
0
}
args
=
$(
getopt
-a
-o
h
s:
o:n:p:t:m:
--long
help
,
sif:,
outdir:,ntasks:,partition:,time:,mem:
--
"
$@
"
)
args
=
$(
getopt
-a
-o
ho:n:p:t:m:
--long
help
,outdir:,ntasks:,partition:,time:,mem:
--
"
$@
"
)
if
[[
$?
-gt
0
]]
;
then
usage
fi
...
...
@@ -61,7 +59,6 @@ while :
do
case
$1
in
-h
|
--help
)
help
;;
-s
|
--sif
)
sif
=
$2
;
shift
2
;;
-o
|
--outdir
)
outdir
=
$2
;
shift
2
;;
-n
|
--ntasks
)
ntasks
=
$2
;
shift
2
;;
-p
|
--partition
)
partition
=
$2
;
shift
2
;;
...
...
@@ -85,25 +82,31 @@ if [[ -z "$gpfs_logdir" ]]; then
exit
1
fi
if
[[
-z
"
${
sif
}
"
&&
!
-f
"dask.sif"
]]
;
then
echo
"No SIF set, downloading
${
default_sif_image
}
as dask.sif"
singularity pull dask.sif docker://
${
default_sif_image
}
sif
=
"dask.sif"
elif
[[
-f
"
${
sif
}
"
]]
;
then
echo
"Singualrity file does not exist. Please pull the image first"
exit
1
# If outdir not set, set to ${gpfs_logdir}/parquet
if
[[
-z
"
$outdir
"
]]
;
then
outdir
=
"
${
gpfs_logdir
}
/parquet"
fi
singularity pull
--force
gpfs.sif docker://
${
sif
}
nlogs
=
$(
ls
${
gpfs_logdir
}
/list-
*
|
wc
-l
)
>
&2
echo
"sif:
${
sif
}
"
>
&2
echo
"output dir:
${
outdir
}
"
>
&2
echo
"GPFS logs:
${
gpfs_logdir
}
"
>
&2
echo
"ntasks:
${
ntasks
}
"
>
&2
echo
"partition:
${
partition
}
"
>
&2
echo
"time:
${
time
}
"
>
&2
echo
"mem:
${
mem
}
"
>
&2
echo
"singularity command: singularity exec --bind /data
${
sif
}
python3 convert-to-parquet.py -o
${
outdir
}
-f
\$
{log}"
cmd
=
"singularity exec --bind /data,/scratch gpfs.sif python3 convert-to-parquet.py -o
${
outdir
}
-f
\$
{log}"
>
&2
cat
<<
EOF
--------------------------------------------------------------------------------
sif:
${
sif
}
output dir:
${
outdir
}
GPFS logs:
${
gpfs_logdir
}
ntasks:
${
ntasks
}
partition:
${
partition
}
time:
${
time
}
mem:
${
mem
}
command:
${
cmd
}
--------------------------------------------------------------------------------
EOF
mkdir
-p
out
mkdir
-p
err
...
...
@@ -126,7 +129,7 @@ mkdir -p err
log=
\$
(ls
${
gpfs_logdir
}
/list-* | awk "NR==
\$
{SLURM_ARRAY_TASK_ID} { print
\$
1 }")
singularity exec --bind /data dask.sif python3 convert-to-parquet.py -o
${
outdir
}
-f
\$
{log
}
${
cmd
}
EOF
exit
0
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment