diff --git a/scontext.sh b/scontext.sh index 7c006ebc87fca9587762cb7ba5b0e17245129c69..d8fdc7155111d055c24ea35f6ebf8ff20d7947fb 100755 --- a/scontext.sh +++ b/scontext.sh @@ -1,5 +1,15 @@ #!/bin/bash +#set -euo + +pidstat_recording_duration=${1:-0} + +if [ $pidstat_recording_duration -eq 0 ]; then + pidstat_recording_count="" +else + pidstat_recording_count=1 +fi + # Extract pid-job relationship SCONTROL_PID_COL=1 SCONTROL_JOB_COL=2 @@ -8,19 +18,42 @@ SCONTROL_WIDTH=$(scontrol listpids | awk '{print NF; exit}') # Get pidstat -w output, extract headers and required columns PIDSTAT_PID_COL=1 PIDSTAT_CTXTSW_COL=$(( 2 + $SCONTROL_WIDTH - 1 )) +PIDSTAT_NVCTXTSW_COL=$(( $PIDSTAT_CTXTSW_COL + 1 )) +PIDSTAT_THREAD_COL=$(( $PIDSTAT_CTXTSW_COL + 2 )) # Extract headers and required columns from ps -aux PS_PID_COL=1 PS_USER_COL=2 # Extract headers separately -ps_header=$(ps -eo "pid,user,cp,drs,cmd" --sort pid | head -n 1) +ps_header=$(ps -eo "pid,user,cp,drs,cmd" | head -n 1) pidstat_header=$(pidstat -w | awk 'NR==3 {print $4, $5, $6}') scontrol_header=$(scontrol listpids | head -n 1) # Sort the outputs by PID, excluding headers -ps_sorted=$(ps -eo "pid,user,cp,drs,cmd" --sort pid | tail -n +2 | sort -k$PS_PID_COL,$PS_PID_COL) -pidstat_sorted=$(pidstat -w | awk 'NR>3 {print $4, $5, $6}' | sort -k$PIDSTAT_PID_COL,$PIDSTAT_PID_COL) +ps_sorted=$(ps -eo "pid,user,cp,drs,cmd" | tail -n +2 | sort -k$PS_PID_COL,$PS_PID_COL) +pidstat_sorted=$( \ + pidstat -wt -p ALL $pidstat_recording_duration $pidstat_recording_count \ + | awk ' + NR>3 { + if ($4 != "-") { + current_parent = $4 + #vol[current_parent] += $6 + #nonvol[current_parent] += $7 + #thread_count[current_parent] += 0 + } else if ($4 == "-") { + vol[current_parent] += $6 + nonvol[current_parent] += $7 + thread_count[current_parent] += 1 + } + } + END { + for (i in vol) { + printf "%s %s %s %s\n", i, vol[i], nonvol[i], thread_count[i] + } + }' \ + | sort -k$PIDSTAT_PID_COL,$PIDSTAT_PID_COL \ +) scontrol_sorted=$(scontrol listpids | tail -n +2 | sort -k$SCONTROL_PID_COL,$SCONTROL_PID_COL) # Join the two outputs on the PID column @@ -28,15 +61,42 @@ scontrol_pidstat=$(join -1 $SCONTROL_PID_COL -2 $PIDSTAT_PID_COL <(echo "$scontr scontrol_pidstat_ps=$(join -1 $SCONTROL_PID_COL -2 $PS_PID_COL <(echo "$scontrol_pidstat") <(echo "$ps_sorted")) # Aggregate -agg=$(echo "$scontrol_pidstat_ps" | awk -v key_col=$SCONTROL_JOB_COL -v val_col=$PIDSTAT_CTXTSW_COL \ - '{ - arr[$key_col]+=$val_col - } - END { - for (key in arr) printf("%s\t%s\n", key, arr[key]) - }' | sort -k1,1) +agg=$( \ + echo "$scontrol_pidstat_ps" \ + | awk \ + -v key_col=$SCONTROL_JOB_COL \ + -v val_col=$PIDSTAT_CTXTSW_COL \ + -v val_col_nv=$PIDSTAT_NVCTXTSW_COL \ + -v val_col_thread=$PIDSTAT_THREAD_COL \ + '{ + voluntary[$key_col]+=$val_col + non_voluntary[$key_col]+=$val_col_nv + total[$key_col]+=$val_col+$val_col_nv + thread_count[$key_col]+=$val_col_thread + } + END { + for (i in voluntary) printf("%s\t%s\t%s\t%s\t%s\n", i, voluntary[i], non_voluntary[i], total[i], thread_count[i]) + }' \ + | sort -k1,1 \ +) # Combine with squeue -squeue_header=$(squeue -w $(hostname) | head -n 1 | sed 's/ \+/\t/g' | sed 's/^[ \t]*//') -printf "%s\tcontext-switches/s\n" "$squeue_header" -join -1 1 -2 1 <(squeue -w $(hostname) | sort -k1,1) <(echo "$agg") | sed 's/ \+/\t/g' +squeue_output=$(squeue -w $(hostname) -O "jobid,jobarrayid,account,timeused,minmemory,numcpus") +squeue_header=$(echo "$squeue_output" | head -n 1) +squeue_body=$(echo "$squeue_output" | tail -n +2 | sort -k1,1) + +# Output the array to a file +rm squeue.txt +for element in "${squeue_body[@]}"; do + echo "$element" >> squeue.txt +done + +# Print header with additional columns +squeue_header=$(echo "$squeue_header" | sed 's/JOBID/JOB_ARRAY_ID/2' | sed 's/MIN_MEMORY/REQ_MEMORY/' ) +output_header=$(echo "$squeue_header ctx-sw/s nvctx-sw/s total/s threads" | sed 's/ \+/ /g') +output_body=$(join -1 1 -2 1 <(echo "$squeue_body") <(echo "$agg") | sort -k1,1 | sed 's/ \+/ /g') + +printf "%10s %16s %16s %12s %12s %5s %15s %15s %15s %8s\n" $output_header +for element in "${output_body[@]}"; do + printf "%10s %16s %16s %12s %12s %5s %15.2f %15.2f %15.2f %8s\n" $element +done