diff --git a/uabrc_hw.nhc b/uabrc_hw.nhc index 0d03409112d770484f45a644d189ea90a59951cf..c5663ccdeb7fb9a45d46384fd83e4a645dac2e65 100644 --- a/uabrc_hw.nhc +++ b/uabrc_hw.nhc @@ -5,20 +5,28 @@ # # Copyright and/or license information if different from upstream # +# Requires 'curl' and 'jq' +# declare HW_CONTEXT_SWITCH_RATE=0 +declare HW_CONTEXT_SWITCH_INTERVAL=0 +declare NODENAME="" # Gather the metrics function uabrc_hw_gather_data() { - # HW_CONTEXT_SWITCH_RATE=$(curl --silent http://localhost:9100/metrics | grep ^node_context_switches_total | awk '{print $2}' | awk '{printf("%d\n", $1*1e$2)}') - HW_CONTEXT_SWITCH_RATE=$(curl -fs --data-urlencode 'query=snode_context_switches_total{job="compute-node",name="c0159"}(30m)' https://nagios.rc.uab.edu:9090/api/v1/query | jq -r '.data.result[] | [.metric.container_name, .metric.namespace, .value[1] ] | @csv') + NODENAME="$(hostname -s)" + + # Gather the nodes context switching rate from Prometheus + HW_CONTEXT_SWITCH_RATE=$(curl -fs --data-urlencode "query=irate(node_context_switches_total{job=\"compute-node\",name=\"$NODENAME\"}[$HW_CONTEXT_SWITCH_INTERVAL])" http://nagios.rc.uab.edu:9090/api/v1/query | jq -r '.data.result[] | .value[1]') + # Convert to an integer (hacky as it doesn't round, but insignificant) + HW_CONTEXT_SWITCH_RATE=${HW_CONTEXT_SWITCH_RATE%.*} } -curl -fs --data-urlencode 'query=snode_context_switches_total{job="compute-node",name="c0159"}(30m)' https://nagios.rc.uab.edu:9090/api/v1/query | jq -r '.data.result[] | [.metric.container_name, .metric.namespace, .value[1] ] | @csv' # Check that total context switches are less than max context switches ($1) # The total context switches is collected from Prometheus Node Exporter -function uabrc_check_HW_CONTEXT_SWITCH_RATE() { +function uabrc_check_hw_context_switch_rate() { local CONTEXT_SWITCH_RATE_MAX="$1" + HW_CONTEXT_SWITCH_INTERVAL="$2" if [[ $HW_CONTEXT_SWITCH_RATE -eq 0 ]]; then uabrc_hw_gather_data @@ -28,8 +36,4 @@ function uabrc_check_HW_CONTEXT_SWITCH_RATE() { die 1 "$FUNCNAME: Total Context Switches ($HW_CONTEXT_SWITCH_RATE) greater than maximum allowed ($CONTEXT_SWITCH_RATE_MAX)." return 1 fi - echo "HW_CONTEXT_SWITCH_RATE: $HW_CONTEXT_SWITCH_RATE" - echo "CONTEXT_SWITCH_RATE_MAX: $CONTEXT_SWITCH_RATE_MAX" } - -uabrc_check_HW_CONTEXT_SWITCH_RATE $1 \ No newline at end of file