From ccd84c44f0e5f1d05823e9821bc5ce3d3fe0fafd Mon Sep 17 00:00:00 2001
From: Mike Hanby <mhanby@uab.edu>
Date: Tue, 8 Aug 2023 09:23:42 -0400
Subject: [PATCH] Init checkin of context switch check code

---
 .gitignore               |  2 ++
 node_context_switches.py | 36 ++++++++++++++++++++++++++++++++++++
 uabrc_hw.nhc             | 29 +++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 node_context_switches.py
 create mode 100644 uabrc_hw.nhc

diff --git a/.gitignore b/.gitignore
index 0624af8..8c9bdde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 scripts/lbnl_ps.nhc
+.history/
+.vscode/
diff --git a/node_context_switches.py b/node_context_switches.py
new file mode 100644
index 0000000..078745c
--- /dev/null
+++ b/node_context_switches.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Script expects that Prometheus node_exporter is running on the local host
+# https://github.com/prometheus/node_exporter
+#
+# The script queries node_exporter and prints the total number of context
+# switches as an integer, ex: 240080031436
+#
+# node_exporter field: node_context_switches_total
+import requests
+
+url = "http://localhost:9100/metrics"
+
+# Send GET request to the URL (NOTE(review): no timeout is passed -- confirm a hung exporter cannot stall the caller)
+response = requests.get(url)
+
+# Check if the request was successful (status code 200)
+
+if response.status_code == 200:
+    # Filter lines containing "node_context_switches_total" and ignore lines
+    # starting with "#"
+    filtered_lines = [line for line in response.text.split(
+        "\n") if "node_context_switches_total" in line and not line.startswith("#")]
+
+    # Parse the second column of each filtered line as a float, truncate it to an int and print it
+    for line in filtered_lines:
+        columns = line.split()
+        if len(columns) >= 2:
+            metric_name = columns[0]
+            metric_value = int(float(columns[1]))
+# print(f"Metric: {metric_name}")
+# print(f"Value: {metric_value}")
+            print(metric_value)
+else:
+    print(
+        f"Error: Failed to retrieve data from {url}. Status code: {response.status_code}")
diff --git a/uabrc_hw.nhc b/uabrc_hw.nhc
new file mode 100644
index 0000000..f71a4d6
--- /dev/null
+++ b/uabrc_hw.nhc
@@ -0,0 +1,29 @@
+# NHC -- UAB Research Computing - Hardware Checks
+#
+# Mike Hanby <mhanby@uab.edu>
+# Date: 2023-08-07
+#
+# Copyright and/or license information if different from upstream
+#
+
+declare HW_CONTEXT_SWITCHES=0
+
+# Gather the metrics: store the node_context_switches_total value from node_exporter in HW_CONTEXT_SWITCHES (NOTE(review): the second awk only receives one field, so $2 is empty there and the "1e$2" part looks unintended -- confirm)
+function uabrc_hw_gather_data() {
+    HW_CONTEXT_SWITCHES=$(curl --silent http://localhost:9100/metrics | grep ^node_context_switches_total | awk '{print $2}' | awk '{printf("%d\n", $1*1e$2)}')
+}
+
+# Check that total context switches do not exceed max context switches ($1)
+# The total context switches value is collected from Prometheus Node Exporter (gathered on first use, while HW_CONTEXT_SWITCHES is still 0)
+function uabrc_check_hw_context_switches() {
+    local CONTEXT_SWITCH_MAX="$1"
+
+    if [[ $HW_CONTEXT_SWITCHES -eq 0 ]]; then
+        uabrc_hw_gather_data
+    fi
+
+    if [[ $((HW_CONTEXT_SWITCHES)) -gt $CONTEXT_SWITCH_MAX ]]; then
+        die 1 "$FUNCNAME: Total Context Switches ($HW_CONTEXT_SWITCHES) greater than maximum allowed ($CONTEXT_SWITCH_MAX)."
+        return 1
+    fi
+}
-- 
GitLab