From ccd84c44f0e5f1d05823e9821bc5ce3d3fe0fafd Mon Sep 17 00:00:00 2001
From: Mike Hanby <mhanby@uab.edu>
Date: Tue, 8 Aug 2023 09:23:42 -0400
Subject: [PATCH] Init checkin of context switch check code

---
 .gitignore               |  2 ++
 node_context_switches.py | 36 ++++++++++++++++++++++++++++++++++++
 uabrc_hw.nhc             | 29 +++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 node_context_switches.py
 create mode 100644 uabrc_hw.nhc

diff --git a/.gitignore b/.gitignore
index 0624af8..8c9bdde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 scripts/lbnl_ps.nhc
+.history/
+.vscode/
diff --git a/node_context_switches.py b/node_context_switches.py
new file mode 100644
index 0000000..078745c
--- /dev/null
+++ b/node_context_switches.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Script expects that Prometheus node_exporter is running on the local host
+# https://github.com/prometheus/node_exporter
+#
+# The script queries node_exporter and prints the total number of context
+# switches as an integer, ex: 240080031436
+#
+# node_exporter field: node_context_switches_total
+import requests
+
+url = "http://localhost:9100/metrics"
+
+# Send GET request to the URL
+response = requests.get(url)
+
+# Check if the request was successful (status code 200)
+
+if response.status_code == 200:
+    # Filter lines containing "node_context_switches_total" and ignore lines
+    # starting with "#"
+    filtered_lines = [line for line in response.text.split(
+        "\n") if "node_context_switches_total" in line and not line.startswith("#")]
+
+    # Parse the second column of the filtered lines
+    for line in filtered_lines:
+        columns = line.split()
+        if len(columns) >= 2:
+            metric_name = columns[0]
+            metric_value = int(float(columns[1]))
+#            print(f"Metric: {metric_name}")
+#            print(f"Value: {metric_value}")
+            print(metric_value)
+else:
+    print(
+        f"Error: Failed to retrieve data from {url}. Status code: {response.status_code}")
diff --git a/uabrc_hw.nhc b/uabrc_hw.nhc
new file mode 100644
index 0000000..f71a4d6
--- /dev/null
+++ b/uabrc_hw.nhc
@@ -0,0 +1,29 @@
+# NHC -- UAB Research Computing - Hardware Checks
+#
+# Mike Hanby <mhanby@uab.edu>
+# Date: 2023-08-07
+#
+# Copyright and/or license information if different from upstream
+#
+
+declare HW_CONTEXT_SWITCHES=0
+
+# Gather the metrics: cache the context switch total as a plain integer ("%.0f" expands exponent notation such as 2.4e+11)
+function uabrc_hw_gather_data() {
+    HW_CONTEXT_SWITCHES=$(curl --silent http://localhost:9100/metrics | awk '/^node_context_switches_total/ {printf("%.0f\n", $2)}')
+}
+
+# NHC check: fail when the total context switch count collected from
+# Prometheus Node Exporter is greater than the limit given as $1.
+function uabrc_check_hw_context_switches() {
+    local CONTEXT_SWITCH_MAX="$1"
+
+    # A cached value of 0 means the metric has not been collected yet.
+    [[ $HW_CONTEXT_SWITCHES -eq 0 ]] && uabrc_hw_gather_data
+
+    # Within the limit: nothing to report.
+    [[ $((HW_CONTEXT_SWITCHES)) -gt $CONTEXT_SWITCH_MAX ]] || return 0
+
+    die 1 "$FUNCNAME:  Total Context Switches ($HW_CONTEXT_SWITCHES) greater than maximum allowed ($CONTEXT_SWITCH_MAX)."
+    return 1
+}
-- 
GitLab