diff --git a/ Runtime-and-CoreCount-ReqMemCPU.ipynb b/ Runtime-and-CoreCount-ReqMemCPU.ipynb
deleted file mode 100644
index 2060d1efa2e997d8aa8bb3431950dd5ad4d02a90..0000000000000000000000000000000000000000
--- a/ Runtime-and-CoreCount-ReqMemCPU.ipynb
+++ /dev/null
@@ -1,311 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Notebook Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "import sqlite3\n",
-    "import slurm2sql\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "%matplotlib inline\n",
-    "import seaborn as sns\n",
-    "import plotly.express as px\n",
-    "import matplotlib.ticker as ticker\n",
-    "import numpy as np\n",
-    "from sklearn.cluster import KMeans\n",
-    "\n",
-    "from RC_styles import rc_styles as style"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# location of the sqlite3 database with the slurm job info from March 2020\n",
-    "DB_PATH = '/data/rc/rc-team/slurm-since-March.sqlite3'\n",
-    "\n",
-    "# seed passed to every KMeans fit so the clusters come out the same on each run\n",
-    "RANDOM_STATE = 111"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# opens a connection to the existing database using sqlite 3\n",
-    "# (sqlite3.connect creates an empty file when the path does not exist,\n",
-    "#  in which case the read of the slurm table below fails)\n",
-    "db = sqlite3.connect(DB_PATH)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# df_raw is starting dataframe: the whole slurm table with its original units\n",
-    "df_raw = pd.read_sql('SELECT * FROM slurm', db)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# for displaying all available column options\n",
-    "pd.set_option('display.max_columns', None)\n",
-    "df_raw.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# df is df_raw with converted units:\n",
-    "#   ReqMemCPU from bytes to gigs\n",
-    "#   Elapsed time to hours (from seconds)\n",
-    "# built from df_raw instead of overwriting the columns in place,\n",
-    "# so re-running this cell does not convert the values a second time\n",
-    "df = df_raw.assign(\n",
-    "    ReqMemCPU=df_raw['ReqMemCPU'].div(1024**3),\n",
-    "    Elapsed=df_raw['Elapsed'].div(3600),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# df_completed is dataframe of all completed jobs\n",
-    "# na=False counts a missing State as not completed instead of failing on the mask\n",
-    "df_completed = df[df.State.str.contains('COMPLETED', na=False)]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# ReqMemCPU,Corecount,Runtime"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# upper limit on ReqMemCPU (in gigs, after rounding up) for the jobs kept below\n",
-    "UpperlimitGB = 50"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df_1 keeps only the three columns compared in this section\n",
-    "df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
-    "df_1.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# rounds ReqMemCPU and Elapsed up to the next whole number\n",
-    "df_1 = df_1.assign(\n",
-    "    ReqMemCPU=np.ceil(df_1['ReqMemCPU']),\n",
-    "    Elapsed=np.ceil(df_1['Elapsed']),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
-    "df_1_sorted.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_runtime = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB)]\n",
-    "df_runtime.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "runtime_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\", data=df_runtime)\n",
-    "\n",
-    "plt.xlabel('ReqMemCPU')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "#plt.yscale(\"log\")\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "g = sns.PairGrid(df_runtime, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
-    "g.map(sns.regplot, color=\"blue\")\n",
-    "#g.set(ylim=(-1, 11), yticks=[0, 5, 10]);\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "g = sns.PairGrid(df_runtime)\n",
-    "g.map(plt.scatter);\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# same jobs as df_runtime; rows with a missing value are dropped\n",
-    "# because KMeans does not accept NaN\n",
-    "df_runtime_cluster = df_runtime.dropna()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# inertia of a KMeans fit for every k from 1 to 9\n",
-    "Sum_of_squared_distances = []\n",
-    "K = range(1,10)\n",
-    "for k in K:\n",
-    "    km = KMeans(n_clusters=k, random_state=RANDOM_STATE)\n",
-    "    km = km.fit(df_runtime_cluster)\n",
-    "    Sum_of_squared_distances.append(km.inertia_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
-    "plt.xlabel('k')\n",
-    "plt.ylabel('Sum_of_squared_distances')\n",
-    "plt.title('Elbow Method For Optimal k')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# KMeans Clusters (k=3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "kmeans = KMeans(n_clusters=3, random_state=RANDOM_STATE)\n",
-    "kmeans.fit(df_runtime_cluster)\n",
-    "print(kmeans.cluster_centers_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# cluster_centers_ has one column per column of df_runtime_cluster, in the same order,\n",
-    "# so the positions are looked up by name rather than hard-coded\n",
-    "x_col = df_runtime_cluster.columns.get_loc('ReqMemCPU')\n",
-    "y_col = df_runtime_cluster.columns.get_loc('Elapsed')\n",
-    "\n",
-    "runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
-    "plt.scatter(kmeans.cluster_centers_[:,x_col] ,kmeans.cluster_centers_[:,y_col], color='grey')\n",
-    "#plt.yscale(\"log\")\n",
-    "plt.xlabel('ReqMemCPU')\n",
-    "plt.ylabel('Runtime')\n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python",
-   "pygments_lexer": "ipython3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}