diff --git a/Runtime-and-CoreCount.ipynb b/Runtime-and-CoreCount.ipynb
deleted file mode 100644
index 5c421e1f776fbf5ccf1ccfde6e34669c8dc1e48b..0000000000000000000000000000000000000000
--- a/Runtime-and-CoreCount.ipynb
+++ /dev/null
@@ -1,1388 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Notebook Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# date format: year-month-day (YYYY-MM-DD)\n",
-    "#start_date = '2020-10-09'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "import sqlite3\n",
-    "import slurm2sql\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "%matplotlib inline\n",
-    "import seaborn as sns\n",
-    "import seaborn as sb\n",
-    "import plotly.express as px\n",
-    "import matplotlib.ticker as ticker\n",
-    "import numpy as np\n",
-    "from mpl_toolkits.mplot3d import Axes3D\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "from RC_styles import rc_styles as style"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "from sklearn.cluster import KMeans"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#connecting to database\n",
-    "#db = sqlite3.connect('runtime_and_core_count.db')\n",
-    "#print(db)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# connects to the sqlite3 database holding slurm info from March 2020\n",
-    "db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')\n",
-    "#print(db)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#slurm2sql.slurm2sql(db, ['-S 2020-09-08 -E 2020-09-15 -a  --allocations -o Job,Submit,Start,End'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    " #creating a database based on the start date\n",
-    "#slurm2sql.slurm2sql(db, ['-S', '2020-01-09', '-a'])\n",
-    "#print(db)\n",
-    "#print(start_date)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# df is the starting dataframe (all rows of the slurm table)\n",
-    "df = pd.read_sql('SELECT * FROM slurm', db)\n",
-    "#df = pd.read_sql('SELECT JobID,Submit,Start,End FROM slurm', db)\n",
-    "print(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    " #Deleting the database\n",
-    "#os.remove('runtime_and_core_count.db')\n",
-    "#os.remove('runtime_and_core_count.db-shm')\n",
-    "#os.remove('runtime_and_core_count.db-wal') "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# for displaying all available column options\n",
-    "pd.set_option('display.max_columns', None)\n",
-    "df.count()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# converts units in ReqMemCPU column from bytes to gigs\n",
-    "df['ReqMemCPU'] = df['ReqMemCPU'].div(1024**3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# converts Elapsed time to hours (from seconds)\n",
-    "df['Elapsed'] = df['Elapsed'].div(3600)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# df_completed is dataframe of all completed jobs\n",
-    "df_completed = df[df.State.str.contains('COMPLETED')]\n",
-    "#df_completed.head(5)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# ReqMemCPU,Corecount,Runtime FacetGrid"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The next 4 cells set up the df_1 dataset, which will be the base dataset used for the facet grid."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# dataset of needed columns for all graphs below\n",
-    "df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
-    "df_1.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# rounds ReqMemCPU up to nearest whole number\n",
-    "df_1['ReqMemCPU'] = df_1['ReqMemCPU'].apply(np.ceil)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# rounds Elapsed to 2 decimal places\n",
-    "df_1['Elapsed'] = df_1['Elapsed'].round(2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# makes ReqMemCPU column whole numbers rather than floats for easy readability in graphs\n",
-    "df_1.ReqMemCPU = df_1.ReqMemCPU.apply(int)\n",
-    "df_1.head(5)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The next 3 cells set the min and max parameters for ReqMemCPU, AllocCPUS, and Elapsed. These parameters are used in creating the facet grid and are the parameters for all the cluster graphs."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets min and max parameters for ReqMemCPU\n",
-    "LowerlimitGB = 0\n",
-    "UpperlimitGB = 50"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets min and max parameters for AllocCPUS\n",
-    "LowerlimitAllocCPU = 0\n",
-    "UpperlimitAllocCPU = 50"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets min and max parameters for Elapsed\n",
-    "LowerlimitElapsed = 0\n",
-    "UpperlimitElapsed = 150.02"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "df_facet is a dataset created from df_1 using the parameters above. It will be the dataset that all the cluster graphs will be made from."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
-    "df_facet = df_1[(df_1['ReqMemCPU'] <= UpperlimitGB) & \n",
-    "                       (df_1['ReqMemCPU'] >= LowerlimitGB) & \n",
-    "                       (df_1['AllocCPUS'] <= UpperlimitAllocCPU) & \n",
-    "                       (df_1['AllocCPUS'] >= LowerlimitAllocCPU)\n",
-    "                       & \n",
-    "                       (df_1['Elapsed'] <= UpperlimitElapsed) & \n",
-    "                       (df_1['Elapsed'] >= LowerlimitElapsed)]\n",
-    "df_facet.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# creates a facet grid (pair plot) from the df_facet dataset\n",
-    "# Elapsed time in hours and ReqMemCPU in gigs\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "full_facet = sns.pairplot(df_facet, diag_kind = 'kde') # makes density plots - kernel density estimate\n",
-    "# y axis is count in the diagonal graphs\n",
-    "\n",
-    "full_facet.map(plt.scatter);\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Detailed Look at Elapsed Time - In terms of Requested RAM and Cores"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# pair grid of the two graphs being clustered using df_facet dataset\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "elapsed_reqmem_alloc = sns.PairGrid(df_facet, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
-    "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "df_runtime_cluster is a dataset made from df_facet. It is used to make the elbow graph and calculate the clustering for Elapsed/ReqMemCPU and Elapsed/AllocCPUS"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#must run if dataset will not be normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
-    "\n",
-    "#ReqMemCPU = 0 - 50 gigs\n",
-    "#AllocCPUS = 0 - 50 cores\n",
-    "#Elapsed = 0 - 150.02 hours\n",
-    "\n",
-    "# data set without normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs\n",
-    "df_runtime_cluster = df_facet.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
-    "df_runtime_cluster.head(5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run if dataset will be 0-1 normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
-    "\n",
-    "# 0-1 normalized dataset\n",
-    "# used for 0-1 normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs \n",
-    "column_maxes_runtime = df_runtime_cluster.max()\n",
-    "df_runtime_cluster_max = column_maxes_runtime.max()\n",
-    "normalized_runtime_df = df_runtime_cluster / df_runtime_cluster_max\n",
-    "\n",
-    "print(normalized_runtime_df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run if dataset will be log10 normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
-    "\n",
-    "# log10 normalized dataset\n",
-    "# used for log10 normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs \n",
-    "\n",
-    "log_runtime_df = np.log10(df_runtime_cluster+1)\n",
-    "log_runtime_df.describe()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Computes the K-means inertia (within-cluster sum of squared distances) for k = 1..9\n",
-    "# on the df_runtime_cluster dataset; the values feed the elbow plot of the optimal k.\n",
-    "Sum_of_squared_distances = []\n",
-    "K = range(1,10)\n",
-    "for k in K:\n",
-    "    # fixed seed (same one the cluster models use) so the elbow curve is reproducible between runs\n",
-    "    km = KMeans(n_clusters=k, random_state=111)\n",
-    "    km = km.fit(df_runtime_cluster)\n",
-    "    Sum_of_squared_distances.append(km.inertia_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# the bend in the graph is the optimal number of clusters for graphs using the df_runtime_cluster dataset\n",
-    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
-    "plt.xlabel('k')\n",
-    "plt.ylabel('Sum_of_squared_distances')\n",
-    "plt.title('Elbow Method For Optimal k')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Elapsed/ReqMemCPU clustering"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
-    "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, set the fit based on the normalization type by uncommenting the line to run"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# uncomment for no normalization\n",
-    "#elapsed_reqmem_fit = df_runtime_cluster\n",
-    "\n",
-    "# uncomment for 0-1 normalization\n",
-    "#elapsed_reqmem_fit = normalized_runtime_df\n",
-    "\n",
-    "# uncomment for log10 normalization\n",
-    "elapsed_reqmem_fit = log_runtime_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# fits a 3-cluster KMeans model (fixed seed) to the dataset chosen above\n",
-    "kmeans_elapsed_reqmem = KMeans(n_clusters=3, random_state=111)\n",
-    "kmeans_elapsed_reqmem.fit(elapsed_reqmem_fit)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, choose which cluster center to use - uncomment the line that goes with the normalization type"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# uncomment if no normalization\n",
-    "#clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_\n",
-    "\n",
-    "# uncomment if 0-1 normalization\n",
-    "#clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_ * df_runtime_cluster_max\n",
-    "\n",
-    "# uncomment if log10 normalization\n",
-    "clusterpoints_elapsed_reqmem = 10 ** (kmeans_elapsed_reqmem.cluster_centers_) - 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# returns array of labels for each cluster - used to find min and max x and y points for each cluster\n",
-    "\n",
-    "# 0 = purple cluster\n",
-    "# 1 = green cluster\n",
-    "# 2 = red cluster\n",
-    "np.unique(kmeans_elapsed_reqmem.labels_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
-    "\n",
-    "#Purple\n",
-    "df_elapsed_reqmem_0 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 0]\n",
-    "\n",
-    "#Green\n",
-    "df_elapsed_reqmem_1 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 1]\n",
-    "\n",
-    "#Red\n",
-    "df_elapsed_reqmem_2 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 2]\n",
-    "\n",
-    "#df_elapsed_reqmem_0.head(5)\n",
-    "#df_elapsed_reqmem_0.ReqMemCPU.count()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
-    "# These are the parameters for the scatter plots of each cluster\n",
-    "print(\"Purple Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_0.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_0.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_0.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_0.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_0.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_0.AllocCPUS.max())\n",
-    "\n",
-    "print(\"\\nGreen Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_1.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_1.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_1.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_1.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_1.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_1.AllocCPUS.max())\n",
-    "\n",
-    "print(\"\\nRed Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_2.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_2.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_2.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_2.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_2.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_2.AllocCPUS.max())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Creates datasets used to make the swarmplots that correspond to each cluster scatter plot. \n",
-    "# The groupby collapses duplicate (ReqMemCPU, Elapsed) pairs (summing AllocCPUS), so the plotted points are unchanged\n",
-    "# but the dataset is small enough to avoid the runtime error that occurs if a swarmplot is made from the scatter plot datasets.\n",
-    "\n",
-    "# for purple cluster \n",
-    "df_elapsed_reqmem_swarmplot0 = df_elapsed_reqmem_0.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "\n",
-    "# for green cluster\n",
-    "df_elapsed_reqmem_swarmplot1 = df_elapsed_reqmem_1.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "\n",
-    "# for red cluster\n",
-    "df_elapsed_reqmem_swarmplot2 = df_elapsed_reqmem_2.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# scatterplot of Runtime per Requested gigs of RAM using df_runtime_cluster dataset with clustering\n",
-    "# cluster center columns follow the fit dataset: 0 = ReqMemCPU, 1 = Elapsed, 2 = AllocCPUS\n",
-    "figure = plt.figure(figsize=(14, 8))\n",
-    "figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
-    "\n",
-    "elapsed_rqmem_clustergraph = figure.add_subplot(121)\n",
-    "elapsed_rqmem_clustergraph.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_reqmem.labels_, cmap='rainbow')\n",
-    "elapsed_rqmem_clustergraph.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "# 3d view of the scatterplot for better understanding of the data\n",
-    "elapsed_rqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
-    "elapsed_rqmem_clustergraph_3d.scatter(df_runtime_cluster['ReqMemCPU'], df_runtime_cluster['Elapsed'], df_runtime_cluster['AllocCPUS'], \n",
-    "                                      c=kmeans_elapsed_reqmem.labels_ ,cmap='rainbow')\n",
-    "# the z coordinate (AllocCPUS, column 2) is passed explicitly; without it the centers are drawn at z = 0\n",
-    "elapsed_rqmem_clustergraph_3d.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], clusterpoints_elapsed_reqmem[:,2], color='black')\n",
-    "\n",
-    "\n",
-    "elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
-    "elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
-    "elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')\n",
-    "\n",
-    "# sets size and color for gridlines by axis\n",
-    "elapsed_rqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This graph is a facet grid that shows the scatterplot of each cluster, in its cluster color, on the left and its corresponding swarmplot on the right. The swarmplots give a better understanding of the distribution of jobs matching a specific datapoint."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets the figure and size that each subplot is added to - each graph is a subplot\n",
-    "figure = plt.figure( figsize=(16, 16))\n",
-    "\n",
-    "#purple cluster and swarmplot\n",
-    "elapsed_reqmem_clustergraph_0 = figure.add_subplot(423)\n",
-    "elapsed_reqmem_clustergraph_0.scatter(df_elapsed_reqmem_0['ReqMemCPU'],df_elapsed_reqmem_0['Elapsed'], color = \"blueviolet\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "figure.add_subplot(424)\n",
-    "elapsed_reqmem_swarmgraph_0 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot0, x='ReqMemCPU', y='Elapsed')\n",
-    "plt.yticks(np.arange(df_elapsed_reqmem_0.Elapsed.min(), df_elapsed_reqmem_0.Elapsed.max(), 5))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "\n",
-    "#green cluster and swarmplot\n",
-    "elapsed_reqmem_clustergraph_1 = figure.add_subplot(425)\n",
-    "elapsed_reqmem_clustergraph_1.scatter(df_elapsed_reqmem_1['ReqMemCPU'],df_elapsed_reqmem_1['Elapsed'], color = \"aquamarine\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "figure.add_subplot(426)\n",
-    "elapsed_reqmem_swarmgraph_1 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot1, x='ReqMemCPU', y='Elapsed')\n",
-    "plt.yticks(np.arange(df_elapsed_reqmem_1.Elapsed.min(), df_elapsed_reqmem_1.Elapsed.max(), 5))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "\n",
-    "#red cluster and swarmplot\n",
-    "elapsed_reqmem_clustergraph_2 = figure.add_subplot(427)\n",
-    "elapsed_reqmem_clustergraph_2.scatter(df_elapsed_reqmem_2['ReqMemCPU'],df_elapsed_reqmem_2['Elapsed'], color = \"red\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "figure.add_subplot(428)\n",
-    "elapsed_reqmem_swarmgraph_2 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot2, x='ReqMemCPU', y='Elapsed')\n",
-    "plt.yticks(np.arange(df_elapsed_reqmem_2.Elapsed.min(), df_elapsed_reqmem_2.Elapsed.max(), 10))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "\n",
-    "# sets the spacing\n",
-    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
-    "# left = space to the left\n",
-    "# wspace = padding on both sides of graphs\n",
-    "# hspace = padding on top and bottom of graphs\n",
-    "figure.subplots_adjust(left=0.2, wspace=0.2, top=1.2, hspace=0.3)\n",
-    "\n",
-    "figure.suptitle('Clusters from Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB, fontsize=20)\n",
-    "\n",
-    "plt.show()\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Elapsed/AllocCPUS clustering"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
-    "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, set the fit based on the normalization type by uncommenting the line to run"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# uncomment for no normalization\n",
-    "#elapsed_alloc_fit = df_runtime_cluster\n",
-    "\n",
-    "# uncomment for 0-1 normalization\n",
-    "#elapsed_alloc_fit = normalized_runtime_df\n",
-    "\n",
-    "# uncomment for log10 normalization\n",
-    "elapsed_alloc_fit = log_runtime_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# fits a 3-cluster KMeans model (fixed seed) to the dataset chosen above\n",
-    "kmeans_elapsed_alloc = KMeans(n_clusters=3, random_state=111)\n",
-    "kmeans_elapsed_alloc.fit(elapsed_alloc_fit)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, choose which cluster center to use - uncomment the line that goes with the normalization type"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# uncomment if no normalization\n",
-    "#clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_\n",
-    "\n",
-    "# uncomment if 0-1 normalization\n",
-    "#clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_ * df_runtime_cluster_max\n",
-    "\n",
-    "# uncomment if log10 normalization\n",
-    "# back-transforms the centers of the Elapsed/AllocCPUS model (inverse of log10(x + 1))\n",
-    "clusterpoints_elapsed_alloc = 10 ** (kmeans_elapsed_alloc.cluster_centers_) - 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# returns array of labels for each cluster - used to find min and max x and y points for each cluster\n",
-    "\n",
-    "# 0 = purple cluster\n",
-    "# 1 = green cluster\n",
-    "# 2 = red cluster\n",
-    "np.unique(kmeans_elapsed_alloc.labels_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
-    "\n",
-    "#Purple\n",
-    "df_elapsed_alloc_0 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 0]\n",
-    "\n",
-    "#Green\n",
-    "df_elapsed_alloc_1 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 1]\n",
-    "\n",
-    "#Red\n",
-    "df_elapsed_alloc_2 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 2]\n",
-    "\n",
-    "#df_elapsed_alloc_0.head(5)\n",
-    "#df_elapsed_alloc_0.AllocCPUS.count()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
-    "# These are the parameters for the scatter plots of each cluster\n",
-    "print(\"Purple Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_0.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_0.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_0.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_0.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_0.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_0.AllocCPUS.max())\n",
-    "\n",
-    "print(\"\\nGreen Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_1.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_1.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_1.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_1.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_1.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_1.AllocCPUS.max())\n",
-    "\n",
-    "print(\"\\nRed Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_2.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_2.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_2.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_2.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_2.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_2.AllocCPUS.max())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Creates datasets used to make the swarmplots that correspond to each cluster scatter plot. \n",
-    "# The groupby collapses duplicate (AllocCPUS, Elapsed) pairs (summing ReqMemCPU), so the plotted points are unchanged\n",
-    "# but the dataset is small enough to avoid the runtime error that occurs if a swarmplot is made from the scatter plot datasets.\n",
-    "\n",
-    "# for purple cluster \n",
-    "df_elapsed_alloc_swarmplot0 = df_elapsed_alloc_0.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "\n",
-    "# for green cluster \n",
-    "df_elapsed_alloc_swarmplot1 = df_elapsed_alloc_1.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "\n",
-    "# for red cluster \n",
-    "df_elapsed_alloc_swarmplot2 = df_elapsed_alloc_2.groupby(['AllocCPUS','Elapsed']).sum().reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# scatterplot of Runtime per Core using df_runtime_cluster dataset with clustering\n",
-    "# cluster center columns follow the fit dataset: 0 = ReqMemCPU, 1 = Elapsed, 2 = AllocCPUS\n",
-    "figure = plt.figure(figsize=(14, 8))\n",
-    "figure.suptitle('Runtime per Core %i cores or less'%UpperlimitAllocCPU)\n",
-    "\n",
-    "elapsed_alloc_clustergraph = figure.add_subplot(121)\n",
-    "elapsed_alloc_clustergraph.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_alloc.labels_, cmap='rainbow')\n",
-    "# x axis is AllocCPUS, so the centers are plotted from column 2 rather than column 0 (ReqMemCPU)\n",
-    "elapsed_alloc_clustergraph.scatter(clusterpoints_elapsed_alloc[:,2] ,clusterpoints_elapsed_alloc[:,1], color='black')\n",
-    "plt.xlabel('AllocCPUS')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "\n",
-    "# 3d view of the scatterplot for better understanding of the data\n",
-    "elapsed_alloc_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
-    "elapsed_alloc_clustergraph_3d.scatter(df_runtime_cluster['AllocCPUS'], df_runtime_cluster['Elapsed'], df_runtime_cluster['ReqMemCPU'], c=kmeans_elapsed_alloc.labels_ ,cmap='rainbow')\n",
-    "# centers use the same axis order as the data: x = AllocCPUS (2), y = Elapsed (1), z = ReqMemCPU (0)\n",
-    "elapsed_alloc_clustergraph_3d.scatter(clusterpoints_elapsed_alloc[:,2] ,clusterpoints_elapsed_alloc[:,1], clusterpoints_elapsed_alloc[:,0], color='black')\n",
-    "elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
-    "elapsed_alloc_clustergraph_3d.set_zlabel('ReqMemCPU(gigs)')\n",
-    "\n",
-    "# sets size and color for gridlines by axis\n",
-    "elapsed_alloc_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "elapsed_alloc_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "elapsed_alloc_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# One 21x16 figure laid out as a 3x2 grid: each row shows one cluster, with its\n",
-    "# scatter plot on the left and the matching swarmplot on the right.\n",
-    "figure = plt.figure(figsize=(21, 16))\n",
-    "\n",
-    "# row 1: purple cluster and swarmplot\n",
-    "elapsed_alloc_clustergraph_0 = figure.add_subplot(3, 2, 1)\n",
-    "elapsed_alloc_clustergraph_0.scatter(\n",
-    "    df_elapsed_alloc_0['AllocCPUS'], df_elapsed_alloc_0['Elapsed'], color=\"blueviolet\"\n",
-    ")\n",
-    "elapsed_alloc_clustergraph_0.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_clustergraph_0.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "elapsed_alloc_swarmgraph_0 = sns.swarmplot(\n",
-    "    data=df_elapsed_alloc_swarmplot0, x='AllocCPUS', y='Elapsed', ax=figure.add_subplot(3, 2, 2)\n",
-    ")\n",
-    "# y ticks every 5 hours between the cluster's shortest and longest Elapsed value\n",
-    "elapsed_alloc_swarmgraph_0.set_yticks(\n",
-    "    np.arange(df_elapsed_alloc_0['Elapsed'].min(), df_elapsed_alloc_0['Elapsed'].max(), 5)\n",
-    ")\n",
-    "elapsed_alloc_swarmgraph_0.margins(0.02)\n",
-    "elapsed_alloc_swarmgraph_0.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_swarmgraph_0.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "\n",
-    "# row 2: green cluster and swarmplot\n",
-    "elapsed_alloc_clustergraph_1 = figure.add_subplot(3, 2, 3)\n",
-    "elapsed_alloc_clustergraph_1.scatter(\n",
-    "    df_elapsed_alloc_1['AllocCPUS'], df_elapsed_alloc_1['Elapsed'], color=\"aquamarine\"\n",
-    ")\n",
-    "elapsed_alloc_clustergraph_1.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_clustergraph_1.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "elapsed_alloc_swarmgraph_1 = sns.swarmplot(\n",
-    "    data=df_elapsed_alloc_swarmplot1, x='AllocCPUS', y='Elapsed', ax=figure.add_subplot(3, 2, 4)\n",
-    ")\n",
-    "elapsed_alloc_swarmgraph_1.set_yticks(\n",
-    "    np.arange(df_elapsed_alloc_1['Elapsed'].min(), df_elapsed_alloc_1['Elapsed'].max(), 5)\n",
-    ")\n",
-    "elapsed_alloc_swarmgraph_1.margins(0.02)\n",
-    "elapsed_alloc_swarmgraph_1.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_swarmgraph_1.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "\n",
-    "# row 3: red cluster and swarmplot (y ticks every 10 hours here)\n",
-    "elapsed_alloc_clustergraph_2 = figure.add_subplot(3, 2, 5)\n",
-    "elapsed_alloc_clustergraph_2.scatter(\n",
-    "    df_elapsed_alloc_2['AllocCPUS'], df_elapsed_alloc_2['Elapsed'], color=\"red\"\n",
-    ")\n",
-    "elapsed_alloc_clustergraph_2.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_clustergraph_2.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "elapsed_alloc_swarmgraph_2 = sns.swarmplot(\n",
-    "    data=df_elapsed_alloc_swarmplot2, x='AllocCPUS', y='Elapsed', ax=figure.add_subplot(3, 2, 6)\n",
-    ")\n",
-    "elapsed_alloc_swarmgraph_2.set_yticks(\n",
-    "    np.arange(df_elapsed_alloc_2['Elapsed'].min(), df_elapsed_alloc_2['Elapsed'].max(), 10)\n",
-    ")\n",
-    "elapsed_alloc_swarmgraph_2.margins(0.02)\n",
-    "elapsed_alloc_swarmgraph_2.set_xlabel('AllocCPUS')\n",
-    "elapsed_alloc_swarmgraph_2.set_ylabel('Elapsed(hours)')\n",
-    "\n",
-    "# figure title, then the spacing around the grid:\n",
-    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
-    "# left = space to the left\n",
-    "# wspace = padding on both sides of graphs\n",
-    "# hspace = padding on top and bottom of graphs\n",
-    "figure.suptitle('Clusters from Runtime per Core %i cores or less' % UpperlimitAllocCPU, fontsize=20)\n",
-    "figure.subplots_adjust(left=0.2, wspace=0.2, top=.94, hspace=0.3)\n",
-    "\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Detailed Look at Cores - In terms of Requested RAM"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Overview scatter of the df_facet dataset: ReqMemCPU on the x-axis, AllocCPUS on the y-axis.\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "elapsed_alloc_reqmem = plt.scatter(x=df_facet[\"ReqMemCPU\"], y=df_facet[\"AllocCPUS\"], color=\"blue\")\n",
-    "\n",
-    "plt.title('Number of Cores used by Requested RAM %i gigs or less' % UpperlimitGB)\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "`df_alloc_cluster` is a dataset built from `df_facet`. It is used to draw the elbow graph and to compute the clustering for AllocCPUS/ReqMemCPU."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run - the normalized datasets, the elbow graph and the per-cluster datasets are all built from df_alloc_cluster\n",
-    "\n",
-    "# value ranges noted for this dataset:\n",
-    "#   ReqMemCPU = 0 - 50 gigs\n",
-    "#   AllocCPUS = 0 - 50 cores\n",
-    "#   Elapsed   = 0 - 150.02 hours\n",
-    "\n",
-    "# non normalized dataset: the three clustering columns of df_facet, in this order\n",
-    "# (also the dataset to fit for the Alloc/ReqMem graph when no normalization is wanted)\n",
-    "df_alloc_cluster = df_facet.loc[\n",
-    "    :, ['ReqMemCPU', 'Elapsed', 'AllocCPUS']\n",
-    "]\n",
-    "df_alloc_cluster.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run if dataset will be 0-1 normalized\n",
-    "\n",
-    "# 0-1 normalized dataset\n",
-    "# used for 0-1 normalization fitting for the Alloc/ReqMem graph\n",
-    "#\n",
-    "# Each column is divided by its own maximum. Dividing every column by one shared maximum only\n",
-    "# rescales the whole dataset uniformly, which leaves the KMeans clusters the same as with no\n",
-    "# normalization.\n",
-    "column_maxes_alloc = df_alloc_cluster.max()\n",
-    "\n",
-    "# Per-column scale factors as a plain array, so that multiplying the KMeans cluster_centers_ by\n",
-    "# df_alloc_cluster_max converts each centre coordinate back to its original units.\n",
-    "# A column whose maximum is 0 is scaled by 1 to avoid dividing by zero.\n",
-    "df_alloc_cluster_max = column_maxes_alloc.replace(0, 1).to_numpy()\n",
-    "normalized_alloc_df = df_alloc_cluster / df_alloc_cluster_max\n",
-    "\n",
-    "# summary instead of printing the whole frame; columns with a positive maximum now peak at 1\n",
-    "normalized_alloc_df.describe()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run if dataset will be log10 normalized\n",
-    "\n",
-    "# log10 normalized dataset\n",
-    "# used for log10 normalization fitting for the Alloc/ReqMem graph\n",
-    "# 1 is added before taking the log so that a value of 0 maps to 0\n",
-    "\n",
-    "log_alloc_df = np.log10(1 + df_alloc_cluster)\n",
-    "log_alloc_df.describe()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets up info for plotting the optimal number of clusters - uses the (non normalized) df_alloc_cluster dataset\n",
-    "# For every k from 1 to 9 a KMeans model is fitted and its inertia_ is recorded.\n",
-    "# random_state is fixed (same seed as the 3-cluster fit further down) so the elbow graph\n",
-    "# comes out the same on every run.\n",
-    "Sum_of_squared_distances = []\n",
-    "K = range(1,10)\n",
-    "for k in K:\n",
-    "    km = KMeans(n_clusters=k, random_state=111)\n",
-    "    km = km.fit(df_alloc_cluster)\n",
-    "    Sum_of_squared_distances.append(km.inertia_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Elbow graph for the df_alloc_cluster dataset: the k at which the curve bends is the\n",
-    "# optimal number of clusters.\n",
-    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
-    "plt.title('Elbow Method For Optimal k')\n",
-    "plt.ylabel('Sum_of_squared_distances')\n",
-    "plt.xlabel('k')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, select the dataset to fit: uncomment the line for the normalization type you want and keep the other lines commented out"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Exactly one of the three assignments below should be left uncommented.\n",
-    "\n",
-    "# no normalization\n",
-    "#alloc_reqmem_fit = df_alloc_cluster\n",
-    "\n",
-    "# 0-1 normalization\n",
-    "#alloc_reqmem_fit = normalized_alloc_df\n",
-    "\n",
-    "# log10 normalization (currently selected)\n",
-    "alloc_reqmem_fit = log_alloc_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Fits a 3-cluster KMeans model with a fixed random_state on the dataset selected in\n",
-    "# alloc_reqmem_fit; the fitted model is shown as the cell output.\n",
-    "kmeans_alloc_reqmem = KMeans(random_state=111, n_clusters=3).fit(alloc_reqmem_fit)\n",
-    "kmeans_alloc_reqmem"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# In the cell below, choose which cluster centers to use: uncomment the line that matches the normalization type selected above"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Converts the fitted cluster centers back to the original units.\n",
-    "# Exactly one of the three assignments below should be left uncommented.\n",
-    "\n",
-    "# no normalization\n",
-    "#clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_\n",
-    "\n",
-    "# 0-1 normalization\n",
-    "#clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_ * df_alloc_cluster_max\n",
-    "\n",
-    "# log10 normalization (currently selected): undoes log10(x + 1)\n",
-    "clusterpoints_alloc_reqmem = np.power(10, kmeans_alloc_reqmem.cluster_centers_) - 1\n",
-    "print(clusterpoints_alloc_reqmem)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# column 0 of the cluster centers = ReqMemCPU (x coordinates of the black points in the cluster graph)\n",
-    "clusterpoints_alloc_reqmem[:, 0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# column 2 of the cluster centers = AllocCPUS (y coordinates of the black points in the cluster graph)\n",
-    "clusterpoints_alloc_reqmem[:, 2]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The next four cells list the cluster labels and build one dataset per cluster.\n",
-    "All of these datasets, for both the cluster graph and the per-cluster plots, are created before any of those graphs are drawn."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Shows the distinct cluster labels assigned by the fit. The cells below use these labels\n",
-    "# to split the data and to find the min and max values of each cluster.\n",
-    "#   0 = purple cluster\n",
-    "#   1 = green cluster\n",
-    "#   2 = red cluster\n",
-    "np.unique(kmeans_alloc_reqmem.labels_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Splits df_alloc_cluster (ReqMemCPU, Elapsed, AllocCPUS in original units) into one dataset\n",
-    "# per cluster, keeping the rows whose KMeans label matches the cluster number.\n",
-    "\n",
-    "# label 0 - Purple\n",
-    "df_alloc_reqmem_0 = df_alloc_cluster.loc[kmeans_alloc_reqmem.labels_ == 0]\n",
-    "\n",
-    "# label 1 - Green\n",
-    "df_alloc_reqmem_1 = df_alloc_cluster.loc[kmeans_alloc_reqmem.labels_ == 1]\n",
-    "\n",
-    "# label 2 - Red\n",
-    "df_alloc_reqmem_2 = df_alloc_cluster.loc[kmeans_alloc_reqmem.labels_ == 2]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# voluntary\n",
-    "\n",
-    "# Reports the min and max ReqMemCPU, Elapsed, and AllocCPUS of each per-cluster dataset.\n",
-    "# These are the value ranges covered by the scatter plot of each cluster.\n",
-    "def _print_cluster_ranges(heading, cluster_df):\n",
-    "    # prints the heading, then one min/max line per column of the given cluster dataset\n",
-    "    print(heading)\n",
-    "    for column in (\"ReqMemCPU\", \"Elapsed\", \"AllocCPUS\"):\n",
-    "        values = cluster_df[column]\n",
-    "        print(column + \":\", \"min =\", values.min(), \" \", \"max =\", values.max())\n",
-    "\n",
-    "_print_cluster_ranges(\"Purple Cluster\", df_alloc_reqmem_0)\n",
-    "_print_cluster_ranges(\"\\nGreen Cluster\", df_alloc_reqmem_1)\n",
-    "_print_cluster_ranges(\"\\nRed Cluster\", df_alloc_reqmem_2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# Datasets for the swarmplot that goes with each cluster scatter plot.\n",
-    "# Grouping keeps one row per distinct (AllocCPUS, ReqMemCPU) pair and sums Elapsed within\n",
-    "# each pair. The smaller datasets avoid the runtime error the original notes report when a\n",
-    "# swarmplot is made from the full scatter plot datasets.\n",
-    "\n",
-    "# for purple cluster\n",
-    "df_alloc_reqmem_swarmplot0 = (\n",
-    "    df_alloc_reqmem_0\n",
-    "    .groupby(['AllocCPUS', 'ReqMemCPU'])\n",
-    "    .sum()\n",
-    "    .reset_index()\n",
-    ")\n",
-    "\n",
-    "# for green cluster\n",
-    "df_alloc_reqmem_swarmplot1 = (\n",
-    "    df_alloc_reqmem_1\n",
-    "    .groupby(['AllocCPUS', 'ReqMemCPU'])\n",
-    "    .sum()\n",
-    "    .reset_index()\n",
-    ")\n",
-    "\n",
-    "# for red cluster\n",
-    "df_alloc_reqmem_swarmplot2 = (\n",
-    "    df_alloc_reqmem_2\n",
-    "    .groupby(['AllocCPUS', 'ReqMemCPU'])\n",
-    "    .sum()\n",
-    "    .reset_index()\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# scatterplot of Core per Requested RAM using df_alloc_cluster dataset with clustering\n",
-    "# left: 2d view coloured by cluster label, right: 3d view that adds Elapsed as the z-axis\n",
-    "figure = plt.figure(figsize=(14, 8))\n",
-    "figure.suptitle('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB)\n",
-    "\n",
-    "# cluster center columns follow df_alloc_cluster: 0 = ReqMemCPU, 1 = Elapsed, 2 = AllocCPUS\n",
-    "alloc_reqmem_cluster_graph = figure.add_subplot(121)\n",
-    "alloc_reqmem_cluster_graph.scatter(df_alloc_cluster['ReqMemCPU'],df_alloc_cluster['AllocCPUS'], c=kmeans_alloc_reqmem.labels_, cmap='rainbow')\n",
-    "alloc_reqmem_cluster_graph.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,2], color='black')\n",
-    "alloc_reqmem_cluster_graph.set_xlabel('ReqMemCPU(gigs)')\n",
-    "alloc_reqmem_cluster_graph.set_ylabel('AllocCPUS')\n",
-    "\n",
-    "# 3d view of the scatterplot for better understanding of the data\n",
-    "alloc_reqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
-    "alloc_reqmem_clustergraph_3d.scatter(df_alloc_cluster['ReqMemCPU'], df_alloc_cluster['AllocCPUS'], df_alloc_cluster['Elapsed'], c=kmeans_alloc_reqmem.labels_ ,cmap='rainbow')\n",
-    "# the Elapsed coordinate (column 1) is passed as z; without it the centers are all drawn at z = 0\n",
-    "alloc_reqmem_clustergraph_3d.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,2], clusterpoints_alloc_reqmem[:,1], color='black')\n",
-    "alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
-    "alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')\n",
-    "alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
-    "\n",
-    "# sets size and color for gridlines by axis\n",
-    "# NOTE(review): _axinfo is an underscore-prefixed matplotlib attribute - confirm it still exists when upgrading matplotlib\n",
-    "alloc_reqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "alloc_reqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "alloc_reqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# must run\n",
-    "\n",
-    "# sets the figure and size that each subplot is added to - each graph is a subplot\n",
-    "# Layout is a 3x2 grid: each row is one cluster, scatter plot on the left and the swarmplot\n",
-    "# made from the matching df_alloc_reqmem_swarmplot dataset on the right.\n",
-    "figure = plt.figure(figsize=(21, 16))\n",
-    "\n",
-    "\n",
-    "#purple cluster and swarmplot\n",
-    "alloc_reqmem_clustergraph_0 = figure.add_subplot(321)\n",
-    "alloc_reqmem_clustergraph_0.scatter(df_alloc_reqmem_0['ReqMemCPU'],df_alloc_reqmem_0['AllocCPUS'], color = \"blueviolet\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "figure.add_subplot(322)\n",
-    "alloc_reqmem_swarmgraph_0 = sns.swarmplot(data=df_alloc_reqmem_swarmplot0, x='ReqMemCPU', y='AllocCPUS')\n",
-    "# y ticks for this cluster start at 0 (step 3) rather than at the cluster minimum\n",
-    "plt.yticks(np.arange(0, df_alloc_reqmem_0.AllocCPUS.max(), 3))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "\n",
-    "#green cluster and swarmplot\n",
-    "alloc_reqmem_clustergraph_1 = figure.add_subplot(323)\n",
-    "alloc_reqmem_clustergraph_1.scatter(df_alloc_reqmem_1['ReqMemCPU'],df_alloc_reqmem_1['AllocCPUS'], color = \"aquamarine\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "figure.add_subplot(324)\n",
-    "alloc_reqmem_swarmgraph_1 = sns.swarmplot(data=df_alloc_reqmem_swarmplot1, x='ReqMemCPU', y='AllocCPUS')\n",
-    "plt.yticks(np.arange(df_alloc_reqmem_1.AllocCPUS.min(), df_alloc_reqmem_1.AllocCPUS.max(), 5))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "\n",
-    "#red cluster and swarmplot\n",
-    "alloc_reqmem_clustergraph_2 = figure.add_subplot(325)\n",
-    "# drawn through the axes like the other two clusters, so the variable keeps holding the subplot\n",
-    "alloc_reqmem_clustergraph_2.scatter(df_alloc_reqmem_2['ReqMemCPU'],df_alloc_reqmem_2['AllocCPUS'], color = \"red\")\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "figure.add_subplot(326)\n",
-    "alloc_reqmem_swarmgraph_2 = sns.swarmplot(data=df_alloc_reqmem_swarmplot2, x='ReqMemCPU', y='AllocCPUS')\n",
-    "plt.yticks(np.arange(df_alloc_reqmem_2.AllocCPUS.min(), df_alloc_reqmem_2.AllocCPUS.max(), 5))\n",
-    "plt.margins(0.02)\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "\n",
-    "# sets the spacing\n",
-    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
-    "# left = space to the left\n",
-    "# wspace = padding on both sides of graphs\n",
-    "# hspace = padding on top and bottom of graphs\n",
-    "figure.subplots_adjust(left=0.2, wspace=0.2, top=.94, hspace=0.3)\n",
-    "figure.suptitle('Clusters from Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB, fontsize=20)\n",
-    "\n",
-    "\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python",
-   "pygments_lexer": "ipython3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}