diff --git a/Runtime-and-CoreCount.ipynb b/Runtime-and-CoreCount.ipynb
index 7c375952737f8f81ce861df369abe641ba50d57a..5c421e1f776fbf5ccf1ccfde6e34669c8dc1e48b 100644
--- a/Runtime-and-CoreCount.ipynb
+++ b/Runtime-and-CoreCount.ipynb
@@ -7,6 +7,16 @@
     "# Notebook Setup"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# year-date-month\n",
+    "#start_date = '2020-10-09'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -24,7 +34,9 @@
     "import seaborn as sb\n",
     "import plotly.express as px\n",
     "import matplotlib.ticker as ticker\n",
-    "import numpy as np"
+    "import numpy as np\n",
+    "from mpl_toolkits.mplot3d import Axes3D\n",
+    "import os"
    ]
   },
   {
@@ -49,6 +61,17 @@
     "from sklearn.cluster import KMeans"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#connecting to database\n",
+    "#db = sqlite3.connect('runtime_and_core_count.db')\n",
+    "#print(db)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -58,7 +81,29 @@
     "# must run\n",
     "\n",
     "# creates database of info from March 2020 using sqlite 3\n",
-    "db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
+    "db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')\n",
+    "#print(db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#slurm2sql.slurm2sql(db, ['-S 2020-09-08 -E 2020-09-15 -a  --allocations -o Job,Submit,Start,End'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    " #creating a database based on the start date\n",
+    "#slurm2sql.slurm2sql(db, ['-S', '2020-01-09', '-a'])\n",
+    "#print(db)\n",
+    "#print(start_date)"
    ]
   },
   {
@@ -70,7 +115,21 @@
     "# must run\n",
     "\n",
     "# df is starting database\n",
-    "df = pd.read_sql('SELECT * FROM slurm', db)"
+    "df = pd.read_sql('SELECT * FROM slurm', db)\n",
+    "#df = pd.read_sql('SELECT JobID,Submit,Start,End FROM slurm', db)\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    " #Deleting the database\n",
+    "#os.remove('runtime_and_core_count.db')\n",
+    "#os.remove('runtime_and_core_count.db-shm')\n",
+    "#os.remove('runtime_and_core_count.db-wal') "
    ]
   },
   {
@@ -83,7 +142,7 @@
     "\n",
     "# for displaying all available column options\n",
     "pd.set_option('display.max_columns', None)\n",
-    "df.head(5)"
+    "df.count()"
    ]
   },
   {
@@ -127,7 +186,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# ReqMemCPU,Corecount,Runtime Clustering"
+    "# ReqMemCPU,Corecount,Runtime FacetGrid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next 4 cells set up the df_1 dataset, which will be the base dataset used for the facet grid."
    ]
   },
   {
@@ -175,9 +241,16 @@
    "source": [
     "# must run\n",
     "\n",
-    "# sorts dataset by AllocCPUS for easy visualization\n",
-    "df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
-    "df_1_sorted.head(5)"
+    "# makes ReqMemCPU column whole numbers rather than floats for easy readability in graphs\n",
+    "df_1.ReqMemCPU = df_1.ReqMemCPU.apply(int)\n",
+    "df_1.head(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next 3 cells set the min and max parameters for ReqMemCPU, AllocCPUS, and Elapsed. These parameters are used in creating the facet grid and are the parameters for all the cluster graphs."
    ]
   },
   {
@@ -189,8 +262,8 @@
     "# must run\n",
     "\n",
     "# sets min and max parameters for ReqMemCPU\n",
-    "UpperlimitGB = 50\n",
-    "LowerlimitGB = 0"
+    "LowerlimitGB = 0\n",
+    "UpperlimitGB = 50"
    ]
   },
   {
@@ -202,8 +275,28 @@
     "# must run\n",
     "\n",
     "# sets min and max parameters for AllocCPUS\n",
-    "UpperlimitAllocCPU = 20\n",
-    "LowerlimitAllocCPU = 0"
+    "LowerlimitAllocCPU = 0\n",
+    "UpperlimitAllocCPU = 50"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets min and max parameters for Elapsed\n",
+    "LowerlimitElapsed = 0\n",
+    "UpperlimitElapsed = 150.02"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "df_facet is a dataset created from df_1 using the parameters above. It will be the dataset that all the cluster graphs will be made from."
    ]
   },
   {
@@ -215,7 +308,13 @@
     "# must run\n",
     "\n",
     "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
-    "df_facet = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
+    "df_facet = df_1[(df_1['ReqMemCPU'] <= UpperlimitGB) & \n",
+    "                       (df_1['ReqMemCPU'] >= LowerlimitGB) & \n",
+    "                       (df_1['AllocCPUS'] <= UpperlimitAllocCPU) & \n",
+    "                       (df_1['AllocCPUS'] >= LowerlimitAllocCPU)\n",
+    "                       & \n",
+    "                       (df_1['Elapsed'] <= UpperlimitElapsed) & \n",
+    "                       (df_1['Elapsed'] >= LowerlimitElapsed)]\n",
     "df_facet.head(5)"
    ]
   },
@@ -227,12 +326,14 @@
    "source": [
     "# must run\n",
     "\n",
-    "# creates a facet grid from df_runtime dataset\n",
+    "# creates a facet grid from df_1 dataset\n",
     "# Elapsed time in hours and ReqMemCPU in gigs\n",
     "style.default_axes_and_ticks()\n",
     "style.figsize()\n",
     "\n",
-    "full_facet = sb.PairGrid(df_facet)\n",
+    "full_facet = sns.pairplot(df_facet, diag_kind = 'kde') # makes density plots - kernel density estimate\n",
+    "# y axis is count in the diagonal graphs\n",
+    "\n",
     "full_facet.map(plt.scatter);\n",
     "plt.show()"
    ]
@@ -250,11 +351,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "# voluntary\n",
     "\n",
-    "# sets min and max parameters for ReqMemCPU for clustered Elapsed Time Graphs\n",
-    "UpperlimitGB_elapsed = 50\n",
-    "LowerlimitGB_elapsed = 0"
+    "# pair grid of the two graphs being clustered using df_facet dataset\n",
+    "style.default_axes_and_ticks()\n",
+    "style.figsize()\n",
+    "\n",
+    "elapsed_reqmem_alloc = sns.PairGrid(df_facet, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
+    "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "df_runtime_cluster is a dataset made from df_facet. It is used to make the elbow graph and calculate the clustering for Elapsed/ReqMemCPU and Elapsed/AllocCPUS"
    ]
   },
   {
@@ -263,11 +374,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "#must run if dataset will not be normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
+    "\n",
+    "#ReqMemCPU = 0 - 50 gigs\n",
+    "#AllocCPUS = 0 - 50 cores\n",
+    "#Elapsed = 0 - 150.02 hours\n",
     "\n",
-    "# sets min and max parameters for AllocCPUS for clustered Elapsed Time Graphs\n",
-    "UpperlimitAllocCPU_elapsed = 20\n",
-    "LowerlimitAllocCPU_elapsed = 0"
+    "# data set without normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs\n",
+    "df_runtime_cluster = df_facet.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
+    "df_runtime_cluster.head(5)"
    ]
   },
   {
@@ -276,11 +391,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "# must run if dataset will be 0-1 normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
     "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
-    "df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_elapsed) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_elapsed) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_elapsed) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_elapsed)]\n",
-    "df_runtime_cluster.head(5)"
+    "# 0-1 normalized dataset\n",
+    "# used for 0-1 normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs \n",
+    "column_maxes_runtime = df_runtime_cluster.max()\n",
+    "df_runtime_cluster_max = column_maxes_runtime.max()\n",
+    "normalized_runtime_df = df_runtime_cluster / df_runtime_cluster_max\n",
+    "\n",
+    "print(normalized_runtime_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run if dataset will be log10 normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
+    "\n",
+    "# log10 normalized dataset\n",
+    "# used for log10 normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs \n",
+    "\n",
+    "log_runtime_df = np.log10(df_runtime_cluster+1)\n",
+    "log_runtime_df.describe()"
    ]
   },
   {
@@ -316,6 +450,44 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Elapsed/ReqMemCPU clustering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
+    "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, set the fit based on the normalization type by uncommenting the line to run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# uncomment for no normalization\n",
+    "#elapsed_reqmem_fit = df_runtime_cluster\n",
+    "\n",
+    "# uncomment for 0-1 normalization\n",
+    "#elapsed_reqmem_fit = normalized_runtime_df\n",
+    "\n",
+    "# uncomment for log10 normalization\n",
+    "elapsed_reqmem_fit = log_runtime_df"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -325,9 +497,15 @@
     "# must run\n",
     "\n",
     "# sets to clusters and returns the cluster points\n",
-    "kmeans = KMeans(n_clusters=3, random_state=111)\n",
-    "kmeans.fit(df_runtime_cluster)\n",
-    "print(kmeans.cluster_centers_)"
+    "kmeans_elapsed_reqmem = KMeans(n_clusters=3, random_state=111)\n",
+    "kmeans_elapsed_reqmem.fit(elapsed_reqmem_fit)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, choose which cluster center to use - uncomment the line that goes with the normalization type"
    ]
   },
   {
@@ -336,14 +514,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "# uncomment if no normalization\n",
+    "#clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_\n",
     "\n",
-    "# facet grid of the two graphs being clustered using df_runtime_cluster dataset\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
+    "# uncomment if 0-1 normalization\n",
+    "#clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_ * df_runtime_cluster_max\n",
     "\n",
-    "elapsed_reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
-    "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")"
+    "# uncomment if log10 normalization\n",
+    "clusterpoints_elapsed_reqmem = 10 ** (kmeans_elapsed_reqmem.cluster_centers_) - 1"
    ]
   },
   {
@@ -354,17 +532,12 @@
    "source": [
     "# must run\n",
     "\n",
-    "# clustered graph\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "elapsed_runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
-    "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
+    "# returns array of labels for each cluster - used to find min and max x and y points for each cluster\n",
     "\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed)\n",
-    "plt.show()"
+    "# 0 = purple cluster\n",
+    "# 1 = green cluster\n",
+    "# 2 = red cluster\n",
+    "np.unique(kmeans_elapsed_reqmem.labels_)"
    ]
   },
   {
@@ -375,24 +548,45 @@
    "source": [
     "# must run\n",
     "\n",
-    "# clustered graph\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
+    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
     "\n",
-    "elapsed_alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
-    "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
+    "#Purple\n",
+    "df_elapsed_reqmem_0 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 0]\n",
     "\n",
-    "plt.xlabel('AllocCPUS')\n",
-    "plt.ylabel('Elapsed(hours)')\n",
-    "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_elapsed)\n",
-    "plt.show()"
+    "#Green\n",
+    "df_elapsed_reqmem_1 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 1]\n",
+    "\n",
+    "#Red\n",
+    "df_elapsed_reqmem_2 = df_runtime_cluster[kmeans_elapsed_reqmem.labels_ == 2]\n",
+    "\n",
+    "#df_elapsed_reqmem_0.head(5)\n",
+    "#df_elapsed_reqmem_0.ReqMemCPU.count()"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "# Detailed Look at Elapsed Time - In terms of Requested RAM and Cores"
+    "# voluntary\n",
+    "\n",
+    "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
+    "# These are the parameters for the scatter plots of each cluster\n",
+    "print(\"Purple Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_0.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_0.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_0.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_0.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_0.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_0.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nGreen Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_1.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_1.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_1.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_1.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_1.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_1.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nRed Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_reqmem_2.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_reqmem_2.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_reqmem_2.Elapsed.min(),\" \",\"max =\",df_elapsed_reqmem_2.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_reqmem_2.AllocCPUS.min(),\" \",\"max =\",df_elapsed_reqmem_2.AllocCPUS.max())"
    ]
   },
   {
@@ -403,9 +597,18 @@
    "source": [
     "# must run\n",
     "\n",
-    "# second set of min and max parameters for ReqMemCPU to use for AllocCPU/ReqMemCPU cluster graph \n",
-    "UpperlimitGB_alloc = 50\n",
-    "LowerlimitGB_alloc = 0"
+    "# Creates datasets used to make the swarmplots that correspong to each cluster scatter plot. \n",
+    "# The groupby does not change the data, but it does make a small enough dataset to keep from having a \n",
+    "#runtime error, as will happen if a swarmplot is made using the scatter plot datasets.\n",
+    "\n",
+    "# for purple cluster \n",
+    "df_elapsed_reqmem_swarmplot0 = df_elapsed_reqmem_0.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "\n",
+    "# for green cluster\n",
+    "df_elapsed_reqmem_swarmplot1 = df_elapsed_reqmem_1.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "\n",
+    "# for red cluster\n",
+    "df_elapsed_reqmem_swarmplot2 = df_elapsed_reqmem_2.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()"
    ]
   },
   {
@@ -416,9 +619,40 @@
    "source": [
     "# must run\n",
     "\n",
-    "# sets min and max parameters for AllocCPUS\n",
-    "UpperlimitAllocCPU_alloc = 60\n",
-    "LowerlimitAllocCPU_alloc = 0"
+    "# scatterplot of Runtime per Requested gigs of RAM using df_runtime_cluster dataset with clustering\n",
+    "figure = plt.figure(figsize=(14, 8))\n",
+    "figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
+    "\n",
+    "elapsed_rqmem_clustergraph = figure.add_subplot(121)\n",
+    "elapsed_rqmem_clustergraph.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_reqmem.labels_, cmap='rainbow')\n",
+    "elapsed_rqmem_clustergraph.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "# 3d veiw of the scatterplot for better understanding of the data\n",
+    "elapsed_rqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
+    "elapsed_rqmem_clustergraph_3d.scatter(df_runtime_cluster['ReqMemCPU'], df_runtime_cluster['Elapsed'], df_runtime_cluster['AllocCPUS'], \n",
+    "                                      c=kmeans_elapsed_reqmem.labels_ ,cmap='rainbow')\n",
+    "elapsed_rqmem_clustergraph_3d.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n",
+    "\n",
+    "\n",
+    "elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
+    "elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
+    "elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')\n",
+    "\n",
+    "# sets size and color for gridlines by axis\n",
+    "elapsed_rqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This graph is a facet grid that shows scatterplots by cluster color on the left, and it's corresponging swarmplot in the right. The swarmplots give a better understanding of the distrubition of jobs matching a specific datapoint."
    ]
   },
   {
@@ -429,9 +663,83 @@
    "source": [
     "# must run\n",
     "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
-    "df_allocCPUS_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_alloc) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_alloc) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_alloc) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_alloc)]\n",
-    "df_allocCPUS.head(5)"
+    "# sets the figure and size that each subplot is added to - each graph is a subplot\n",
+    "figure = plt.figure( figsize=(16, 16))\n",
+    "\n",
+    "#purple cluster and swarmplot\n",
+    "elapsed_reqmem_clustergraph_0 = figure.add_subplot(423)\n",
+    "elapsed_reqmem_clustergraph_0.scatter(df_elapsed_reqmem_0['ReqMemCPU'],df_elapsed_reqmem_0['Elapsed'], color = \"blueviolet\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(424)\n",
+    "elapsed_reqmem_swarmgraph_0 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot0, x='ReqMemCPU', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_reqmem_0.Elapsed.min(), df_elapsed_reqmem_0.Elapsed.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "\n",
+    "#green cluster and swarmplot\n",
+    "elapsed_reqmem_clustergraph_1 = figure.add_subplot(425)\n",
+    "elapsed_reqmem_clustergraph_1.scatter(df_elapsed_reqmem_1['ReqMemCPU'],df_elapsed_reqmem_1['Elapsed'], color = \"aquamarine\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(426)\n",
+    "elapsed_reqmem_swarmgraph_1 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot1, x='ReqMemCPU', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_reqmem_1.Elapsed.min(), df_elapsed_reqmem_1.Elapsed.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "\n",
+    "#red cluster and swarmplot\n",
+    "elapsed_reqmem_clustergraph_2 = figure.add_subplot(427)\n",
+    "elapsed_reqmem_clustergraph_2.scatter(df_elapsed_reqmem_2['ReqMemCPU'],df_elapsed_reqmem_2['Elapsed'], color = \"red\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(428)\n",
+    "elapsed_reqmem_swarmgraph_2 = sns.swarmplot(data=df_elapsed_reqmem_swarmplot2, x='ReqMemCPU', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_reqmem_2.Elapsed.min(), df_elapsed_reqmem_2.Elapsed.max(), 10))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "\n",
+    "# sets the spacing\n",
+    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
+    "# left = space to the left\n",
+    "# wspace = padding on both sides of graphs\n",
+    "# hspace = padding on top and bottom of graphs\n",
+    "figure.subplots_adjust(left=0.2, wspace=0.2, top=1.2, hspace=0.3)\n",
+    "\n",
+    "figure.suptitle('Clusters from Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB, fontsize=20)\n",
+    "\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Elapsed/AllocCPUS clustering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
+    "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, set the fit based on the normalization type by uncommenting the line to run"
    ]
   },
   {
@@ -440,15 +748,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "# uncomment for no normalization\n",
+    "#elapsed_alloc_fit = df_runtime_cluster\n",
     "\n",
-    "# sets up info for plotting the optimal number of clusters - uses df_runtime_cluster datasaet\n",
-    "Sum_of_squared_distances = []\n",
-    "K = range(1,10)\n",
-    "for k in K:\n",
-    "    km = KMeans(n_clusters=k)\n",
-    "    km = km.fit(df_allocCPUS_cluster)\n",
-    "    Sum_of_squared_distances.append(km.inertia_)"
+    "# uncomment for 0-1 normalization\n",
+    "#elapsed_alloc_fit = normalized_runtime_df\n",
+    "\n",
+    "# uncomment for log10 normalization\n",
+    "elapsed_alloc_fit = log_runtime_df"
    ]
   },
   {
@@ -459,12 +766,16 @@
    "source": [
     "# must run\n",
     "\n",
-    "# the bend in the graph is the optimal number of clusters for graphs using the df_runtime_cluster dataset\n",
-    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
-    "plt.xlabel('k')\n",
-    "plt.ylabel('Sum_of_squared_distances')\n",
-    "plt.title('Elbow Method For Optimal k')\n",
-    "plt.show()"
+    "# sets to clusters and returns the cluster points\n",
+    "kmeans_elapsed_alloc = KMeans(n_clusters=3, random_state=111)\n",
+    "kmeans_elapsed_alloc.fit(elapsed_alloc_fit)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, choose which cluster center to use - uncomment the line that goes with the normalization type"
    ]
   },
   {
@@ -473,12 +784,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# must run\n",
+    "# uncomment if no normalization\n",
+    "#clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_\n",
     "\n",
-    "# sets to clusters and returns the cluster points\n",
-    "kmeans = KMeans(n_clusters=3, random_state=111)\n",
-    "kmeans.fit(df_allocCPUS_cluster)\n",
-    "print(kmeans.cluster_centers_)"
+    "# uncomment if 0-1 normalization\n",
+    "#clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_ * df_runtime_cluster_max\n",
+    "\n",
+    "# uncomment if log10 normalization\n",
+    "clusterpoints_elapsed_alloc = 10 ** (kmeans_elapsed_reqmem.cluster_centers_) - 1"
    ]
   },
   {
@@ -487,18 +800,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "alloc_reqmem_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_allocCPUS_cluster)\n",
-    "\n",
-    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
+    "# must run\n",
     "\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "#plt.yscale(\"log\")\n",
+    "# returns array of labels for each cluster - used to find min and max x and y points for each cluster\n",
     "\n",
-    "plt.show()"
+    "# 0 = purple cluster\n",
+    "# 1 = green cluster\n",
+    "# 2 = red cluster\n",
+    "np.unique(kmeans_elapsed_alloc.labels_)"
    ]
   },
   {
@@ -509,16 +818,554 @@
    "source": [
     "# must run\n",
     "\n",
-    "# clustered graph\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
+    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
     "\n",
-    "alloc_reqmem_cluster_graph = plt.scatter(df_allocCPUS_cluster['ReqMemCPU'],df_allocCPUS_cluster['AllocCPUS'], c=kmeans.labels_, cmap='rainbow')\n",
-    "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
+    "#Purple\n",
+    "df_elapsed_alloc_0 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 0]\n",
     "\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "#Green\n",
+    "df_elapsed_alloc_1 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 1]\n",
+    "\n",
+    "#Red\n",
+    "df_elapsed_alloc_2 = df_runtime_cluster[kmeans_elapsed_alloc.labels_ == 2]\n",
+    "\n",
+    "#df_elapsed_alloc_0.head(5)\n",
+    "#df_elapsed_alloc_0.AllocCPUS.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# voluntary\n",
+    "\n",
+    "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
+    "# These are the parameters for the scatter plots of each cluster\n",
+    "print(\"Purple Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_0.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_0.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_0.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_0.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_0.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_0.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nGreen Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_1.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_1.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_1.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_1.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_1.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_1.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nRed Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_elapsed_alloc_2.ReqMemCPU.min(),\" \",\"max =\",df_elapsed_alloc_2.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_elapsed_alloc_2.Elapsed.min(),\" \",\"max =\",df_elapsed_alloc_2.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_elapsed_alloc_2.AllocCPUS.min(),\" \",\"max =\",df_elapsed_alloc_2.AllocCPUS.max())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# Creates datasets used to make the swarmplots that correspong to each cluster scatter plot. \n",
+    "# The groupby does not change the data, but it does make a small enough dataset to keep from having a \n",
+    "#runtime error, as will happen if a swarmplot is made using the scatter plot datasets.\n",
+    "\n",
+    "# for purple cluster \n",
+    "df_elapsed_alloc_swarmplot0 = df_elapsed_alloc_0.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "\n",
+    "# for green cluster \n",
+    "df_elapsed_alloc_swarmplot1 = df_elapsed_alloc_1.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "\n",
+    "# for red cluster \n",
+    "df_elapsed_alloc_swarmplot2 = df_elapsed_alloc_2.groupby(['AllocCPUS','Elapsed']).sum().reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# scatterplot of Runtime per Core using df_runtime_cluster dataset with clustering\n",
+    "figure = plt.figure(figsize=(14, 8))\n",
+    "figure.suptitle('Runtime per Core %i cores or less'%UpperlimitAllocCPU)\n",
+    "\n",
+    "elapsed_alloc_clustergraph = figure.add_subplot(121)\n",
+    "elapsed_alloc_clustergraph.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_alloc.labels_, cmap='rainbow')\n",
+    "elapsed_alloc_clustergraph.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "# 3d veiw of the scatterplot for better understanding of the data\n",
+    "elapsed_alloc_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
+    "elapsed_alloc_clustergraph_3d.scatter(df_runtime_cluster['AllocCPUS'], df_runtime_cluster['Elapsed'], df_runtime_cluster['ReqMemCPU'], c=kmeans_elapsed_alloc.labels_ ,cmap='rainbow')\n",
+    "elapsed_alloc_clustergraph_3d.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n",
+    "elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')\n",
+    "elapsed_alloc_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
+    "elapsed_alloc_clustergraph_3d.set_zlabel('ReqMemCPU(gigs)')\n",
+    "\n",
+    "# sets size and color for gridlines by axis\n",
+    "elapsed_alloc_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "elapsed_alloc_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "elapsed_alloc_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets the figure and size that each subplot is added to - each graph is a subplot\n",
+    "figure = plt.figure( figsize=(21, 16))\n",
+    "\n",
+    "#purple cluster and swarmplot\n",
+    "elapsed_alloc_clustergraph_0 = figure.add_subplot(321)\n",
+    "elapsed_alloc_clustergraph_0.scatter(df_elapsed_alloc_0['AllocCPUS'],df_elapsed_alloc_0['Elapsed'], color = \"blueviolet\")\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(322)\n",
+    "elapsed_alloc_swarmgraph_0 = sns.swarmplot(data=df_elapsed_alloc_swarmplot0, x='AllocCPUS', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_alloc_0.Elapsed.min(), df_elapsed_alloc_0.Elapsed.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "\n",
+    "#green cluster and swarmplot\n",
+    "elapsed_alloc_clustergraph_1 = figure.add_subplot(323)\n",
+    "elapsed_alloc_clustergraph_1.scatter(df_elapsed_alloc_1['AllocCPUS'],df_elapsed_alloc_1['Elapsed'], color = \"aquamarine\")\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(324)\n",
+    "elapsed_alloc_swarmgraph_1 = sns.swarmplot(data=df_elapsed_alloc_swarmplot1, x='AllocCPUS', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_alloc_1.Elapsed.min(), df_elapsed_alloc_1.Elapsed.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "\n",
+    "#red cluster and swarmplot\n",
+    "elapsed_alloc_clustergraph_2 = figure.add_subplot(325)\n",
+    "elapsed_alloc_clustergraph_2.scatter(df_elapsed_alloc_2['AllocCPUS'],df_elapsed_alloc_2['Elapsed'], color = \"red\")\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "figure.add_subplot(326)\n",
+    "elapsed_alloc_swarmgraph_2 = sns.swarmplot(data=df_elapsed_alloc_swarmplot2, x='AllocCPUS', y='Elapsed')\n",
+    "plt.yticks(np.arange(df_elapsed_alloc_2.Elapsed.min(), df_elapsed_alloc_2.Elapsed.max(), 10))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('AllocCPUS')\n",
+    "plt.ylabel('Elapsed(hours)')\n",
+    "\n",
+    "# sets the spacing\n",
+    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
+    "# left = space to the left\n",
+    "# wspace = padding on both sides of graphs\n",
+    "# hspace = padding on top and bottom of graphs\n",
+    "figure.subplots_adjust(left=0.2, wspace=0.2, top=.94, hspace=0.3)\n",
+    "figure.suptitle('Clusters from Runtime per Core %i cores or less'%UpperlimitAllocCPU, fontsize=20)\n",
+    "\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Detailed Look at Cores - In terms of Requested RAM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# scatterplot of AllocCPUS/ReqMemCPU using df_facet dataset\n",
+    "style.default_axes_and_ticks()\n",
+    "style.figsize()\n",
+    "\n",
+    "elapsed_alloc_reqmem = plt.scatter(df_facet[\"ReqMemCPU\"], df_facet[\"AllocCPUS\"], color = \"blue\")\n",
+    "\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
     "plt.ylabel('AllocCPUS')\n",
-    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
+    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "df_alloc_cluster is a dataset made from df_facet. It is used to make the elbow graph and calculate the clustering for AllocCPUS/ReqMemCPU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run if dataset will not be normalized\n",
+    "\n",
+    "#ReqMemCPU = 0 - 50 gigs\n",
+    "#AllocCPUS = 0 - 50 cores\n",
+    "#Elapsed = 0 - 150.02 hours\n",
+    "\n",
+    "# non normalized dataset\n",
+    "# used for fitting for the Alloc/ReqMem graph without normalization\n",
+    "df_alloc_cluster = df_facet.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
+    "df_alloc_cluster.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run if dataset will be 0-1 normalized\n",
+    "\n",
+    "# 0-1 normalized dataset\n",
+    "# used for 0-1 normalization fitting for the Alloc/ReqMem graph\n",
+    "column_maxes_alloc = df_alloc_cluster.max()\n",
+    "df_alloc_cluster_max = column_maxes_alloc.max()\n",
+    "normalized_alloc_df = df_alloc_cluster / df_alloc_cluster_max\n",
+    "\n",
+    "print(normalized_alloc_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run if dataset will be log10 normalized for both Elapsed/ReqMem and Elapsed/Alloc graphs\n",
+    "\n",
+    "# log10 normalized dataset\n",
+    "# used for log10 normalization fitting for both the Elapsed/ReqMem and Elapsed/Alloc graphs \n",
+    "\n",
+    "log_alloc_df = np.log10(df_alloc_cluster+1)\n",
+    "log_alloc_df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets up info for plotting the optimal number of clusters - uses df_alloc_cluster datasaet\n",
+    "Sum_of_squared_distances = []\n",
+    "K = range(1,10)\n",
+    "for k in K:\n",
+    "    km = KMeans(n_clusters=k)\n",
+    "    km = km.fit(df_alloc_cluster)\n",
+    "    Sum_of_squared_distances.append(km.inertia_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# the bend in the graph is the optimal number of clusters for graphs using the df_alloc_cluster dataset\n",
+    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
+    "plt.xlabel('k')\n",
+    "plt.ylabel('Sum_of_squared_distances')\n",
+    "plt.title('Elbow Method For Optimal k')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, set the fit based on the normalization type by uncommenting the line to run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# uncomment for no normalization\n",
+    "#alloc_reqmem_fit = df_alloc_cluster\n",
+    "\n",
+    "# uncomment for 0-1 normalization\n",
+    "#alloc_reqmem_fit = normalized_alloc_df\n",
+    "\n",
+    "# uncomment for log10 normalization\n",
+    "alloc_reqmem_fit = log_alloc_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets to clusters and returns the cluster points\n",
+    "kmeans_alloc_reqmem = KMeans(n_clusters=3, random_state=111)\n",
+    "kmeans_alloc_reqmem.fit(alloc_reqmem_fit)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# In the cell below, choose which cluster center to use - uncomment the line that goes with the normalization type"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# uncomment if no normalization\n",
+    "#clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_\n",
+    "\n",
+    "# uncomment if 0-1 normalization\n",
+    "#clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_ * df_alloc_cluster_max\n",
+    "\n",
+    "# uncomment if log10 normalization\n",
+    "clusterpoints_alloc_reqmem = (10 ** (kmeans_alloc_reqmem.cluster_centers_)) - 1\n",
+    "print(clusterpoints_alloc_reqmem)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clusterpoints_alloc_reqmem[:,0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clusterpoints_alloc_reqmem[:,2]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next 5 cells find each cluster label, and create datasets of data in each cluster.\n",
+    "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# returns array of labels for each cluster - used to find min and max x and y points for each cluster\n",
+    "\n",
+    "# 0 = purple cluster\n",
+    "# 1 = green cluster\n",
+    "# 2 = red cluster\n",
+    "np.unique(kmeans_alloc_reqmem.labels_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
+    "\n",
+    "#Purple\n",
+    "df_alloc_reqmem_0 = df_alloc_cluster[kmeans_alloc_reqmem.labels_ == 0]\n",
+    "\n",
+    "#Green\n",
+    "df_alloc_reqmem_1 = df_alloc_cluster[kmeans_alloc_reqmem.labels_ == 1]\n",
+    "\n",
+    "#Red\n",
+    "df_alloc_reqmem_2 = df_alloc_cluster[kmeans_alloc_reqmem.labels_ == 2]\n",
+    "\n",
+    "#df_elapsed_alloc_0.head(5)\n",
+    "#df_elapsed_alloc_0.AllocCPUS.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# voluntary\n",
+    "\n",
+    "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
+    "# These are the parameters for the scatter plots of each cluster\n",
+    "print(\"Purple Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_alloc_reqmem_0.ReqMemCPU.min(),\" \",\"max =\",df_alloc_reqmem_0.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_alloc_reqmem_0.Elapsed.min(),\" \",\"max =\",df_alloc_reqmem_0.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_alloc_reqmem_0.AllocCPUS.min(),\" \",\"max =\",df_alloc_reqmem_0.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nGreen Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_alloc_reqmem_1.ReqMemCPU.min(),\" \",\"max =\",df_alloc_reqmem_1.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_alloc_reqmem_1.Elapsed.min(),\" \",\"max =\",df_alloc_reqmem_1.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_alloc_reqmem_1.AllocCPUS.min(),\" \",\"max =\",df_alloc_reqmem_1.AllocCPUS.max())\n",
+    "\n",
+    "print(\"\\nRed Cluster\")\n",
+    "print(\"ReqMemCPU:\", \"min =\",df_alloc_reqmem_2.ReqMemCPU.min(),\" \",\"max =\",df_alloc_reqmem_2.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",df_alloc_reqmem_2.Elapsed.min(),\" \",\"max =\",df_alloc_reqmem_2.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",df_alloc_reqmem_2.AllocCPUS.min(),\" \",\"max =\",df_alloc_reqmem_2.AllocCPUS.max())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# Creates datasets used to make the swarmplots that correspong to each cluster scatter plot. \n",
+    "# The groupby does not change the data, but it does make a small enough dataset to keep from having a \n",
+    "#runtime error, as will happen if a swarmplot is made using the scatter plot datasets.\n",
+    "\n",
+    "# for purple cluster \n",
+    "df_alloc_reqmem_swarmplot0 = df_alloc_reqmem_0.groupby(['AllocCPUS','ReqMemCPU']).sum().reset_index()\n",
+    "\n",
+    "# for green cluster \n",
+    "df_alloc_reqmem_swarmplot1 = df_alloc_reqmem_1.groupby(['AllocCPUS','ReqMemCPU']).sum().reset_index()\n",
+    "\n",
+    "# for red cluster \n",
+    "df_alloc_reqmem_swarmplot2 = df_alloc_reqmem_2.groupby(['AllocCPUS','ReqMemCPU']).sum().reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# scatterplot of Core per Requested RAM using df_alloc_cluster dataset with clustering\n",
+    "figure = plt.figure(figsize=(14, 8))\n",
+    "figure.suptitle('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB)\n",
+    "\n",
+    "alloc_reqmem_cluster_graph = figure.add_subplot(121)\n",
+    "alloc_reqmem_cluster_graph.scatter(df_alloc_cluster['ReqMemCPU'],df_alloc_cluster['AllocCPUS'], c=kmeans_alloc_reqmem.labels_, cmap='rainbow')\n",
+    "alloc_reqmem_cluster_graph.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,2], color='black')\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "# 3d veiw of the scatterplot for better understanding of the data\n",
+    "alloc_reqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
+    "alloc_reqmem_clustergraph_3d.scatter(df_alloc_cluster['ReqMemCPU'], df_alloc_cluster['AllocCPUS'], df_alloc_cluster['Elapsed'], c=kmeans_alloc_reqmem.labels_ ,cmap='rainbow')\n",
+    "alloc_reqmem_clustergraph_3d.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,2], color='black')\n",
+    "alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n",
+    "alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')\n",
+    "alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
+    "\n",
+    "# sets size and color for gridlines by axis\n",
+    "alloc_reqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "alloc_reqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "alloc_reqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets the figure and size that each subplot is added to - each graph is a subplot\n",
+    "figure = plt.figure(figsize=(21, 16))\n",
+    "\n",
+    "\n",
+    "#purple cluster and swarmplot\n",
+    "alloc_reqmem_clustergraph_0 = figure.add_subplot(321)\n",
+    "alloc_reqmem_clustergraph_0.scatter(df_alloc_reqmem_0['ReqMemCPU'],df_alloc_reqmem_0['AllocCPUS'], color = \"blueviolet\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "figure.add_subplot(322)\n",
+    "alloc_reqmem_swarmgraph_0 = sns.swarmplot(data=df_alloc_reqmem_swarmplot0, x='ReqMemCPU', y='AllocCPUS')\n",
+    "plt.yticks(np.arange(0, df_alloc_reqmem_0.AllocCPUS.max(), 3))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "\n",
+    "#green cluster and swarmplot\n",
+    "alloc_reqmem_clustergraph_1 = figure.add_subplot(323)\n",
+    "alloc_reqmem_clustergraph_1.scatter(df_alloc_reqmem_1['ReqMemCPU'],df_alloc_reqmem_1['AllocCPUS'], color = \"aquamarine\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "figure.add_subplot(324)\n",
+    "alloc_reqmem_swarmgraph_1 = sns.swarmplot(data=df_alloc_reqmem_swarmplot1, x='ReqMemCPU', y='AllocCPUS')\n",
+    "plt.yticks(np.arange(df_alloc_reqmem_1.AllocCPUS.min(), df_alloc_reqmem_1.AllocCPUS.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "\n",
+    "#red cluster and swarmplot\n",
+    "alloc_reqmem_clustergraph_2 = figure.add_subplot(325)\n",
+    "alloc_reqmem_clustergraph_2 = plt.scatter(df_alloc_reqmem_2['ReqMemCPU'],df_alloc_reqmem_2['AllocCPUS'], color = \"red\")\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "figure.add_subplot(326)\n",
+    "alloc_reqmem_swarmgraph_2 = sns.swarmplot(data=df_alloc_reqmem_swarmplot2, x='ReqMemCPU', y='AllocCPUS')\n",
+    "plt.yticks(np.arange(df_alloc_reqmem_2.AllocCPUS.min(), df_alloc_reqmem_2.AllocCPUS.max(), 5))\n",
+    "plt.margins(0.02)\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "\n",
+    "# sets the spacing\n",
+    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
+    "# left = space to the left\n",
+    "# wspace = padding on both sides of graphs\n",
+    "# hspace = padding on top and bottom of graphs\n",
+    "figure.subplots_adjust(left=0.2, wspace=0.2, top=.94, hspace=0.3)\n",
+    "figure.suptitle('Clusters from Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB, fontsize=20)\n",
+    "\n",
+    "\n",
+    "\n",
     "plt.show()"
    ]
   },
diff --git a/gitattributes.txt b/gitattributes.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c2ef3020c839cc1bcf08b88e3041a704aa98dfa
--- /dev/null
+++ b/gitattributes.txt
@@ -0,0 +1 @@
+*.ipynb filter=nbstrip_full
diff --git a/gitconfig.txt b/gitconfig.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2fb335e9cbd93454934ebfa7cd59c9cc20c2a86f
--- /dev/null
+++ b/gitconfig.txt
@@ -0,0 +1,11 @@
+[core]
+attributesfile = ~/.gitattributes
+[filter "nbstrip_full"]
+clean = "jq --indent 1 \
+        '(.cells[] | select(has(\"outputs\")) | .outputs) = []  \
+        | (.cells[] | select(has(\"execution_count\")) | .execution_count) = null  \
+        | .metadata = {\"language_info\": {\"name\": \"python\", \"pygments_lexer\": \"ipython3\"}} \
+        | .cells[].metadata = {} \
+        '"
+smudge = cat
+required = true
diff --git a/requirements.txt b/requirements.txt
index 36faad6e414084dc05ef4e6639de433f1f8e3580..452a651e794717af9370c1c6124b6dc8a06efe99 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -48,7 +48,7 @@ parso==0.6.2
 pexpect==4.8.0
 phik==0.9.9
 pickleshare==0.7.5
-plotly==4.5.2
+plotly==4.8.2
 pluggy==0.13.1
 prometheus-client==0.7.1
 prompt-toolkit==3.0.3