diff --git a/Cluster_Analysis.ipynb b/Cluster_Analysis.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f1dd1446cb9b22367aef0b44e1c6eaab4db27d79
--- /dev/null
+++ b/Cluster_Analysis.ipynb
@@ -0,0 +1,436 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Setup Options"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# year-date-month\n",
+    "#start_date = '2020-10-09'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# inclusive lower/upper bounds (in gigs) used to filter jobs by ReqMemCPU\n",
+    "LowerlimitGB, UpperlimitGB = 0, 50"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# inclusive lower/upper bounds on the AllocCPUS value used to filter jobs\n",
+    "LowerlimitAllocCPU, UpperlimitAllocCPU = 0, 50"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# inclusive lower/upper bounds (in hours) used to filter jobs by Elapsed\n",
+    "LowerlimitElapsed, UpperlimitElapsed = 0, 150.02"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Enter 'none', '0-1', or 'log' as a choice for data normalization\n",
+    "Data_Normalization_Choice = 'none'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "import sqlite3\n",
+    "import slurm2sql\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "import seaborn as sns\n",
+    "import seaborn as sb\n",
+    "import plotly.express as px\n",
+    "import matplotlib.ticker as ticker\n",
+    "import numpy as np\n",
+    "from mpl_toolkits.mplot3d import Axes3D\n",
+    "import os\n",
+    "from RC_styles import rc_styles as style\n",
+    "from sklearn.cluster import KMeans"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Database Creation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# opens a sqlite3 connection to the database of job info from March 2020\n",
+    "db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# df is the starting dataframe, loaded from every row of the slurm table\n",
+    "df = pd.read_sql('SELECT * FROM slurm', db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# df_1 is dataframe of all jobs whose State contains 'COMPLETED'\n",
+    "# na=False counts a missing State as not completed instead of raising on a NaN mask\n",
+    "df_1 = df[df.State.str.contains('COMPLETED', na=False)]\n",
+    "#df_1.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# keeps only the columns needed for the clustering and graphs below\n",
+    "# column order matters: cluster centers are later indexed by position (0=ReqMemCPU, 1=Elapsed, 2=AllocCPUS)\n",
+    "needed_columns = ['ReqMemCPU', 'Elapsed', 'AllocCPUS']\n",
+    "df_completed = df_1.loc[:, needed_columns]\n",
+    "#df_completed.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number\n",
+    "# reads the raw values from df_1 so re-running this cell does not convert the column a second time\n",
+    "df_completed['ReqMemCPU'] = np.ceil(df_1['ReqMemCPU'].div(1024**3)).astype('int64')\n",
+    "#df_completed.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# converts Elapsed time to hours (from seconds) and rounds to 2 decimal places\n",
+    "# reads the raw values from df_1 so re-running this cell does not divide the column a second time\n",
+    "df_completed['Elapsed'] = df_1['Elapsed'].div(3600).round(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# keeps the completed jobs whose ReqMemCPU, AllocCPUS, and Elapsed all fall inside the min and max parameters\n",
+    "# Series.between is inclusive on both ends, i.e. lower <= value <= upper\n",
+    "within_limits = (\n",
+    "    df_completed['ReqMemCPU'].between(LowerlimitGB, UpperlimitGB)\n",
+    "    & df_completed['AllocCPUS'].between(LowerlimitAllocCPU, UpperlimitAllocCPU)\n",
+    "    & df_completed['Elapsed'].between(LowerlimitElapsed, UpperlimitElapsed)\n",
+    ")\n",
+    "df_clustering = df_completed[within_limits]\n",
+    "df_clustering.head(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Normalizing the Data for ReqMemCPU, Elapsed, and AllocCPUS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# builds `fit`, the data handed to kmeans, according to Data_Normalization_Choice\n",
+    "if Data_Normalization_Choice == '0-1':\n",
+    "    # scales every column by its own maximum so each feature spans 0-1;\n",
+    "    # dividing all columns by one shared maximum only rescales the data uniformly and leaves the clusters unchanged\n",
+    "    column_max = df_clustering.max()\n",
+    "    # kept as a numpy array (one entry per column) so multiplying the cluster centers by it undoes the scaling;\n",
+    "    # a column whose maximum is 0 is divided by 1 to avoid 0/0\n",
+    "    df_clustering_max = column_max.replace(0, 1).to_numpy()\n",
+    "    fit = df_clustering / df_clustering_max\n",
+    "    print(\"0-1\")\n",
+    "    \n",
+    "elif Data_Normalization_Choice == 'log':\n",
+    "    # the +1 keeps zero values finite under log10\n",
+    "    fit = np.log10(df_clustering+1)\n",
+    "    print(\"log\")\n",
+    "    \n",
+    "else:\n",
+    "    # any other choice leaves the data as it is\n",
+    "    fit = df_clustering\n",
+    "    print(\"none\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# kmeans Clustering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# fits kmeans with 3 clusters (fixed random_state) on `fit` and prints the cluster centers\n",
+    "kmeans_cluster = KMeans(n_clusters=3, random_state=111).fit(fit)\n",
+    "print(kmeans_cluster.cluster_centers_)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Reverting Cluster Points Back to Align with Unnormalized Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# maps the cluster centers back onto the scale of the unnormalized data\n",
+    "if Data_Normalization_Choice == '0-1':\n",
+    "    clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max\n",
+    "    print(\"0-1\")\n",
+    "    \n",
+    "elif Data_Normalization_Choice == 'log':\n",
+    "    # inverse of log10(x + 1)\n",
+    "    clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1\n",
+    "    print(\"log\")\n",
+    "    \n",
+    "else:\n",
+    "    clusterpoints = kmeans_cluster.cluster_centers_\n",
+    "    print(\"none\")\n",
+    "\n",
+    "# printed for every choice so the reverted centers can be checked (columns 0 and 1)\n",
+    "print(clusterpoints[:,0],clusterpoints[:,1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# Every panel scatters the clustered jobs colored by kmeans label, with the cluster centers in black.\n",
+    "# Cluster-center columns follow the column order of df_clustering, so they are looked up by name.\n",
+    "center_column = {name: position for position, name in enumerate(df_clustering.columns)}\n",
+    "axis_label = {'ReqMemCPU': 'ReqMemCPU(gigs)', 'Elapsed': 'Elapsed(hours)', 'AllocCPUS': 'AllocCPUS'}\n",
+    "\n",
+    "\n",
+    "def draw_cluster_panel(position, columns, alpha=None):\n",
+    "    \"\"\"Draw one cluster scatter panel on `figure` and return its axes.\n",
+    "\n",
+    "    position -- slot (1-9) in the 3x3 subplot grid\n",
+    "    columns  -- two (2d panel) or three (3d panel) df_clustering column names, in x, y[, z] order\n",
+    "    alpha    -- optional transparency for the job points\n",
+    "    \"\"\"\n",
+    "    is_3d = len(columns) == 3\n",
+    "    panel = figure.add_subplot(3, 3, position, projection='3d' if is_3d else None)\n",
+    "    panel.scatter(*[df_clustering[name] for name in columns],\n",
+    "                  c=kmeans_cluster.labels_, cmap='rainbow', alpha=alpha)\n",
+    "    # centers use the same columns, in the same order, as the panel's axes;\n",
+    "    # a 3d panel needs all three coordinates, otherwise the centers are drawn at z=0\n",
+    "    panel.scatter(*[clusterpoints[:, center_column[name]] for name in columns], color='black')\n",
+    "\n",
+    "    panel.set_xlabel(axis_label[columns[0]])\n",
+    "    panel.set_ylabel(axis_label[columns[1]])\n",
+    "    if is_3d:\n",
+    "        panel.set_zlabel(axis_label[columns[2]])\n",
+    "        # sets size and color for gridlines by axis\n",
+    "        for axis in (panel.xaxis, panel.yaxis, panel.zaxis):\n",
+    "            axis._axinfo['grid'].update({'linewidth': .5, 'color': 'black'})\n",
+    "    return panel\n",
+    "\n",
+    "\n",
+    "figure = plt.figure()\n",
+    "figure.set_size_inches(20,20)\n",
+    "\n",
+    "# 2d graphs: Elapsed/ReqMem, Elapsed/Alloc, Alloc/ReqMem\n",
+    "draw_cluster_panel(1, ('ReqMemCPU', 'Elapsed'))\n",
+    "draw_cluster_panel(2, ('AllocCPUS', 'Elapsed'))\n",
+    "draw_cluster_panel(3, ('ReqMemCPU', 'AllocCPUS'))\n",
+    "\n",
+    "# 3d graphs, one per axis arrangement: default opacity in row 2, alpha = .08 in row 3\n",
+    "axis_arrangements = [('ReqMemCPU', 'AllocCPUS', 'Elapsed'),\n",
+    "                     ('AllocCPUS', 'ReqMemCPU', 'Elapsed'),\n",
+    "                     ('ReqMemCPU', 'Elapsed', 'AllocCPUS')]\n",
+    "for offset, arrangement in enumerate(axis_arrangements):\n",
+    "    draw_cluster_panel(4 + offset, arrangement)\n",
+    "    draw_cluster_panel(7 + offset, arrangement, alpha=.08)\n",
+    "\n",
+    "# sets the spacing\n",
+    "# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
+    "# left = space to the left\n",
+    "# wspace = padding on both sides of graphs\n",
+    "# hspace = padding on top and bottom of graphs\n",
+    "figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)\n",
+    "figure.suptitle('Clusters', fontsize=20)\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}