diff --git a/Cluster_Analysis.ipynb b/Cluster_Analysis.ipynb
index 45b46e4b3937ab5983871abd60d5cb3a87b6df30..6cca51e9f0f184a7b8d4ad842030f27ce3b98fae 100644
--- a/Cluster_Analysis.ipynb
+++ b/Cluster_Analysis.ipynb
@@ -217,7 +217,7 @@
    "outputs": [],
    "source": [
     "#creating a database based on the start date\n",
-    "slurm2sql.slurm2sql(db, ['-S',start_date, '-E', end_date,'-a'])"
+    "slurm2sql.slurm2sql(db, ['-S',start_date, '-E', end_date,'-X', '-a']) # -X: allocations only (no job steps); -a: all users"
    ]
   },
   {
@@ -256,7 +256,7 @@
    "source": [
     "# df_1 is dataframe of all completed jobs\n",
     "df_1 = df[df.State.str.contains('COMPLETED')]\n",
-    "df_1.head(5)"
+    "df_1.head(20)"
    ]
   },
   {
@@ -304,8 +304,7 @@
     "                       (df_completed['AllocCPUS'] >= LowerlimitAllocCPU)\n",
     "                       & \n",
     "                       (df_completed['Elapsed'] <= UpperlimitElapsed) & \n",
-    "                       (df_completed['Elapsed'] >= LowerlimitElapsed)]\n",
-    "df_clustering.head(5)"
+    "                       (df_completed['Elapsed'] >= LowerlimitElapsed)]"
    ]
   },
   {
@@ -415,7 +414,7 @@
     "else:\n",
     "    clusterpoints = kmeans_cluster.cluster_centers_\n",
     "    print(\"none\")\n",
-    "    print(clusterpoints[:,0],clusterpoints[:,1])\n"
+    "    print(clusterpoints[:,0],clusterpoints[:,1])"
    ]
   },
   {
@@ -434,16 +433,16 @@
     "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
     "\n",
     "#Purple\n",
-    "df_0 = df_clustering[kmeans_cluster.labels_ == 0]\n",
+    "cluster_0 = df_clustering[kmeans_cluster.labels_ == 0]\n",
     "\n",
     "#Green\n",
-    "df_1 = df_clustering[kmeans_cluster.labels_ == 1]\n",
+    "cluster_1 = df_clustering[kmeans_cluster.labels_ == 1]\n",
     "\n",
     "#Yellow\n",
-    "df_2 = df_clustering[kmeans_cluster.labels_ == 2]\n",
+    "cluster_2 = df_clustering[kmeans_cluster.labels_ == 2]\n",
     "\n",
     "#Red\n",
-    "df_3 = df_clustering[kmeans_cluster.labels_ == 3]"
+    "cluster_3 = df_clustering[kmeans_cluster.labels_ == 3]"
    ]
   },
   {
@@ -455,24 +454,24 @@
     "# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
     "# These are the parameters for the scatter plots of each cluster\n",
     "print(\"Purple Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_0.ReqMemCPU.min(),\" \",\"max =\",df_0.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_0.Elapsed.min(),\" \",\"max =\",df_0.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_0.AllocCPUS.min(),\" \",\"max =\",df_0.AllocCPUS.max())\n",
+    "print(\"ReqMemCPU:\", \"min =\",cluster_0.ReqMemCPU.min(),\" \",\"max =\",cluster_0.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",cluster_0.Elapsed.min(),\" \",\"max =\",cluster_0.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",cluster_0.AllocCPUS.min(),\" \",\"max =\",cluster_0.AllocCPUS.max())\n",
     "\n",
     "print(\"\\nBlue Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_1.ReqMemCPU.min(),\" \",\"max =\",df_1.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_1.Elapsed.min(),\" \",\"max =\",df_1.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_1.AllocCPUS.min(),\" \",\"max =\",df_1.AllocCPUS.max())\n",
+    "print(\"ReqMemCPU:\", \"min =\",cluster_1.ReqMemCPU.min(),\" \",\"max =\",cluster_1.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",cluster_1.Elapsed.min(),\" \",\"max =\",cluster_1.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",cluster_1.AllocCPUS.min(),\" \",\"max =\",cluster_1.AllocCPUS.max())\n",
     "\n",
     "print(\"\\nYellow Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_2.ReqMemCPU.min(),\" \",\"max =\",df_2.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_2.Elapsed.min(),\" \",\"max =\",df_2.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_2.AllocCPUS.min(),\" \",\"max =\",df_2.AllocCPUS.max())\n",
+    "print(\"ReqMemCPU:\", \"min =\",cluster_2.ReqMemCPU.min(),\" \",\"max =\",cluster_2.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",cluster_2.Elapsed.min(),\" \",\"max =\",cluster_2.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",cluster_2.AllocCPUS.min(),\" \",\"max =\",cluster_2.AllocCPUS.max())\n",
     "\n",
     "print(\"\\nRed Cluster\")\n",
-    "print(\"ReqMemCPU:\", \"min =\",df_3.ReqMemCPU.min(),\" \",\"max =\",df_3.ReqMemCPU.max())\n",
-    "print(\"Elapsed:\", \"min =\",df_3.Elapsed.min(),\" \",\"max =\",df_3.Elapsed.max())\n",
-    "print(\"AllocCPUS:\", \"min =\",df_3.AllocCPUS.min(),\" \",\"max =\",df_3.AllocCPUS.max())"
+    "print(\"ReqMemCPU:\", \"min =\",cluster_3.ReqMemCPU.min(),\" \",\"max =\",cluster_3.ReqMemCPU.max())\n",
+    "print(\"Elapsed:\", \"min =\",cluster_3.Elapsed.min(),\" \",\"max =\",cluster_3.Elapsed.max())\n",
+    "print(\"AllocCPUS:\", \"min =\",cluster_3.AllocCPUS.min(),\" \",\"max =\",cluster_3.AllocCPUS.max())"
    ]
   },
   {
@@ -485,24 +484,24 @@
     "# The groupby does not change the data, but it does make a small enough dataset\n",
     "\n",
     "# for purple cluster \n",
-    "df_0_2d1 = df_0.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "df_0_2d2 = df_0.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "df_0_2d3 = df_0.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
+    "df_0_2d1 = cluster_0.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "df_0_2d2 = cluster_0.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "df_0_2d3 = cluster_0.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
     "\n",
     "# for blue cluster\n",
-    "df_1_2d1 = df_1.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "df_1_2d2 = df_1.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "df_1_2d3 = df_1.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
+    "df_1_2d1 = cluster_1.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "df_1_2d2 = cluster_1.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "df_1_2d3 = cluster_1.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
     "\n",
     "# for yellow cluster\n",
-    "df_2_2d1 = df_2.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "df_2_2d2 = df_2.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "df_2_2d3 = df_2.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
+    "df_2_2d1 = cluster_2.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "df_2_2d2 = cluster_2.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "df_2_2d3 = cluster_2.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
     "\n",
     "# for red cluster\n",
-    "df_3_2d1 = df_3.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
-    "df_3_2d2 = df_3.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
-    "df_3_2d3 = df_3.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()"
+    "df_3_2d1 = cluster_3.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
+    "df_3_2d2 = cluster_3.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
+    "df_3_2d3 = cluster_3.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()"
    ]
   },
   {
@@ -514,14 +513,14 @@
     "# Creating bins \n",
     "\n",
     "####Purple\n",
-    "purple_rqmem_min = np.min(df_0.ReqMemCPU.min())\n",
-    "purple_rqmem_max = np.max(df_0.ReqMemCPU.max())\n",
+    "purple_rqmem_min = np.min(cluster_0.ReqMemCPU.min())\n",
+    "purple_rqmem_max = np.max(cluster_0.ReqMemCPU.max())\n",
     "  \n",
-    "purple_elapsed_min = np.min(df_0.Elapsed.min()) \n",
-    "purple_elapsed_max = np.max(df_0.Elapsed.max()) \n",
+    "purple_elapsed_min = np.min(cluster_0.Elapsed.min()) \n",
+    "purple_elapsed_max = np.max(cluster_0.Elapsed.max()) \n",
     "\n",
-    "purple_alloc_min = np.min(df_0.AllocCPUS.min()) \n",
-    "purple_alloc_max = np.max(df_0.AllocCPUS.max())\n",
+    "purple_alloc_min = np.min(cluster_0.AllocCPUS.min()) \n",
+    "purple_alloc_max = np.max(cluster_0.AllocCPUS.max())\n",
     "  \n",
     "    \n",
     "x_purple_rqmem_elapsed_bins = list(range(purple_rqmem_max))\n",
@@ -535,14 +534,14 @@
     "\n",
     "\n",
     "####Blue\n",
-    "blue_rqmem_min = np.min(df_1.ReqMemCPU.min())\n",
-    "blue_rqmem_max = np.max(df_1.ReqMemCPU.max())\n",
+    "blue_rqmem_min = np.min(cluster_1.ReqMemCPU.min())\n",
+    "blue_rqmem_max = np.max(cluster_1.ReqMemCPU.max())\n",
     "  \n",
-    "blue_elapsed_min = np.min(df_1.Elapsed.min()) \n",
-    "blue_elapsed_max = np.max(df_1.Elapsed.max()) \n",
+    "blue_elapsed_min = np.min(cluster_1.Elapsed.min()) \n",
+    "blue_elapsed_max = np.max(cluster_1.Elapsed.max()) \n",
     "\n",
-    "blue_alloc_min = np.min(df_1.AllocCPUS.min()) \n",
-    "blue_alloc_max = np.max(df_1.AllocCPUS.max())\n",
+    "blue_alloc_min = np.min(cluster_1.AllocCPUS.min()) \n",
+    "blue_alloc_max = np.max(cluster_1.AllocCPUS.max())\n",
     "  \n",
     "    \n",
     "x_blue_rqmem_elapsed_bins = list(range(blue_rqmem_max))\n",
@@ -555,14 +554,14 @@
     "y_blue_reqmem_alloc_bins = list(range(int(blue_alloc_max))) \n",
     "\n",
     "####Yellow\n",
-    "yellow_rqmem_min = np.min(df_2.ReqMemCPU.min())\n",
-    "yellow_rqmem_max = np.max(df_2.ReqMemCPU.max())\n",
+    "yellow_rqmem_min = np.min(cluster_2.ReqMemCPU.min())\n",
+    "yellow_rqmem_max = np.max(cluster_2.ReqMemCPU.max())\n",
     "  \n",
-    "yellow_elapsed_min = np.min(df_2.Elapsed.min()) \n",
-    "yellow_elapsed_max = np.max(df_2.Elapsed.max()) \n",
+    "yellow_elapsed_min = np.min(cluster_2.Elapsed.min()) \n",
+    "yellow_elapsed_max = np.max(cluster_2.Elapsed.max()) \n",
     "\n",
-    "yellow_alloc_min = np.min(df_2.AllocCPUS.min()) \n",
-    "yellow_alloc_max = np.max(df_2.AllocCPUS.max())\n",
+    "yellow_alloc_min = np.min(cluster_2.AllocCPUS.min()) \n",
+    "yellow_alloc_max = np.max(cluster_2.AllocCPUS.max())\n",
     "  \n",
     "    \n",
     "x_yellow_rqmem_elapsed_bins = list(range(yellow_rqmem_max))\n",
@@ -576,14 +575,14 @@
     "\n",
     "\n",
     "####Red\n",
-    "red_rqmem_min = np.min(df_3.ReqMemCPU.min())\n",
-    "red_rqmem_max = np.max(df_3.ReqMemCPU.max())\n",
+    "red_rqmem_min = np.min(cluster_3.ReqMemCPU.min())\n",
+    "red_rqmem_max = np.max(cluster_3.ReqMemCPU.max())\n",
     "  \n",
-    "red_elapsed_min = np.min(df_3.Elapsed.min()) \n",
-    "red_elapsed_max = np.max(df_3.Elapsed.max()) \n",
+    "red_elapsed_min = np.min(cluster_3.Elapsed.min()) \n",
+    "red_elapsed_max = np.max(cluster_3.Elapsed.max()) \n",
     "\n",
-    "red_alloc_min = np.min(df_3.AllocCPUS.min()) \n",
-    "red_alloc_max = np.max(df_3.AllocCPUS.max())\n",
+    "red_alloc_min = np.min(cluster_3.AllocCPUS.min()) \n",
+    "red_alloc_max = np.max(cluster_3.AllocCPUS.max())\n",
     "  \n",
     "    \n",
     "x_red_rqmem_elapsed_bins = list(range(red_rqmem_max))\n",
@@ -596,6 +595,50 @@
     "y_red_reqmem_alloc_bins = list(range(red_alloc_max)) # list range gives one bin per cpu"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Summary Stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Job and user counts for each cluster\n",
+    "def cluster_user_table(cluster):\n",
+    "    \"\"\"Return the cluster's feature columns plus a 'user' column taken from df_1, aligned on the row index.\"\"\"\n",
+    "    return cluster.loc[:, ['ReqMemCPU', 'Elapsed', 'AllocCPUS']].assign(user=df_1['User'])\n",
+    "\n",
+    "# purple cluster: job count, per-job user table, and one row per distinct user\n",
+    "cluster_0_jobs = cluster_0.shape[0]\n",
+    "users_0 = cluster_user_table(cluster_0)\n",
+    "cluster_0_users = users_0.drop_duplicates(subset=['user'])\n",
+    "\n",
+    "# blue cluster\n",
+    "cluster_1_jobs = cluster_1.shape[0]\n",
+    "users_1 = cluster_user_table(cluster_1)\n",
+    "cluster_1_users = users_1.drop_duplicates(subset=['user'])\n",
+    "\n",
+    "# yellow cluster\n",
+    "cluster_2_jobs = cluster_2.shape[0]\n",
+    "users_2 = cluster_user_table(cluster_2)\n",
+    "cluster_2_users = users_2.drop_duplicates(subset=['user'])\n",
+    "\n",
+    "# red cluster\n",
+    "cluster_3_jobs = cluster_3.shape[0]\n",
+    "users_3 = cluster_user_table(cluster_3)\n",
+    "cluster_3_users = users_3.drop_duplicates(subset=['user'])\n",
+    "\n",
+    "summary_stats = pd.DataFrame({'Job Count': [cluster_0_jobs, cluster_1_jobs, cluster_2_jobs, cluster_3_jobs],\n",
+    "                              'User Count': [cluster_0_users.shape[0], cluster_1_users.shape[0], cluster_2_users.shape[0], cluster_3_users.shape[0]]},\n",
+    "                               index=['Purple Cluster','Blue Cluster', 'Yellow Cluster', 'Red Cluster'])\n",
+    "summary_stats.head()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -609,6 +652,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "print(summary_stats)\n",
+    "\n",
     "figure = plt.figure()\n",
     "\n",
     "figure.set_size_inches(20,15)\n",
@@ -863,6 +908,13 @@
     "\n",
     "plt.show()\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {