diff --git a/Runtime-and-CoreCount.ipynb b/Runtime-and-CoreCount.ipynb index 88f5862e7d5ed401ae7aa43d4873d12083bef9db..8448720fa0c8b782b5cfab175d8ecf0d591b2a7f 100644 --- a/Runtime-and-CoreCount.ipynb +++ b/Runtime-and-CoreCount.ipynb @@ -359,6 +359,19 @@ "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "column_maxes_runtime = df_runtime_cluster.max()\n", + "df_runtime_cluster_max = column_maxes_runtime.max()\n", + "normalized_runtime_df = df_runtime_cluster / df_runtime_cluster_max\n", + "\n", + "print(normalized_runtime_df)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -384,8 +397,8 @@ "\n", "# sets to clusters and returns the cluster points\n", "kmeans_elapsed_reqmem = KMeans(n_clusters=3, random_state=111)\n", - "kmeans_elapsed_reqmem.fit(df_runtime_cluster)\n", - "print(kmeans_elapsed_reqmem.cluster_centers_)" + "kmeans_elapsed_reqmem.fit(normalized_runtime_df)\n", + "clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_ * df_runtime_cluster_max" ] }, { @@ -489,7 +502,7 @@ "\n", "elapsed_rqmem_clustergraph = figure.add_subplot(121)\n", "elapsed_rqmem_clustergraph.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_reqmem.labels_, cmap='rainbow')\n", - "elapsed_rqmem_clustergraph.scatter(kmeans_elapsed_reqmem.cluster_centers_[:,0] ,kmeans_elapsed_reqmem.cluster_centers_[:,1], color='black')\n", + "elapsed_rqmem_clustergraph.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n", "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('Elapsed(hours)')\n", "\n", @@ -497,7 +510,7 @@ "elapsed_rqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n", "elapsed_rqmem_clustergraph_3d.scatter(df_runtime_cluster['ReqMemCPU'], df_runtime_cluster['Elapsed'], df_runtime_cluster['AllocCPUS'], \n", " c=kmeans_elapsed_reqmem.labels_ ,cmap='rainbow')\n", - "elapsed_rqmem_clustergraph_3d.scatter(kmeans_elapsed_reqmem.cluster_centers_[:,0] ,kmeans_elapsed_reqmem.cluster_centers_[:,1], color='black')\n", + "elapsed_rqmem_clustergraph_3d.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n", "\n", "\n", "elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n", @@ -509,7 +522,7 @@ "elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n", "elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n", "\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -609,8 +622,8 @@ "\n", "# sets to clusters and returns the cluster points\n", "kmeans_elapsed_alloc = KMeans(n_clusters=3, random_state=111)\n", - "kmeans_elapsed_alloc.fit(df_runtime_cluster)\n", - "print(kmeans_elapsed_alloc.cluster_centers_)" + "kmeans_elapsed_alloc.fit(normalized_runtime_df)\n", + "clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_ * df_runtime_cluster_max" ] }, { @@ -714,14 +727,14 @@ "\n", "elapsed_alloc_clustergraph = figure.add_subplot(121)\n", "elapsed_alloc_clustergraph.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_alloc.labels_, cmap='rainbow')\n", - "elapsed_alloc_clustergraph.scatter(kmeans_elapsed_alloc.cluster_centers_[:,0] ,kmeans_elapsed_alloc.cluster_centers_[:,1], color='black')\n", + "elapsed_alloc_clustergraph.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n", "plt.xlabel('AllocCPUS')\n", "plt.ylabel('Elapsed(hours)')\n", "\n", "# 3d veiw of the scatterplot for better understanding of the data\n", "elapsed_alloc_clustergraph_3d = figure.add_subplot(122, projection='3d')\n", "elapsed_alloc_clustergraph_3d.scatter(df_runtime_cluster['AllocCPUS'], df_runtime_cluster['Elapsed'], df_runtime_cluster['ReqMemCPU'], c=kmeans_elapsed_alloc.labels_ ,cmap='rainbow')\n", - "elapsed_alloc_clustergraph_3d.scatter(kmeans_elapsed_alloc.cluster_centers_[:,0] ,kmeans_elapsed_alloc.cluster_centers_[:,1], color='black')\n", + "elapsed_alloc_clustergraph_3d.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n", "elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')\n", "elapsed_alloc_clustergraph_3d.set_ylabel('Elapsed(hours)')\n", "elapsed_alloc_clustergraph_3d.set_zlabel('ReqMemCPU(gigs)')\n", @@ -881,11 +894,16 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n", - "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made." + "column_maxes_alloc = df_alloc_cluster.max()\n", + "df_alloc_cluster_max = column_maxes_alloc.max()\n", + "normalized_alloc_df = df_alloc_cluster / df_alloc_cluster_max\n", + "\n", + "print(normalized_alloc_df)" ] }, { @@ -898,8 +916,16 @@ "\n", "# sets to clusters and returns the cluster points\n", "kmeans_alloc_reqmem = KMeans(n_clusters=3, random_state=111)\n", - "kmeans_alloc_reqmem.fit(df_alloc_cluster)\n", - "print(kmeans_alloc_reqmem.cluster_centers_)" + "kmeans_alloc_reqmem.fit(normalized_alloc_df)\n", + "clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_ * df_alloc_cluster_max" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n", + "All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made." ] }, { @@ -915,7 +941,7 @@ "# 0 = purple cluster\n", "# 1 = green cluster\n", "# 2 = red cluster\n", - "np.unique(kmeans_elapsed_alloc.labels_)" + "np.unique(kmeans_alloc_reqmem.labels_)" ] }, { @@ -1003,14 +1029,14 @@ "\n", "alloc_reqmem_cluster_graph = figure.add_subplot(121)\n", "alloc_reqmem_cluster_graph.scatter(df_alloc_cluster['ReqMemCPU'],df_alloc_cluster['AllocCPUS'], c=kmeans_alloc_reqmem.labels_, cmap='rainbow')\n", - "alloc_reqmem_cluster_graph.scatter(kmeans_alloc_reqmem.cluster_centers_[:,0] ,kmeans_alloc_reqmem.cluster_centers_[:,1], color='black')\n", + "alloc_reqmem_cluster_graph.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,1], color='black')\n", "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('AllocCPUS')\n", "\n", "# 3d veiw of the scatterplot for better understanding of the data\n", "alloc_reqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n", "alloc_reqmem_clustergraph_3d.scatter(df_alloc_cluster['ReqMemCPU'], df_alloc_cluster['AllocCPUS'], df_alloc_cluster['Elapsed'], c=kmeans_alloc_reqmem.labels_ ,cmap='rainbow')\n", - "alloc_reqmem_clustergraph_3d.scatter(kmeans_alloc_reqmem.cluster_centers_[:,0] ,kmeans_alloc_reqmem.cluster_centers_[:,1], color='black')\n", + "alloc_reqmem_clustergraph_3d.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,1], color='black')\n", "alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n", "alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')\n", "alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",