Patch for Runtime-and-CoreCount.ipynb (nbformat JSON with Python code cells).

What this patch does to the notebook:
  * builds, rounds and sorts df_1 / df_1_sorted before the limit constants;
  * renames the pair-grid frame df_runtime to df_facet;
  * gives the elapsed-time clustering section its own *_elapsed limits;
  * adds a second clustering section (*_alloc limits, df_allocCPUS_cluster)
    that plots AllocCPUS against ReqMemCPU.
df_allocCPUS_cluster keeps the column order ReqMemCPU, Elapsed, AllocCPUS,
so the centers of the KMeans fit on it use columns 0, 1, 2 in that order.

diff --git a/Runtime-and-CoreCount.ipynb b/Runtime-and-CoreCount.ipynb
index 097a6bbd217c85aa7aa22a6d879bcbd940d2b3d6..7c375952737f8f81ce861df369abe641ba50d57a 100644
--- a/Runtime-and-CoreCount.ipynb
+++ b/Runtime-and-CoreCount.ipynb
@@ -127,7 +127,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# ReqMemCPU,Corecount,Runtime"
+    "# ReqMemCPU,Corecount,Runtime Clustering"
    ]
   },
   {
@@ -138,9 +138,9 @@
    "source": [
     "# must run\n",
     "\n",
-    "# sets min and max parameters for ReqMemCPU\n",
-    "UpperlimitGB = 50\n",
-    "LowerlimitGB = 0"
+    "# dataset of needed columns for all graphs below\n",
+    "df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
+    "df_1.head(5)"
    ]
   },
   {
@@ -151,9 +151,8 @@
    "source": [
     "# must run\n",
     "\n",
-    "# sets min and max parameters for AllocCPUS\n",
-    "UpperlimitAllocCPU = 20\n",
-    "LowerlimitAllocCPU = 0"
+    "# rounds ReqMemCPU up to nearest whole number\n",
+    "df_1['ReqMemCPU'] = df_1['ReqMemCPU'].apply(np.ceil)"
    ]
   },
   {
@@ -164,9 +163,8 @@
    "source": [
     "# must run\n",
     "\n",
-    "# dataset of needed columns for all graphs below\n",
-    "df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
-    "df_1.head(5)"
+    "# rounds Elapsed up to nearest 2 decimal places\n",
+    "df_1['Elapsed'] = df_1['Elapsed'].round(2)"
    ]
   },
   {
@@ -177,8 +175,9 @@
    "source": [
     "# must run\n",
     "\n",
-    "# rounds ReqMemCPU up to nearest whole number\n",
-    "df_1['ReqMemCPU'] = df_1['ReqMemCPU'].apply(np.ceil)"
+    "# sorts dataset by AllocCPUS for easy visualization\n",
+    "df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
+    "df_1_sorted.head(5)"
    ]
   },
   {
@@ -189,8 +188,9 @@
    "source": [
     "# must run\n",
     "\n",
-    "# rounds Elapsed up to nearest 2 decimal places\n",
-    "df_1['Elapsed'] = df_1['Elapsed'].round(2)"
+    "# sets min and max parameters for ReqMemCPU\n",
+    "UpperlimitGB = 50\n",
+    "LowerlimitGB = 0"
    ]
   },
   {
@@ -201,9 +201,9 @@
    "source": [
     "# must run\n",
     "\n",
-    "# sorts dataset by AllocCPUS for easy visualization\n",
-    "df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
-    "df_1_sorted.head(5)"
+    "# sets min and max parameters for AllocCPUS\n",
+    "UpperlimitAllocCPU = 20\n",
+    "LowerlimitAllocCPU = 0"
    ]
   },
   {
@@ -215,8 +215,8 @@
     "# must run\n",
     "\n",
     "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
-    "df_runtime = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
-    "df_runtime.head(5)"
+    "df_facet = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
+    "df_facet.head(5)"
    ]
   },
   {
@@ -232,30 +232,42 @@
     "style.default_axes_and_ticks()\n",
     "style.figsize()\n",
     "\n",
-    "full_facet = sb.PairGrid(df_runtime)\n",
+    "full_facet = sb.PairGrid(df_facet)\n",
     "full_facet.map(plt.scatter);\n",
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Detailed Look at Elapsed Time - In terms of Requested RAM and Cores"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "# must run\n",
     "\n",
-    "style.default_axes_and_ticks()\n",
-    "style.figsize()\n",
-    "\n",
-    "runtime_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_runtime)\n",
-    "\n",
-    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB)\n",
-    "\n",
-    "plt.xlabel('ReqMemCPU(gigs)')\n",
-    "plt.ylabel('AllocCPUS')\n",
-    "#plt.yscale(\"log\")\n",
+    "# sets min and max parameters for ReqMemCPU for clustered Elapsed Time Graphs\n",
+    "UpperlimitGB_elapsed = 50\n",
+    "LowerlimitGB_elapsed = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
     "\n",
-    "plt.show()"
+    "# sets min and max parameters for AllocCPUS for clustered Elapsed Time Graphs\n",
+    "UpperlimitAllocCPU_elapsed = 20\n",
+    "LowerlimitAllocCPU_elapsed = 0"
    ]
   },
   {
@@ -266,9 +278,9 @@
    "source": [
     "# must run\n",
     "\n",
-    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above for clustering\n",
-    "df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
-    "df_runtime_cluster.tail(5)"
+    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
+    "df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_elapsed) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_elapsed) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_elapsed) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_elapsed)]\n",
+    "df_runtime_cluster.head(5)"
    ]
   },
   {
@@ -330,8 +342,8 @@
     "style.default_axes_and_ticks()\n",
     "style.figsize()\n",
     "\n",
-    "reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
-    "reqmem_alloc.map(sns.regplot, color=\"blue\")"
+    "elapsed_reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
+    "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")"
    ]
   },
   {
@@ -346,12 +358,12 @@
     "style.default_axes_and_ticks()\n",
     "style.figsize()\n",
     "\n",
-    "runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
+    "elapsed_runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
     "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
     "\n",
     "plt.xlabel('ReqMemCPU(gigs)')\n",
     "plt.ylabel('Elapsed(hours)')\n",
-    "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
+    "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed)\n",
     "plt.show()"
    ]
   },
@@ -367,12 +379,146 @@
     "style.default_axes_and_ticks()\n",
     "style.figsize()\n",
     "\n",
-    "alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
+    "elapsed_alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
     "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
     "\n",
     "plt.xlabel('AllocCPUS')\n",
     "plt.ylabel('Elapsed(hours)')\n",
-    "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU)\n",
+    "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_elapsed)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Detailed Look at Allocated Cores - In terms of Requested RAM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# second set of min and max parameters for ReqMemCPU to use for AllocCPU/ReqMemCPU cluster graph \n",
+    "UpperlimitGB_alloc = 50\n",
+    "LowerlimitGB_alloc = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets min and max parameters for AllocCPUS\n",
+    "UpperlimitAllocCPU_alloc = 60\n",
+    "LowerlimitAllocCPU_alloc = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
+    "df_allocCPUS_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_alloc) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_alloc) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_alloc) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_alloc)]\n",
+    "df_allocCPUS_cluster.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets up info for plotting the optimal number of clusters - uses df_allocCPUS_cluster dataset\n",
+    "Sum_of_squared_distances = []\n",
+    "K = range(1,10)\n",
+    "for k in K:\n",
+    "    km = KMeans(n_clusters=k, random_state=111)\n",
+    "    km = km.fit(df_allocCPUS_cluster)\n",
+    "    Sum_of_squared_distances.append(km.inertia_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# the bend in the graph is the optimal number of clusters for graphs using the df_allocCPUS_cluster dataset\n",
+    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
+    "plt.xlabel('k')\n",
+    "plt.ylabel('Sum_of_squared_distances')\n",
+    "plt.title('Elbow Method For Optimal k')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# sets to clusters and returns the cluster points\n",
+    "kmeans = KMeans(n_clusters=3, random_state=111)\n",
+    "kmeans.fit(df_allocCPUS_cluster)\n",
+    "print(kmeans.cluster_centers_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "style.default_axes_and_ticks()\n",
+    "style.figsize()\n",
+    "\n",
+    "alloc_reqmem_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_allocCPUS_cluster)\n",
+    "\n",
+    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
+    "\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "#plt.yscale(\"log\")\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# must run\n",
+    "\n",
+    "# clustered graph - centers use fitted columns 0 (ReqMemCPU) and 2 (AllocCPUS); column 1 is Elapsed\n",
+    "style.default_axes_and_ticks()\n",
+    "style.figsize()\n",
+    "\n",
+    "alloc_reqmem_cluster_graph = plt.scatter(df_allocCPUS_cluster['ReqMemCPU'],df_allocCPUS_cluster['AllocCPUS'], c=kmeans.labels_, cmap='rainbow')\n",
+    "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,2], color='black')\n",
+    "\n",
+    "plt.xlabel('ReqMemCPU(gigs)')\n",
+    "plt.ylabel('AllocCPUS')\n",
+    "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
     "plt.show()"
    ]
   },