diff --git a/Runtime-and-CoreCount.ipynb b/Runtime-and-CoreCount.ipynb index 7c375952737f8f81ce861df369abe641ba50d57a..062dfbf4a863dfbee8962648c3d2f5d0ac72617f 100644 --- a/Runtime-and-CoreCount.ipynb +++ b/Runtime-and-CoreCount.ipynb @@ -83,7 +83,7 @@ "\n", "# for displaying all available column options\n", "pd.set_option('display.max_columns', None)\n", - "df.head(5)" + "#df.head(5)" ] }, { @@ -173,11 +173,8 @@ "metadata": {}, "outputs": [], "source": [ - "# must run\n", - "\n", - "# sorts dataset by AllocCPUS for easy visualization\n", - "df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n", - "df_1_sorted.head(5)" + "df_1.ReqMemCPU = df_1.ReqMemCPU.apply(int)\n", + "df_1.head(5)" ] }, { @@ -202,10 +199,23 @@ "# must run\n", "\n", "# sets min and max parameters for AllocCPUS\n", - "UpperlimitAllocCPU = 20\n", + "UpperlimitAllocCPU = 40\n", "LowerlimitAllocCPU = 0" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# sets min and max parameters for Elapsed\n", + "UpperlimitElapsed = 150\n", + "LowerlimitElapsed = 0.5" + ] + }, { "cell_type": "code", "execution_count": null, @@ -215,7 +225,13 @@ "# must run\n", "\n", "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n", - "df_facet = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n", + "df_facet = df_1[(df_1['ReqMemCPU'] <= UpperlimitGB) & \n", + " (df_1['ReqMemCPU'] >= LowerlimitGB) & \n", + " (df_1['AllocCPUS'] <= UpperlimitAllocCPU) & \n", + " (df_1['AllocCPUS'] >= LowerlimitAllocCPU)\n", + " & \n", + " (df_1['Elapsed'] <= UpperlimitElapsed) & \n", + " (df_1['Elapsed'] >= LowerlimitElapsed)]\n", "df_facet.head(5)" ] }, @@ -232,7 +248,10 @@ "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - 
"full_facet = sb.PairGrid(df_facet)\n", + "full_facet = sns.pairplot(df_facet, diag_kind = 'kde') # makes density plots - kernel density estimate\n", + "# y axis is count in the diagonal graphs\n", + "\n", + "#full_facet = sb.PairGrid(df_facet)\n", "full_facet.map(plt.scatter);\n", "plt.show()" ] @@ -266,7 +285,7 @@ "# must run\n", "\n", "# sets min and max parameters for AllocCPUS for clustered Elapsed Time Graphs\n", - "UpperlimitAllocCPU_elapsed = 20\n", + "UpperlimitAllocCPU_elapsed = 40\n", "LowerlimitAllocCPU_elapsed = 0" ] }, @@ -279,10 +298,36 @@ "# must run\n", "\n", "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n", - "df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_elapsed) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_elapsed) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_elapsed) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_elapsed)]\n", + "df_runtime_cluster = df_1[(df_1['ReqMemCPU'] <= UpperlimitGB_elapsed) & \n", + " (df_1['ReqMemCPU'] >= LowerlimitGB_elapsed) & \n", + " (df_1['AllocCPUS'] <= UpperlimitAllocCPU_elapsed) & \n", + " (df_1['AllocCPUS'] >= LowerlimitAllocCPU_elapsed)]\n", "df_runtime_cluster.head(5)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# facet grid of the two graphs being clustered using df_runtime_cluster dataset\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n", + "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Elapsed/ReqMemCPU clustering" + ] + }, { "cell_type": "code", "execution_count": null, @@ -338,12 +383,91 @@ "source": [ "# must run\n", "\n", - "# facet grid of the two graphs being clustered using 
df_runtime_cluster dataset\n", + "# clustered graph\n", "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - "elapsed_reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n", - "elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")" + "elapsed_mem_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n", + "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n", + "\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_2 = df_1.loc[:,['ReqMemCPU', 'Elapsed']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# sets min and max parameters for ReqMemCPU for histogram of clustered Elapsed Time Graphs\n", + "\n", + "#purple\n", + "LowerlimitGB_elapsed_1 = 0\n", + "UpperlimitGB_elapsed_1 = 20\n", + "LowerlimitElapsed_elapsed_1 = 0\n", + "UpperlimitElapsed_elapsed_1 = 30\n", + "\n", + "#red\n", + "LowerlimitGB_elapsed_2 = 20\n", + "UpperlimitGB_elapsed_2 = 60\n", + "LowerlimitElapsed_elapsed_2 = 0\n", + "UpperlimitElapsed_elapsed_2 = 25\n", + "\n", + "#green\n", + "LowerlimitGB_elapsed_3 = 0\n", + "UpperlimitGB_elapsed_3 = 50\n", + "LowerlimitElapsed_elapsed_3 = 30\n", + "UpperlimitElapsed_elapsed_3 = 150" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphing of the Purple Section of Runtime per Requested gigs of RAM Clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using 
the min and max parameters created above\n", + "df_elapsed_1 = df_2[(df_2['ReqMemCPU'] <= UpperlimitGB_elapsed_1) & \n", + " (df_2['ReqMemCPU'] >= LowerlimitGB_elapsed_1) & \n", + " (df_2['Elapsed'] <= UpperlimitElapsed_elapsed_1) & \n", + " (df_2['Elapsed'] > LowerlimitElapsed_elapsed_1)]\n", + "df_elapsed_1.head(10)\n", + "#df_elapsed_1.ReqMemCPU.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_swarmplot1 = df_elapsed_1.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n", + "\n", + "df_swarmplot1.head(10)\n", + "#df_swarmplot1.ReqMemCPU.count()" ] }, { @@ -354,19 +478,68 @@ "source": [ "# must run\n", "\n", - "# clustered graph\n", "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - "elapsed_runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n", - "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n", + "elapsed_mem_cluster_graph_1 = plt.scatter(df_elapsed_1['ReqMemCPU'],df_elapsed_1['Elapsed'])\n", "\n", "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('Elapsed(hours)')\n", - "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed)\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_1)\n", "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_mem_swarm_graph_1 = sns.swarmplot(data=df_swarmplot1, x='ReqMemCPU', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphing of the Red Section of Runtime 
per Requested gigs of RAM Clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of ReqMemCPU and Elapsed using the min and max parameters created above\n", + "df_elapsed_2 = df_2[(df_2['ReqMemCPU'] <= UpperlimitGB_elapsed_2) & \n", + " (df_2['ReqMemCPU'] >= LowerlimitGB_elapsed_2) & \n", + " (df_2['Elapsed'] <= UpperlimitElapsed_elapsed_2) & \n", + " (df_2['Elapsed'] > LowerlimitElapsed_elapsed_2)]\n", + "#df_elapsed_2.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_swarmplot2 = df_elapsed_2.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n", + "\n", + "#df_swarmplot2.head(5)" + ] + }, + { "cell_type": "code", "execution_count": null, @@ -375,16 +548,31 @@ "source": [ "# must run\n", "\n", - "# clustered graph\n", "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - "elapsed_alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n", - "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n", + "elapsed_mem_cluster_graph_2 = plt.scatter(df_elapsed_2['ReqMemCPU'],df_elapsed_2['Elapsed'])\n", "\n", - "plt.xlabel('AllocCPUS')\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('Elapsed(hours)')\n", - "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_elapsed)\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_2)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_mem_swarm_graph_2 = sns.swarmplot(data=df_swarmplot2, x='ReqMemCPU', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", + 
"plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_2)\n", "plt.show()" ] }, @@ -392,7 +580,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Detailed Look at Elapsed Time - In terms of Requested RAM and Cores" + "# Graphing of the Green Section of Runtime per Requested gigs of RAM Clusters" ] }, { @@ -403,9 +591,12 @@ "source": [ "# must run\n", "\n", - "# second set of min and max parameters for ReqMemCPU to use for AllocCPU/ReqMemCPU cluster graph \n", - "UpperlimitGB_alloc = 50\n", - "LowerlimitGB_alloc = 0" + "# creates dataset of ReqMemCPU and Elapsed using the min and max parameters created above\n", + "df_elapsed_3 = df_2[(df_2['ReqMemCPU'] <= UpperlimitGB_elapsed_3) & \n", + " (df_2['ReqMemCPU'] >= LowerlimitGB_elapsed_3) & \n", + " (df_2['Elapsed'] <= UpperlimitElapsed_elapsed_3) & \n", + " (df_2['Elapsed'] > LowerlimitElapsed_elapsed_3)]\n", + "#df_elapsed_3.head(5)" ] }, { @@ -414,11 +605,9 @@ "metadata": {}, "outputs": [], "source": [ - "# must run\n", + "df_swarmplot3 = df_elapsed_3.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n", "\n", - "# sets min and max parameters for AllocCPUS\n", - "UpperlimitAllocCPU_alloc = 60\n", - "LowerlimitAllocCPU_alloc = 0" + "#df_swarmplot3.head(5)" ] }, { @@ -429,9 +618,39 @@ "source": [ "# must run\n", "\n", - "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n", - "df_allocCPUS_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_alloc) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_alloc) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_alloc) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_alloc)]\n", - "df_allocCPUS.head(5)" + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_mem_cluster_graph_3 = plt.scatter(df_elapsed_3['ReqMemCPU'],df_elapsed_3['Elapsed'])\n", + "\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", + 
"plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_3)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_mem_swarm_graph_3 = sns.swarmplot(data=df_swarmplot3, x='ReqMemCPU', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('ReqMemCPU(gigs)')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed_3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Elapsed/AllocCPUS clustering" ] }, { @@ -447,7 +666,7 @@ "K = range(1,10)\n", "for k in K:\n", " km = KMeans(n_clusters=k)\n", - " km = km.fit(df_allocCPUS_cluster)\n", + " km = km.fit(df_runtime_cluster)\n", " Sum_of_squared_distances.append(km.inertia_)" ] }, @@ -477,10 +696,263 @@ "\n", "# sets to clusters and returns the cluster points\n", "kmeans = KMeans(n_clusters=3, random_state=111)\n", - "kmeans.fit(df_allocCPUS_cluster)\n", + "kmeans.fit(df_runtime_cluster)\n", "print(kmeans.cluster_centers_)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# clustered graph; centroid coordinates are looked up by column name because kmeans was fit on all columns of df_runtime_cluster\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n", + "plt.scatter(kmeans.cluster_centers_[:, df_runtime_cluster.columns.get_loc('AllocCPUS')], kmeans.cluster_centers_[:, df_runtime_cluster.columns.get_loc('Elapsed')], color='black')\n", + "\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or 
less'%UpperlimitAllocCPU_elapsed)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_3 = df_1.loc[:,['Elapsed', 
'AllocCPUS']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# purple\n", + "LowerlimitElapsed_alloc1 = 0\n", + "UpperlimitElapsed_alloc1 = 35\n", + "\n", + "LowerlimitAllocCPU_alloc1 = 0\n", + "UpperlimitAllocCPU_alloc1 = 40\n", + "\n", + "#red\n", + "LowerlimitElapsed_alloc2 = 0\n", + "UpperlimitElapsed_alloc2 = 35\n", + "\n", + "LowerlimitAllocCPU_alloc2 = 0\n", + "UpperlimitAllocCPU_alloc2 = 16\n", + "\n", + "#green\n", + "LowerlimitElapsed_alloc3 = 35\n", + "UpperlimitElapsed_alloc3 = 160\n", + "\n", + "LowerlimitAllocCPU_alloc3 = 0\n", + "UpperlimitAllocCPU_alloc3 = 32" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphing of the Purple Section of Runtime per Core Clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of Elapsed and AllocCPUS using the min and max parameters created above\n", + "df_allocCPUS_1 = df_3[(df_3['Elapsed'] <= UpperlimitElapsed_alloc1) & \n", + " (df_3['Elapsed'] >= LowerlimitElapsed_alloc1) & \n", + " (df_3['AllocCPUS'] <= UpperlimitAllocCPU_alloc1) & \n", + " (df_3['AllocCPUS'] >= LowerlimitAllocCPU_alloc1)]\n", + "df_allocCPUS_1.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_swarmplot12 = df_allocCPUS_1.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n", + "\n", + "#df_swarmplot12.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_cluster_graph_1 = plt.scatter(df_allocCPUS_1['AllocCPUS'],df_allocCPUS_1['Elapsed'])\n", + "\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + 
"plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_swarm_graph_1 = sns.swarmplot(data=df_swarmplot12, x='AllocCPUS', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphing of the Red Section of Runtime per Core Clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of Elapsed and AllocCPUS using the min and max parameters created above\n", + "df_allocCPUS_2 = df_3[(df_3['Elapsed'] <= UpperlimitElapsed_alloc2) & \n", + " (df_3['Elapsed'] >= LowerlimitElapsed_alloc2) & \n", + " (df_3['AllocCPUS'] <= UpperlimitAllocCPU_alloc2) & \n", + " (df_3['AllocCPUS'] >= LowerlimitAllocCPU_alloc2)]\n", + "df_allocCPUS_2.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_swarmplot22 = df_allocCPUS_2.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n", + "\n", + "#df_swarmplot22.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_cluster_graph_2 = plt.scatter(df_allocCPUS_2['AllocCPUS'],df_allocCPUS_2['Elapsed'])\n", + "\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc2)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_swarm_graph_2 = sns.swarmplot(data=df_swarmplot22, x='AllocCPUS', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc2)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphing of the Green Section of Runtime per Core Clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of Elapsed and AllocCPUS using the min and max parameters created above\n", + "df_allocCPUS_3 = df_3[(df_3['Elapsed'] <= UpperlimitElapsed_alloc3) & \n", + " (df_3['Elapsed'] >= LowerlimitElapsed_alloc3) & \n", + " (df_3['AllocCPUS'] <= UpperlimitAllocCPU_alloc3) & \n", + " (df_3['AllocCPUS'] >= LowerlimitAllocCPU_alloc3)]\n", + "df_allocCPUS_3.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_swarmplot32 = df_allocCPUS_3.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n", + "\n", + "#df_swarmplot32.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "elapsed_alloc_cluster_graph_3 = plt.scatter(df_allocCPUS_3['AllocCPUS'],df_allocCPUS_3['Elapsed'])\n", + "\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc3)\n", + "plt.show()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -490,17 +962,117 @@ "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - 
"alloc_reqmem_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_allocCPUS_cluster)\n", + "elapsed_alloc_swarm_graph_3 = sns.swarmplot(data=df_swarmplot32, x='AllocCPUS', y='Elapsed')\n", + "plt.margins(0.02)\n", + "plt.xlabel('AllocCPUS')\n", + "plt.ylabel('Elapsed(hours)')\n", + "plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_alloc3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Detailed Look at Cores - In terms of Requested RAM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "LowerlimitGB_core = 0\n", + "UpperlimitGB_core = 50\n", "\n", - "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n", + "LowerlimitAllocCPU_core = 0\n", + "UpperlimitAllocCPU_core = 65" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n", + "df_cores_cluster = df_1[(df_1['ReqMemCPU'] <= UpperlimitGB_core) & \n", + " (df_1['ReqMemCPU'] >= LowerlimitGB_core) & \n", + " (df_1['AllocCPUS'] <= UpperlimitAllocCPU_core) & \n", + " (df_1['AllocCPUS'] >= LowerlimitAllocCPU_core)]\n", + "df_cores_cluster.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# unclustered graph\n", + "style.default_axes_and_ticks()\n", + "style.figsize()\n", + "\n", + "alloc_reqmem_cluster_graph = plt.scatter(df_cores_cluster['ReqMemCPU'],df_cores_cluster['AllocCPUS'])\n", "\n", "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('AllocCPUS')\n", - "#plt.yscale(\"log\")\n", + "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_core)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "# must run\n", "\n", + "# sets up info for plotting the optimal number of clusters - uses df_cores_cluster dataset\n", + "Sum_of_squared_distances = []\n", + "K = range(1,10)\n", + "for k in K:\n", + " km = KMeans(n_clusters=k)\n", + " km = km.fit(df_cores_cluster)\n", + " Sum_of_squared_distances.append(km.inertia_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# the bend in the graph is the optimal number of clusters for graphs using the df_cores_cluster dataset\n", + "plt.plot(K, Sum_of_squared_distances, 'bx-')\n", + "plt.xlabel('k')\n", + "plt.ylabel('Sum_of_squared_distances')\n", + "plt.title('Elbow Method For Optimal k')\n", "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# sets to clusters and returns the cluster points\n", + "kmeans = KMeans(n_clusters=5, random_state=111)\n", + "kmeans.fit(df_cores_cluster)\n", + "print(kmeans.cluster_centers_)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -513,15 +1085,54 @@ "style.default_axes_and_ticks()\n", "style.figsize()\n", "\n", - "alloc_reqmem_cluster_graph = plt.scatter(df_allocCPUS_cluster['ReqMemCPU'],df_allocCPUS_cluster['AllocCPUS'], c=kmeans.labels_, cmap='rainbow')\n", + "alloc_reqmem_cluster_graph = plt.scatter(df_cores_cluster['ReqMemCPU'],df_cores_cluster['AllocCPUS'], c=kmeans.labels_, cmap='rainbow')\n", "plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n", "\n", "plt.xlabel('ReqMemCPU(gigs)')\n", "plt.ylabel('AllocCPUS')\n", - "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n", + "plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_core)\n", "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "df_4 = df_1.loc[:,['ReqMemCPU', 'AllocCPUS']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# purple\n", + "LowerlimitElapsed_alloc1 = 0\n", + "UpperlimitElapsed_alloc1 = 35\n", + "\n", + "LowerlimitAllocCPU_alloc1 = 0\n", + "UpperlimitAllocCPU_alloc1 = 40\n", + "\n", + "#red\n", + "LowerlimitElapsed_alloc2 = 0\n", + "UpperlimitElapsed_alloc2 = 35\n", + "\n", + "LowerlimitAllocCPU_alloc2 = 0\n", + "UpperlimitAllocCPU_alloc2 = 16\n", + "\n", + "#green\n", + "LowerlimitElapsed_alloc3 = 35\n", + "UpperlimitElapsed_alloc3 = 160\n", + "\n", + "LowerlimitAllocCPU_alloc3 = 0\n", + "UpperlimitAllocCPU_alloc3 = 32" + ] + }, { "cell_type": "code", "execution_count": null,