Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
createAndParseSACCT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ryan Randles Jones
createAndParseSACCT
Commits
d6c6d60e
Commit
d6c6d60e
authored
4 years ago
by
Ryan Randles Jones
Browse files
Options
Downloads
Patches
Plain Diff
added 2d histograms for clusters
parent
af17830d
Branches
ClusterAnalysis-Excluded-Data
Branches containing commit
No related tags found
1 merge request
!4
Cluster Analysis(ReqMemCPU, AllocCPUS, Elapsed)
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Cluster_Analysis.ipynb
+452
-86
452 additions, 86 deletions
Cluster_Analysis.ipynb
with
452 additions
and
86 deletions
Cluster_Analysis.ipynb
+
452
−
86
View file @
d6c6d60e
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
"\n",
"\n",
"# sets min and max parameters for ReqMemCPU\n",
"# sets min and max parameters for ReqMemCPU\n",
"LowerlimitGB = 0\n",
"LowerlimitGB = 0\n",
"UpperlimitGB = 50"
"UpperlimitGB = 50 # gigs per cpu"
]
]
},
},
{
{
...
@@ -40,7 +40,7 @@
...
@@ -40,7 +40,7 @@
"\n",
"\n",
"# sets min and max parameters for AllocCPUS\n",
"# sets min and max parameters for AllocCPUS\n",
"LowerlimitAllocCPU = 0\n",
"LowerlimitAllocCPU = 0\n",
"UpperlimitAllocCPU = 50"
"UpperlimitAllocCPU = 50 #cpus"
]
]
},
},
{
{
...
@@ -53,7 +53,7 @@
...
@@ -53,7 +53,7 @@
"\n",
"\n",
"# sets min and max parameters for Elapsed\n",
"# sets min and max parameters for Elapsed\n",
"LowerlimitElapsed = 0\n",
"LowerlimitElapsed = 0\n",
"UpperlimitElapsed = 150.02"
"UpperlimitElapsed = 150.02 #in hours - 6.25 days"
]
]
},
},
{
{
...
@@ -62,7 +62,7 @@
...
@@ -62,7 +62,7 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Enter 'none', '0-1', or 'log' as achoice for data nomralization\n",
"# Enter 'none', '0-1', or 'log' as a choice for data normalization\n",
"Data_Normalization_Choice = 'none'"
"Data_Normalization_Choice = 'none'"
]
]
},
},
...
@@ -274,6 +274,86 @@
...
@@ -274,6 +274,86 @@
" print(clusterpoints[:,0],clusterpoints[:,1])\n"
" print(clusterpoints[:,0],clusterpoints[:,1])\n"
]
]
},
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Separating the Clusters for 2d Histograms"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the parameters in the labels shown above\n",
"\n",
"#Purple\n",
"df_purple = df_clustering[kmeans_cluster.labels_ == 0]\n",
"\n",
"#Green\n",
"df_green = df_clustering[kmeans_cluster.labels_ == 1]\n",
"\n",
"#Red\n",
"df_red = df_clustering[kmeans_cluster.labels_ == 2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# voluntary\n",
"\n",
"# returns the min and max ReqMemCPU, Elapsed, and AllocCPUS for each cluster using the datasets created above. \n",
"# These are the parameters for the scatter plots of each cluster\n",
"print(\"Purple Cluster\")\n",
"print(\"ReqMemCPU:\", \"min =\",df_purple.ReqMemCPU.min(),\" \",\"max =\",df_purple.ReqMemCPU.max())\n",
"print(\"Elapsed:\", \"min =\",df_purple.Elapsed.min(),\" \",\"max =\",df_purple.Elapsed.max())\n",
"print(\"AllocCPUS:\", \"min =\",df_purple.AllocCPUS.min(),\" \",\"max =\",df_purple.AllocCPUS.max())\n",
"\n",
"print(\"\\nGreen Cluster\")\n",
"print(\"ReqMemCPU:\", \"min =\",df_green.ReqMemCPU.min(),\" \",\"max =\",df_green.ReqMemCPU.max())\n",
"print(\"Elapsed:\", \"min =\",df_green.Elapsed.min(),\" \",\"max =\",df_green.Elapsed.max())\n",
"print(\"AllocCPUS:\", \"min =\",df_green.AllocCPUS.min(),\" \",\"max =\",df_green.AllocCPUS.max())\n",
"\n",
"print(\"\\nRed Cluster\")\n",
"print(\"ReqMemCPU:\", \"min =\",df_red.ReqMemCPU.min(),\" \",\"max =\",df_red.ReqMemCPU.max())\n",
"print(\"Elapsed:\", \"min =\",df_red.Elapsed.min(),\" \",\"max =\",df_red.Elapsed.max())\n",
"print(\"AllocCPUS:\", \"min =\",df_red.AllocCPUS.min(),\" \",\"max =\",df_red.AllocCPUS.max())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# Creates datasets used to make the 2d histograms that correspond to each cluster scatter plot. \n",
"# The groupby does not change the data, but it does make a small enough dataset\n",
"\n",
"# for purple cluster \n",
"df_purlple_2d1 = df_purple.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
"df_purlple_2d2 = df_purple.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
"df_purlple_2d3 = df_purple.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
"\n",
"# for green cluster\n",
"df_green_2d1 = df_green.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
"df_green_2d2 = df_green.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
"df_green_2d3 = df_green.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()\n",
"\n",
"# for red cluster\n",
"df_red_2d1 = df_red.groupby(['ReqMemCPU','Elapsed']).sum().reset_index()\n",
"df_red_2d2 = df_red.groupby(['AllocCPUS','Elapsed']).sum().reset_index()\n",
"df_red_2d3 = df_red.groupby(['ReqMemCPU','AllocCPUS']).sum().reset_index()"
]
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": null,
"execution_count": null,
...
@@ -284,126 +364,267 @@
...
@@ -284,126 +364,267 @@
"\n",
"\n",
"figure = plt.figure()\n",
"figure = plt.figure()\n",
"\n",
"\n",
"figure.set_size_inches(20,20)\n",
"figure.set_size_inches(20,40)\n",
"\n",
"# ReqMem/Elapsed 2d Graph\n",
"rqmem_elapsed_clustergraph = figure.add_subplot(5,3,1)\n",
"\n",
"\n",
"# Elapsed/ReqMem 2d Graph\n",
"rqmem_elapsed_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'], \n",
"elapsed_rqmem_clustergraph = figure.add_subplot(3,3,1)\n",
"#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
"elapsed_rqmem_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'], \n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
"elapsed_
rqmem_
clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')\n",
"
rqmem_
elapsed_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.title('Runtime/Requested Gigs RAM')\n",
"\n",
"\n",
"\n",
"\n",
"# Elapsed/Alloc 2d Graph\n",
"# Alloc/Elapsed 2d Graph\n",
"elapsed_alloc_clustergraph = figure.add_subplot(3,3,2)\n",
"alloc_elapsed_clustergraph = figure.add_subplot(5,3,2)\n",
"#figure.suptitle('Runtime per Core %i cores or less'%UpperlimitAllocCPU)\n",
"alloc_elapsed_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'], \n",
"elapsed_alloc_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'], \n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
"elapsed_
alloc_
clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')\n",
"
alloc_
elapsed_clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')\n",
"plt.xlabel('AllocCPUS')\n",
"plt.xlabel('AllocCPUS')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.title('Runtime/Core')\n",
"\n",
"\n",
"# Alloc/ReqMem 2d Graph\n",
"# ReqMem/Alloc 2d Graph\n",
"alloc_rqmem_clustergraph = figure.add_subplot(3,3,3)\n",
"rqmem_alloc_clustergraph = figure.add_subplot(5,3,3)\n",
"#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
"rqmem_alloc_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'], \n",
"alloc_rqmem_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'], \n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
" c=kmeans_cluster.labels_, cmap='rainbow')\n",
"
elapsed_rqmem
_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')\n",
"
rqmem_alloc
_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('AllocCPUS')\n",
"plt.ylabel('AllocCPUS')\n",
"plt.title('Cores/Requested Gigs RAM')\n",
"\n",
"\n",
"###########\n",
"###########
3d Graphs
\n",
"#
Alloc/
ReqMem 3d Graph\n",
"# ReqMem
/Alloc
3d Graph\n",
"
alloc_reqmem
_clustergraph_3d = figure.add_subplot(
3
,3,4, projection='3d')\n",
"
rqmem_alloc
_clustergraph_3d = figure.add_subplot(
5
,3,4, projection='3d')\n",
"
alloc_reqmem
_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'], \n",
"
rqmem_alloc
_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'], \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
"
alloc_reqmem
_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')\n",
"
rqmem_alloc
_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')\n",
"
alloc_reqmem
_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n",
"
rqmem_alloc
_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n",
"
alloc_reqmem
_clustergraph_3d.set_ylabel('AllocCPUS')\n",
"
rqmem_alloc
_clustergraph_3d.set_ylabel('AllocCPUS')\n",
"
alloc_reqmem
_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"
rqmem_alloc
_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"\n",
"\n",
"# sets size and color for gridlines by axis\n",
"# sets size and color for gridlines by axis\n",
"
alloc_reqmem
_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
rqmem_alloc
_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
alloc_reqmem
_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
rqmem_alloc
_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
alloc_reqmem
_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
rqmem_alloc
_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"\n",
"\n",
"# Elapsed
/Alloc
3d Graph\n",
"#
Alloc/
Elapsed 3d Graph\n",
"elapsed_
alloc_
clustergraph_3d = figure.add_subplot(
3
,3,5, projection='3d')\n",
"
alloc_
elapsed_clustergraph_3d = figure.add_subplot(
5
,3,5, projection='3d')\n",
"elapsed_
alloc_
clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'], \n",
"
alloc_
elapsed_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'], \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
"elapsed_
alloc_
clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')\n",
"
alloc_
elapsed_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')\n",
"elapsed_
alloc_
clustergraph_3d.set_xlabel('AllocCPUS')\n",
"
alloc_
elapsed_clustergraph_3d.set_xlabel('AllocCPUS')\n",
"elapsed_
alloc_
clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')\n",
"
alloc_
elapsed_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')\n",
"elapsed_
alloc_
clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"
alloc_
elapsed_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"\n",
"\n",
"elapsed_
alloc_
clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
alloc_
elapsed_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_
alloc_
clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
alloc_
elapsed_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_
alloc_
clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"
alloc_
elapsed_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"# Elapsed
/ReqMem
3d Graph\n",
"#
ReqMem/
Elapsed 3d Graph\n",
"elapsed_
rqmem_
clustergraph_3d = figure.add_subplot(
3
,3,6, projection='3d')\n",
"
rqmem_
elapsed_clustergraph_3d = figure.add_subplot(
5
,3,6, projection='3d')\n",
"elapsed_
rqmem_
clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'], \n",
"
rqmem_
elapsed_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'], \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
" c=kmeans_cluster.labels_ ,cmap='rainbow')\n",
"elapsed_rqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')\n",
"rqmem_elapsed_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')\n",
"\n",
"rqmem_elapsed_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
"rqmem_elapsed_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
"rqmem_elapsed_clustergraph_3d.set_zlabel('AllocCPUS')\n",
"\n",
"rqmem_elapsed_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"rqmem_elapsed_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"rqmem_elapsed_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Creating bins \n",
"\n",
"####Purple\n",
"purple_rqmem_min = np.min(df_purple.ReqMemCPU.min())\n",
"purple_rqmem_max = np.max(df_purple.ReqMemCPU.max())\n",
" \n",
"purple_elapsed_min = np.min(df_purple.Elapsed.min()) \n",
"purple_elapsed_max = np.max(df_purple.Elapsed.max()) \n",
"\n",
"purple_alloc_min = np.min(df_purple.AllocCPUS.min()) \n",
"purple_alloc_max = np.max(df_purple.AllocCPUS.max())\n",
" \n",
" \n",
"x_purple_rqmem_elapsed_bins = list(range(purple_rqmem_max))\n",
"y_purple_rqmem_elapsed_bins = list(range(int(purple_elapsed_max)))\n",
"\n",
"\n",
"elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
"x_purple_alloc_elapsed_bins = list(range(purple_alloc_max))\n",
"elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
"y_purple_alloc_elapsed_bins = list(range(int(purple_elapsed_max))) \n",
"elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')\n",
"\n",
"\n",
"elapsed_rqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"x_purple_reqmem_alloc_bins = list(range(purple_rqmem_max))\n",
"elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"y_purple_reqmem_alloc_bins = list(range(int(purple_alloc_max))) \n",
"elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"\n",
"\n",
"##############\n",
"####Green\n",
"# Alloc/ReqMem 3d Graph\n",
"green_rqmem_min = np.min(df_green.ReqMemCPU.min())\n",
"alloc_reqmem_clustergraph_3d = figure.add_subplot(3,3,7, projection='3d')\n",
"green_rqmem_max = np.max(df_green.ReqMemCPU.max())\n",
"alloc_reqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'], \n",
" \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)\n",
"green_elapsed_min = np.min(df_green.Elapsed.min()) \n",
"alloc_reqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')\n",
"green_elapsed_max = np.max(df_green.Elapsed.max()) \n",
"alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n",
"alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')\n",
"alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"\n",
"\n",
"# sets size and color for gridlines by axis\n",
"green_alloc_min = np.min(df_green.AllocCPUS.min()) \n",
"alloc_reqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"green_alloc_max = np.max(df_green.AllocCPUS.max())\n",
"alloc_reqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
" \n",
"alloc_reqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
" \n",
"x_green_rqmem_elapsed_bins = list(range(green_rqmem_max))\n",
"y_green_rqmem_elapsed_bins = list(range(int(green_elapsed_max)))\n",
"\n",
"\n",
"x_green_alloc_elapsed_bins = list(range(green_alloc_max))\n",
"y_green_alloc_elapsed_bins = list(range(int(green_elapsed_max))) \n",
"\n",
"\n",
"# Elapsed/Alloc 3d Graph\n",
"x_green_reqmem_alloc_bins = list(range(green_rqmem_max))\n",
"elapsed_alloc_clustergraph_3d = figure.add_subplot(3,3,8, projection='3d')\n",
"y_green_reqmem_alloc_bins = list(range(int(green_alloc_max))) \n",
"elapsed_alloc_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'], \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)\n",
"elapsed_alloc_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')\n",
"elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')\n",
"elapsed_alloc_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')\n",
"elapsed_alloc_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
"\n",
"\n",
"elapsed_alloc_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_alloc_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_alloc_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"####Red\n",
"red_rqmem_min = np.min(df_red.ReqMemCPU.min())\n",
"red_rqmem_max = np.max(df_red.ReqMemCPU.max())\n",
" \n",
"red_elapsed_min = np.min(df_red.Elapsed.min()) \n",
"red_elapsed_max = np.max(df_red.Elapsed.max()) \n",
"\n",
"\n",
"red_alloc_min = np.min(df_red.AllocCPUS.min()) \n",
"red_alloc_max = np.max(df_red.AllocCPUS.max())\n",
" \n",
" \n",
"x_red_rqmem_elapsed_bins = list(range(red_rqmem_max))\n",
"y_red_rqmem_elapsed_bins = list(range(int (red_elapsed_max)))\n",
"\n",
"\n",
"# Elapsed/ReqMem 3d Graph\n",
"x_red_alloc_elapsed_bins = list(range(red_alloc_max))\n",
"elapsed_rqmem_clustergraph_3d = figure.add_subplot(3,3,9, projection='3d')\n",
"y_red_alloc_elapsed_bins = list(range(int (red_elapsed_max)))\n",
"elapsed_rqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'], \n",
" c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)\n",
"elapsed_rqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')\n",
"\n",
"\n",
"elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
"x_red_reqmem_alloc_bins = list(range(red_rqmem_max)) # list range gives one bin per gig\n",
"elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
"y_red_reqmem_alloc_bins = list(range(red_alloc_max)) # list range gives one bin per cpu"
"elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure()\n",
"fig.set_size_inches(20,20)\n",
"\n",
"#####Green\n",
"ax = fig.add_subplot(331)\n",
"rqmem_elapsed_green_hist = ax.hist2d(df_green_2d1['ReqMemCPU'],df_green_2d1['Elapsed'], \n",
" bins =[x_green_rqmem_elapsed_bins, y_green_rqmem_elapsed_bins], \n",
" cmap = plt.cm.Blues)\n",
"ax.set_xlabel('ReqMemCPU(gigs)')\n",
"ax.set_ylabel('Elapsed(hours)')\n",
"ax.set_title('Green Cluster')\n",
"ax.set_xlim(0,40)\n",
"ax.set_ylim(0,140)\n",
"\n",
"\n",
"ax2 = fig.add_subplot(332)\n",
"alloc_elapsed_green_hist = ax2.hist2d(df_green_2d2['AllocCPUS'],df_green_2d2['Elapsed'], \n",
" bins =[x_green_alloc_elapsed_bins, y_green_alloc_elapsed_bins],\n",
" cmap = plt.cm.Blues)\n",
"ax2.set_xlabel('AllocCPUS')\n",
"ax2.set_ylabel('Elapsed(hours)')\n",
"ax2.set_title('Green Cluster')\n",
"ax2.set_xlim(0,40)\n",
"ax2.set_ylim(0,140)\n",
"\n",
"\n",
"ax3 = fig.add_subplot(333)\n",
"reqmem_alloc_green_hist = ax3.hist2d(df_green_2d3['ReqMemCPU'],df_green_2d3['AllocCPUS'], \n",
" bins =[x_green_reqmem_alloc_bins, y_green_reqmem_alloc_bins],\n",
" cmap = plt.cm.Blues)\n",
"ax3.set_xlabel('ReqMemCPU(gigs)')\n",
"ax3.set_ylabel('AllocCPUS')\n",
"ax3.set_title('Green Cluster')\n",
"ax3.set_xlim(0,40)\n",
"ax3.set_ylim(0,140)\n",
"\n",
" \n",
"####Purple\n",
"ax4 = fig.add_subplot(334) # This represents a (3x3) grid (row x col) and we are plotting the (1) subplot. The last number increments row-wise.\n",
"rqmem_elapsed_purple_hist = ax4.hist2d(df_purlple_2d1['ReqMemCPU'],df_purlple_2d1['Elapsed'], \n",
" bins =[x_purple_rqmem_elapsed_bins, y_purple_rqmem_elapsed_bins], \n",
" cmap = plt.cm.Blues)\n",
"ax4.set_xlabel('ReqMemCPU(gigs)')\n",
"ax4.set_ylabel('Elapsed(hours)')\n",
"ax4.set_title('Purple Cluster')\n",
"ax4.set_xlim(0,40)\n",
"ax4.set_ylim(0,140)\n",
"\n",
"\n",
"ax5 = fig.add_subplot(335) # Second subplot\n",
"alloc_elapsed_purple_hist = ax5.hist2d(df_purlple_2d2['AllocCPUS'],df_purlple_2d2['Elapsed'], \n",
" bins =[x_purple_alloc_elapsed_bins, y_purple_alloc_elapsed_bins], \n",
" cmap = plt.cm.Blues)\n",
"ax5.set_xlabel('AllocCPUS')\n",
"ax5.set_ylabel('Elapsed(hours)')\n",
"ax5.set_title('Purple Cluster')\n",
"ax5.set_xlim(0,40)\n",
"ax5.set_ylim(0,140)\n",
"\n",
"\n",
"ax6 = fig.add_subplot(336)\n",
"reqmem_alloc_purple_hist = ax6.hist2d(df_purlple_2d3['ReqMemCPU'],df_purlple_2d3['AllocCPUS'], \n",
" bins =[x_purple_reqmem_alloc_bins, y_purple_reqmem_alloc_bins], \n",
" cmap = plt.cm.Blues) # use magma or\n",
"ax6.set_xlabel('ReqMemCPU(gigs)')\n",
"ax6.set_ylabel('AllocCPUS')\n",
"ax6.set_title('Purple Cluster')\n",
"ax6.set_xlim(0,40)\n",
"ax6.set_ylim(0,140)\n",
"\n",
"\n",
"#####Red\n",
"ax7 = fig.add_subplot(337)\n",
"rqmem_elapsed_red_hist = ax7.hist2d(df_red_2d1['ReqMemCPU'],df_red_2d1['Elapsed'], \n",
" bins =[x_red_rqmem_elapsed_bins, y_red_rqmem_elapsed_bins],\n",
" cmap = plt.cm.Blues)\n",
"ax7.set_xlabel('ReqMemCPU(gigs)')\n",
"ax7.set_ylabel('Elapsed(hours)')\n",
"ax7.set_title('Red Cluster')\n",
"ax7.set_xlim(0,40)\n",
"ax7.set_ylim(0,140)\n",
"\n",
"\n",
"ax8 = fig.add_subplot(338)\n",
"alloc_elapsed_red_hist = ax8.hist2d(df_red_2d2['AllocCPUS'],df_red_2d2['Elapsed'], \n",
" bins =[x_red_reqmem_alloc_bins, y_red_reqmem_alloc_bins],\n",
" cmap = plt.cm.Blues)\n",
"ax8.set_xlabel('AllocCPUS')\n",
"ax8.set_ylabel('Elapsed(hours)')\n",
"ax8.set_title('Red Cluster')\n",
"ax8.set_xlim(0,40)\n",
"ax8.set_ylim(0,140)\n",
"\n",
"\n",
"ax9 = fig.add_subplot(339)\n",
"reqmem_alloc_red_hist = ax9.hist2d(df_red_2d3['ReqMemCPU'],df_red_2d3['AllocCPUS'], \n",
" bins =[x_red_reqmem_alloc_bins, y_red_reqmem_alloc_bins],\n",
" cmap = plt.cm.Blues)\n",
"ax9.set_xlabel('ReqMemCPU(gigs)')\n",
"ax9.set_ylabel('AllocCPUS')\n",
"ax9.set_title('Red Cluster')\n",
"ax9.set_xlim(0,40)\n",
"ax9.set_ylim(0,140)\n",
"\n",
"\n",
"elapsed_rqmem_clustergraph_3d.xaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"\n",
"\n",
"\n",
"# sets the spacing\n",
"# sets the spacing\n",
...
@@ -411,6 +632,151 @@
...
@@ -411,6 +632,151 @@
"# left = space to the left\n",
"# left = space to the left\n",
"# wspace = padding on both sides of graphs\n",
"# wspace = padding on both sides of graphs\n",
"# hspace = padding on top and bottom of graphs\n",
"# hspace = padding on top and bottom of graphs\n",
"\n",
"figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)\n",
"figure.suptitle('Clusters', fontsize=20)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Creating bins \n",
"\n",
"#### Reqmem/Elapsed\n",
"x_rqmem_elapsed_min = np.min(LowerlimitGB)\n",
"x_rqmem_elapsed_max = np.max(UpperlimitGB)\n",
" \n",
"y_rqmem_elapsed_min = np.min(LowerlimitElapsed) \n",
"y_rqmem_elapsed_max = np.max(UpperlimitElapsed) \n",
" \n",
"x_rqmem_elapsed_bins = np.linspace(x_rqmem_elapsed_min, x_rqmem_elapsed_max, 50) \n",
"y_rqmem_elapsed_bins = np.linspace(y_rqmem_elapsed_min, y_rqmem_elapsed_max, 20) \n",
"\n",
"####Alloc/Elapsed\n",
"x_alloc_elapsed_min = np.min(LowerlimitAllocCPU)\n",
"x_alloc_elapsed_max = np.max(UpperlimitAllocCPU)\n",
" \n",
"y_alloc_elapsed_min = np.min(LowerlimitElapsed) \n",
"y_alloc_elapsed_max = np.max(UpperlimitElapsed)\n",
"\n",
"x_alloc_elapsed_bins = np.linspace(x_alloc_elapsed_min, x_alloc_elapsed_max, 50) \n",
"y_alloc_elapsed_bins = np.linspace(y_alloc_elapsed_min, y_alloc_elapsed_max, 20) \n",
"\n",
"\n",
"###Alloc/Reqmem\n",
"x_reqmem_alloc_min = np.min(LowerlimitGB)\n",
"x_reqmem_alloc_max = np.max(UpperlimitGB)\n",
" \n",
"y_reqmem_alloc_min = np.min(LowerlimitAllocCPU) \n",
"y_reqmem_alloc_max = np.max(UpperlimitAllocCPU)\n",
"\n",
"x_reqmem_alloc_bins = np.linspace(x_reqmem_alloc_min, x_reqmem_alloc_max, 50) \n",
"y_reqmem_alloc_bins = np.linspace(y_reqmem_alloc_min, y_reqmem_alloc_max, 20) \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure()\n",
"fig.set_size_inches(20,20)\n",
" \n",
"####Purple\n",
"ax = fig.add_subplot(331) # This represents a (3x3) grid (row x col) and we are plotting the (1) subplot. The last number increments row-wise.\n",
"rqmem_elapsed_purple_hist = ax.hist2d(df_purlple_2d1['ReqMemCPU'],df_purlple_2d1['Elapsed'], \n",
" bins =[x_rqmem_elapsed_bins, y_rqmem_elapsed_bins], \n",
" cmap = plt.cm.Greys)\n",
"ax.set_xlabel('ReqMemCPU(gigs)')\n",
"ax.set_ylabel('Elapsed(hours)')\n",
"ax.set_title('Purple Cluster')\n",
"\n",
"\n",
"ax2 = fig.add_subplot(332) # Second subplot\n",
"alloc_elapsed_purple_hist = ax2.hist2d(df_purlple_2d2['AllocCPUS'],df_purlple_2d2['Elapsed'], \n",
" bins =[x_alloc_elapsed_bins, y_alloc_elapsed_bins], \n",
" cmap = plt.cm.Greys)\n",
"ax2.set_xlabel('AllocCPUS')\n",
"ax2.set_ylabel('Elapsed(hours)')\n",
"ax2.set_title('Purple Cluster')\n",
"\n",
"\n",
"ax3 = fig.add_subplot(333)\n",
"reqmem_alloc_purple_hist = ax3.hist2d(df_purlple_2d3['ReqMemCPU'],df_purlple_2d3['AllocCPUS'], \n",
" bins =[x_reqmem_alloc_bins, y_reqmem_alloc_bins], \n",
" cmap = plt.cm.Greys) # use magma or\n",
"ax3.set_xlabel('ReqMemCPU(gigs)')\n",
"ax3.set_ylabel('AllocCPUS')\n",
"ax3.set_title('Purple Cluster')\n",
"\n",
"\n",
"#####Green\n",
"ax4 = fig.add_subplot(334)\n",
"rqmem_elapsed_green_hist = ax4.hist2d(df_green_2d1['ReqMemCPU'],df_green_2d1['Elapsed'], \n",
" bins =[x_rqmem_elapsed_bins, y_rqmem_elapsed_bins], \n",
" cmap = plt.cm.Greys)\n",
"ax4.set_xlabel('ReqMemCPU(gigs)')\n",
"ax4.set_ylabel('Elapsed(hours)')\n",
"ax4.set_title('Green Cluster')\n",
"\n",
"\n",
"ax5 = fig.add_subplot(335)\n",
"alloc_elapsed_green_hist = ax5.hist2d(df_green_2d2['AllocCPUS'],df_green_2d2['Elapsed'], \n",
" bins =[x_alloc_elapsed_bins, y_alloc_elapsed_bins],\n",
" cmap = plt.cm.Greys)\n",
"ax5.set_xlabel('AllocCPUS')\n",
"ax5.set_ylabel('Elapsed(hours)')\n",
"ax5.set_title('Green Cluster')\n",
"\n",
"\n",
"ax6 = fig.add_subplot(336)\n",
"reqmem_alloc_green_hist = ax6.hist2d(df_green_2d3['ReqMemCPU'],df_green_2d3['AllocCPUS'], \n",
" bins =[x_reqmem_alloc_bins, y_reqmem_alloc_bins],\n",
" cmap = plt.cm.Greys)\n",
"ax6.set_xlabel('ReqMemCPU(gigs)')\n",
"ax6.set_ylabel('AllocCPUS')\n",
"ax6.set_title('Green Cluster')\n",
"\n",
"\n",
"#####Red\n",
"ax7 = fig.add_subplot(337)\n",
"rqmem_elapsed_red_hist = ax7.hist2d(df_red_2d1['ReqMemCPU'],df_red_2d1['Elapsed'], \n",
" bins =[x_rqmem_elapsed_bins, y_rqmem_elapsed_bins],\n",
" cmap = plt.cm.Greys)\n",
"ax7.set_xlabel('ReqMemCPU(gigs)')\n",
"ax7.set_ylabel('Elapsed(hours)')\n",
"ax7.set_title('Red Cluster')\n",
"\n",
"\n",
"ax8 = fig.add_subplot(338)\n",
"alloc_elapsed_red_hist = ax8.hist2d(df_red_2d2['AllocCPUS'],df_red_2d2['Elapsed'], \n",
" bins =[x_reqmem_alloc_bins, y_reqmem_alloc_bins],\n",
" cmap = plt.cm.Greys)\n",
"ax8.set_xlabel('AllocCPUS')\n",
"ax8.set_ylabel('Elapsed(hours)')\n",
"ax8.set_title('Red Cluster')\n",
"\n",
"\n",
"ax9 = fig.add_subplot(339)\n",
"reqmem_alloc_red_hist = ax9.hist2d(df_red_2d3['ReqMemCPU'],df_red_2d3['AllocCPUS'], \n",
" bins =[x_reqmem_alloc_bins, y_reqmem_alloc_bins],\n",
" cmap = plt.cm.Greys)\n",
"ax9.set_xlabel('ReqMemCPU(gigs)')\n",
"ax9.set_ylabel('AllocCPUS')\n",
"ax9.set_title('Red Cluster')\n",
"\n",
"# sets the spacing\n",
"# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
"# left = space to the left\n",
"# wspace = padding on both sides of graphs\n",
"# hspace = padding on top and bottom of graphs\n",
"\n",
"figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)\n",
"figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)\n",
"figure.suptitle('Clusters', fontsize=20)\n",
"figure.suptitle('Clusters', fontsize=20)\n",
"\n",
"\n",
...
...
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Data Setup Options
# Data Setup Options
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# year-date-month
# year-date-month
#start_date = '2020-10-09'
#start_date = '2020-10-09'
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# sets min and max parameters for ReqMemCPU
# sets min and max parameters for ReqMemCPU
LowerlimitGB = 0
LowerlimitGB = 0
UpperlimitGB = 50
UpperlimitGB = 50
# gigs per cpu
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# sets min and max parameters for AllocCPUS
# sets min and max parameters for AllocCPUS
LowerlimitAllocCPU = 0
LowerlimitAllocCPU = 0
UpperlimitAllocCPU = 50
UpperlimitAllocCPU = 50
#cpus
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# sets min and max parameters for Elapsed
# sets min and max parameters for Elapsed
LowerlimitElapsed = 0
LowerlimitElapsed = 0
UpperlimitElapsed = 150.02
UpperlimitElapsed = 150.02
#in hours - 6.25 days
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# Enter 'none', '0-1', or 'log' as achoice for data nomralization
# Enter 'none', '0-1', or 'log' as a choice for data normalization
Data_Normalization_Choice = 'none'
Data_Normalization_Choice = 'none'
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Imports
# Imports
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
import sqlite3
import sqlite3
import slurm2sql
import slurm2sql
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib inline
import seaborn as sns
import seaborn as sns
import seaborn as sb
import seaborn as sb
import plotly.express as px
import plotly.express as px
import matplotlib.ticker as ticker
import matplotlib.ticker as ticker
import numpy as np
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import Axes3D
import os
import os
from RC_styles import rc_styles as style
from RC_styles import rc_styles as style
from sklearn.cluster import KMeans
from sklearn.cluster import KMeans
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Database Creation
# Database Creation
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# creates database of info from March 2020 using sqlite 3
# creates database of info from March 2020 using sqlite 3
db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')
db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# df is starting database
# df is starting database
df = pd.read_sql('SELECT * FROM slurm', db)
df = pd.read_sql('SELECT * FROM slurm', db)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# df_1 is dataframe of all completed jobs
# df_1 is dataframe of all completed jobs
df_1 = df[df.State.str.contains('COMPLETED')]
df_1 = df[df.State.str.contains('COMPLETED')]
#df_completed.head(5)
#df_completed.head(5)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# dataset of needed columns for all graphs below
# dataset of needed columns for all graphs below
df_completed = df_1.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]
df_completed = df_1.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]
#df_1.head(5)
#df_1.head(5)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number
# converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number
df_completed['ReqMemCPU'] = df_completed['ReqMemCPU'].div(1024**3).apply(np.ceil).apply(int)
df_completed['ReqMemCPU'] = df_completed['ReqMemCPU'].div(1024**3).apply(np.ceil).apply(int)
#df_completed.head()
#df_completed.head()
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# converts Elapsed time from seconds to hours and rounds to 2 decimal places
# converts Elapsed time from seconds to hours and rounds to 2 decimal places
df_completed['Elapsed'] = df_completed['Elapsed'].div(3600).round(2)
df_completed['Elapsed'] = df_completed['Elapsed'].div(3600).round(2)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS for completed jobs using the min and max parameters created above
# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS for completed jobs using the min and max parameters created above
df_clustering = df_completed[(df_completed['ReqMemCPU'] <= UpperlimitGB) &
df_clustering = df_completed[(df_completed['ReqMemCPU'] <= UpperlimitGB) &
(df_completed['ReqMemCPU'] >= LowerlimitGB) &
(df_completed['ReqMemCPU'] >= LowerlimitGB) &
(df_completed['AllocCPUS'] <= UpperlimitAllocCPU) &
(df_completed['AllocCPUS'] <= UpperlimitAllocCPU) &
(df_completed['AllocCPUS'] >= LowerlimitAllocCPU)
(df_completed['AllocCPUS'] >= LowerlimitAllocCPU)
&
&
(df_completed['Elapsed'] <= UpperlimitElapsed) &
(df_completed['Elapsed'] <= UpperlimitElapsed) &
(df_completed['Elapsed'] >= LowerlimitElapsed)]
(df_completed['Elapsed'] >= LowerlimitElapsed)]
df_clustering.head(5)
df_clustering.head(5)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Normalizing the Data for ReqMem/Elapsed
# Normalizing the Data for ReqMem/Elapsed
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
if Data_Normalization_Choice == '0-1':
if Data_Normalization_Choice == '0-1':
column_max = df_clustering.max()
column_max = df_clustering.max()
df_clustering_max = column_max.max()
df_clustering_max = column_max.max()
fit = df_clustering / df_clustering_max
fit = df_clustering / df_clustering_max
print("0-1")
print("0-1")
elif Data_Normalization_Choice == 'log':
elif Data_Normalization_Choice == 'log':
fit = np.log10(df_clustering+1)
fit = np.log10(df_clustering+1)
print("log")
print("log")
else:
else:
fit = df_clustering
fit = df_clustering
print("none")
print("none")
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# kmeans Clustering
# kmeans Clustering
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
# sets to clusters and returns the cluster points
# sets to clusters and returns the cluster points
kmeans_cluster = KMeans(n_clusters=3, random_state=111)
kmeans_cluster = KMeans(n_clusters=3, random_state=111)
kmeans_cluster.fit(fit)
kmeans_cluster.fit(fit)
print(kmeans_cluster.cluster_centers_)
print(kmeans_cluster.cluster_centers_)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Reverting Cluster Points Back to align with UnNormalized data
# Reverting Cluster Points Back to align with UnNormalized data
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
if Data_Normalization_Choice == '0-1':
if Data_Normalization_Choice == '0-1':
clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max
clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max
print("0-1")
print("0-1")
elif Data_Normalization_Choice == 'log':
elif Data_Normalization_Choice == 'log':
clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1
clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1
print("log")
print("log")
else:
else:
clusterpoints = kmeans_cluster.cluster_centers_
clusterpoints = kmeans_cluster.cluster_centers_
print("none")
print("none")
print(clusterpoints[:,0],clusterpoints[:,1])
print(clusterpoints[:,0],clusterpoints[:,1])
```
```
%% Cell type:markdown id: tags:
# Separating the Clusters for 2d Histograms
%% Cell type:code id: tags:
```
# must run
# Splits df_clustering (ReqMemCPU, Elapsed, AllocCPUS) into one dataframe per k-means label.
# The dataframes are named after the colours in which the labels appear in the
# scatter plots (drawn with cmap='rainbow').
# label 0 -> purple
df_purple = df_clustering.loc[kmeans_cluster.labels_ == 0]
# label 1 -> green
df_green = df_clustering.loc[kmeans_cluster.labels_ == 1]
# label 2 -> red
df_red = df_clustering.loc[kmeans_cluster.labels_ == 2]
```
%% Cell type:code id: tags:
```
# voluntary
# Prints, for every cluster dataframe, the smallest and largest ReqMemCPU, Elapsed and
# AllocCPUS value it contains. These ranges are the extents of each cluster's scatter plot.
for heading, cluster_frame in (("Purple Cluster", df_purple),
                               ("\nGreen Cluster", df_green),
                               ("\nRed Cluster", df_red)):
    print(heading)
    for column in ("ReqMemCPU", "Elapsed", "AllocCPUS"):
        lowest = cluster_frame[column].min()
        highest = cluster_frame[column].max()
        print(f"{column}:", "min =", lowest, " ", "max =", highest)
```
%% Cell type:code id: tags:
```
# must run
# Builds the tables that feed the 2d histograms, three per cluster (one per pair of columns).
# Grouping on a pair of columns and summing leaves one row per distinct pair of values
# (the third column is summed), which keeps each table small enough to plot.
def _pair_table(cluster_frame, first_col, second_col):
    # one row per distinct (first_col, second_col) combination, keys restored as columns
    return cluster_frame.groupby([first_col, second_col]).sum().reset_index()


# for purple cluster
df_purlple_2d1 = _pair_table(df_purple, 'ReqMemCPU', 'Elapsed')
df_purlple_2d2 = _pair_table(df_purple, 'AllocCPUS', 'Elapsed')
df_purlple_2d3 = _pair_table(df_purple, 'ReqMemCPU', 'AllocCPUS')
# for green cluster
df_green_2d1 = _pair_table(df_green, 'ReqMemCPU', 'Elapsed')
df_green_2d2 = _pair_table(df_green, 'AllocCPUS', 'Elapsed')
df_green_2d3 = _pair_table(df_green, 'ReqMemCPU', 'AllocCPUS')
# for red cluster
df_red_2d1 = _pair_table(df_red, 'ReqMemCPU', 'Elapsed')
df_red_2d2 = _pair_table(df_red, 'AllocCPUS', 'Elapsed')
df_red_2d3 = _pair_table(df_red, 'ReqMemCPU', 'AllocCPUS')
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
# must run
# must run
figure = plt.figure()
figure = plt.figure()
figure.set_size_inches(20,
2
0)
figure.set_size_inches(20,
4
0)
# Elapsed
/ReqMem
2d Graph
#
ReqMem/
Elapsed 2d Graph
elapsed_
rqmem_
clustergraph = figure.add_subplot(
3
,3,1)
rqmem_
elapsed_clustergraph = figure.add_subplot(
5
,3,1)
#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)
elapsed_
rqmem_
clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'],
rqmem_
elapsed_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'],
c=kmeans_cluster.labels_, cmap='rainbow')
c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_
rqmem_
clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
rqmem_
elapsed_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
plt.xlabel('ReqMemCPU(gigs)')
plt.xlabel('ReqMemCPU(gigs)')
plt.ylabel('Elapsed(hours)')
plt.ylabel('Elapsed(hours)')
plt.title('Runtime/Requested Gigs RAM')
# Elapsed/Alloc 2d Graph
# Alloc/Elapsed 2d Graph
elapsed_alloc_clustergraph = figure.add_subplot(3,3,2)
alloc_elapsed_clustergraph = figure.add_subplot(5,3,2)
#figure.suptitle('Runtime per Core %i cores or less'%UpperlimitAllocCPU)
alloc_elapsed_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'],
elapsed_alloc_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'],
c=kmeans_cluster.labels_, cmap='rainbow')
c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_
alloc_
clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
alloc_
elapsed_clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
plt.xlabel('AllocCPUS')
plt.xlabel('AllocCPUS')
plt.ylabel('Elapsed(hours)')
plt.ylabel('Elapsed(hours)')
plt.title('Runtime/Core')
# Alloc/ReqMem 2d Graph
# ReqMem/Alloc 2d Graph
alloc_rqmem_clustergraph = figure.add_subplot(3,3,3)
rqmem_alloc_clustergraph = figure.add_subplot(5,3,3)
#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)
rqmem_alloc_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'],
alloc_rqmem_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'],
c=kmeans_cluster.labels_, cmap='rainbow')
c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_rqmem
_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
rqmem_alloc
_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
plt.xlabel('ReqMemCPU(gigs)')
plt.xlabel('ReqMemCPU(gigs)')
plt.ylabel('AllocCPUS')
plt.ylabel('AllocCPUS')
plt.title('Cores/Requested Gigs RAM')
###########
###########
3d Graphs
#
Alloc/
ReqMem 3d Graph
# ReqMem
/Alloc
3d Graph
alloc_reqmem
_clustergraph_3d = figure.add_subplot(
3
,3,4, projection='3d')
rqmem_alloc
_clustergraph_3d = figure.add_subplot(
5
,3,4, projection='3d')
alloc_reqmem
_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'],
rqmem_alloc
_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow')
c=kmeans_cluster.labels_ ,cmap='rainbow')
alloc_reqmem
_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
rqmem_alloc
_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
alloc_reqmem
_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')
rqmem_alloc
_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')
alloc_reqmem
_clustergraph_3d.set_ylabel('AllocCPUS')
rqmem_alloc
_clustergraph_3d.set_ylabel('AllocCPUS')
alloc_reqmem
_clustergraph_3d.set_zlabel('Elapsed(hours)')
rqmem_alloc
_clustergraph_3d.set_zlabel('Elapsed(hours)')
# sets size and color for gridlines by axis
# sets size and color for gridlines by axis
alloc_reqmem
_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_alloc
_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem
_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_alloc
_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem
_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_alloc
_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
# Elapsed
/Alloc
3d Graph
#
Alloc/
Elapsed 3d Graph
elapsed_
alloc_
clustergraph_3d = figure.add_subplot(
3
,3,5, projection='3d')
alloc_
elapsed_clustergraph_3d = figure.add_subplot(
5
,3,5, projection='3d')
elapsed_
alloc_
clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'],
alloc_
elapsed_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow')
c=kmeans_cluster.labels_ ,cmap='rainbow')
elapsed_
alloc_
clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
alloc_
elapsed_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
elapsed_
alloc_
clustergraph_3d.set_xlabel('AllocCPUS')
alloc_
elapsed_clustergraph_3d.set_xlabel('AllocCPUS')
elapsed_
alloc_
clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')
alloc_
elapsed_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')
elapsed_
alloc_
clustergraph_3d.set_zlabel('Elapsed(hours)')
alloc_
elapsed_clustergraph_3d.set_zlabel('Elapsed(hours)')
elapsed_
alloc_
clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_
elapsed_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_
alloc_
clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_
elapsed_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_
alloc_
clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_
elapsed_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
# Elapsed
/ReqMem
3d Graph
#
ReqMem/
Elapsed 3d Graph
elapsed_
rqmem_
clustergraph_3d = figure.add_subplot(
3
,3,6, projection='3d')
rqmem_
elapsed_clustergraph_3d = figure.add_subplot(
5
,3,6, projection='3d')
elapsed_
rqmem_
clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'],
rqmem_
elapsed_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'],
c=kmeans_cluster.labels_ ,cmap='rainbow')
c=kmeans_cluster.labels_ ,cmap='rainbow')
elapsed_
rqmem_
clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
rqmem_
elapsed_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
rqmem_elapsed_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')
rqmem_elapsed_clustergraph_3d.set_ylabel('Elapsed(hours)')
elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')
rqmem_elapsed_clustergraph_3d.set_zlabel('AllocCPUS')
elapsed_rqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_elapsed_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_elapsed_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
rqmem_elapsed_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
##############
# Alloc/ReqMem 3d Graph
alloc_reqmem_clustergraph_3d = figure.add_subplot(3,3,7, projection='3d')
alloc_reqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
alloc_reqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')
alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')
alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')
# sets size and color for gridlines by axis
alloc_reqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
# Elapsed/Alloc 3d Graph
elapsed_alloc_clustergraph_3d = figure.add_subplot(3,3,8, projection='3d')
elapsed_alloc_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
elapsed_alloc_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')
elapsed_alloc_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')
elapsed_alloc_clustergraph_3d.set_zlabel('Elapsed(hours)')
elapsed_alloc_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
plt.show()
```
%% Cell type:code id: tags:
```
# Creating bins
# Bin edges for the per-cluster 2d histograms: one unit-wide bin per gig / hour / cpu,
# starting at 0 and reaching far enough to contain the largest value in the cluster.
def _unit_bin_edges(largest_value):
    # Edges 0, 1, ..., int(largest_value) + 1.
    # range() stops before its argument, so range(largest_value) ends at largest_value - 1
    # and every job holding the largest value would lie beyond the last edge and be left
    # out of the histogram. The extra edges close the final bin around the maximum.
    return list(range(int(largest_value) + 2))


####Purple
purple_rqmem_min = df_purple.ReqMemCPU.min()
purple_rqmem_max = df_purple.ReqMemCPU.max()
purple_elapsed_min = df_purple.Elapsed.min()
purple_elapsed_max = df_purple.Elapsed.max()
purple_alloc_min = df_purple.AllocCPUS.min()
purple_alloc_max = df_purple.AllocCPUS.max()
x_purple_rqmem_elapsed_bins = _unit_bin_edges(purple_rqmem_max)      # one bin per gig
y_purple_rqmem_elapsed_bins = _unit_bin_edges(purple_elapsed_max)    # one bin per hour
x_purple_alloc_elapsed_bins = _unit_bin_edges(purple_alloc_max)      # one bin per cpu
y_purple_alloc_elapsed_bins = _unit_bin_edges(purple_elapsed_max)    # one bin per hour
x_purple_reqmem_alloc_bins = _unit_bin_edges(purple_rqmem_max)       # one bin per gig
y_purple_reqmem_alloc_bins = _unit_bin_edges(purple_alloc_max)       # one bin per cpu
####Green
green_rqmem_min = df_green.ReqMemCPU.min()
green_rqmem_max = df_green.ReqMemCPU.max()
green_elapsed_min = df_green.Elapsed.min()
green_elapsed_max = df_green.Elapsed.max()
green_alloc_min = df_green.AllocCPUS.min()
green_alloc_max = df_green.AllocCPUS.max()
x_green_rqmem_elapsed_bins = _unit_bin_edges(green_rqmem_max)
y_green_rqmem_elapsed_bins = _unit_bin_edges(green_elapsed_max)
x_green_alloc_elapsed_bins = _unit_bin_edges(green_alloc_max)
y_green_alloc_elapsed_bins = _unit_bin_edges(green_elapsed_max)
x_green_reqmem_alloc_bins = _unit_bin_edges(green_rqmem_max)
y_green_reqmem_alloc_bins = _unit_bin_edges(green_alloc_max)
####Red
red_rqmem_min = df_red.ReqMemCPU.min()
red_rqmem_max = df_red.ReqMemCPU.max()
red_elapsed_min = df_red.Elapsed.min()
red_elapsed_max = df_red.Elapsed.max()
red_alloc_min = df_red.AllocCPUS.min()
red_alloc_max = df_red.AllocCPUS.max()
x_red_rqmem_elapsed_bins = _unit_bin_edges(red_rqmem_max)
y_red_rqmem_elapsed_bins = _unit_bin_edges(red_elapsed_max)
x_red_alloc_elapsed_bins = _unit_bin_edges(red_alloc_max)
y_red_alloc_elapsed_bins = _unit_bin_edges(red_elapsed_max)
x_red_reqmem_alloc_bins = _unit_bin_edges(red_rqmem_max)
y_red_reqmem_alloc_bins = _unit_bin_edges(red_alloc_max)
```
%% Cell type:code id: tags:
```
# Draws a 3x3 grid of 2d histograms: one row per cluster (green, purple, red) and one
# column per pair of variables (ReqMemCPU/Elapsed, AllocCPUS/Elapsed, ReqMemCPU/AllocCPUS),
# each using that cluster's own bin edges.
_HIST_AXIS_LABELS = {'ReqMemCPU': 'ReqMemCPU(gigs)',
                     'Elapsed': 'Elapsed(hours)',
                     'AllocCPUS': 'AllocCPUS'}


def _blue_cluster_hist2d(axes, frame, x_col, y_col, bins, title):
    # Draws frame[x_col] against frame[y_col] on axes as a 2d histogram with the given
    # [x_edges, y_edges], labels the axes and returns what hist2d returns.
    drawn = axes.hist2d(frame[x_col], frame[y_col], bins=bins, cmap=plt.cm.Blues)
    axes.set_xlabel(_HIST_AXIS_LABELS[x_col])
    axes.set_ylabel(_HIST_AXIS_LABELS[y_col])
    axes.set_title(title)
    # NOTE(review): the same fixed limits are applied to every panel, whichever
    # variables are on its axes - confirm this is intended for the AllocCPUS axes.
    axes.set_xlim(0, 40)
    axes.set_ylim(0, 140)
    return drawn


fig = plt.figure()
fig.set_size_inches(20,20)
# add_subplot(33k): 3x3 grid (rows x cols); k is the panel number, counted row by row.
#####Green
ax = fig.add_subplot(331)
rqmem_elapsed_green_hist = _blue_cluster_hist2d(
    ax, df_green_2d1, 'ReqMemCPU', 'Elapsed',
    [x_green_rqmem_elapsed_bins, y_green_rqmem_elapsed_bins], 'Green Cluster')
ax2 = fig.add_subplot(332)
alloc_elapsed_green_hist = _blue_cluster_hist2d(
    ax2, df_green_2d2, 'AllocCPUS', 'Elapsed',
    [x_green_alloc_elapsed_bins, y_green_alloc_elapsed_bins], 'Green Cluster')
ax3 = fig.add_subplot(333)
reqmem_alloc_green_hist = _blue_cluster_hist2d(
    ax3, df_green_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_green_reqmem_alloc_bins, y_green_reqmem_alloc_bins], 'Green Cluster')
####Purple
ax4 = fig.add_subplot(334)
rqmem_elapsed_purple_hist = _blue_cluster_hist2d(
    ax4, df_purlple_2d1, 'ReqMemCPU', 'Elapsed',
    [x_purple_rqmem_elapsed_bins, y_purple_rqmem_elapsed_bins], 'Purple Cluster')
ax5 = fig.add_subplot(335)
alloc_elapsed_purple_hist = _blue_cluster_hist2d(
    ax5, df_purlple_2d2, 'AllocCPUS', 'Elapsed',
    [x_purple_alloc_elapsed_bins, y_purple_alloc_elapsed_bins], 'Purple Cluster')
ax6 = fig.add_subplot(336)
reqmem_alloc_purple_hist = _blue_cluster_hist2d(
    ax6, df_purlple_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_purple_reqmem_alloc_bins, y_purple_reqmem_alloc_bins], 'Purple Cluster')
#####Red
ax7 = fig.add_subplot(337)
rqmem_elapsed_red_hist = _blue_cluster_hist2d(
    ax7, df_red_2d1, 'ReqMemCPU', 'Elapsed',
    [x_red_rqmem_elapsed_bins, y_red_rqmem_elapsed_bins], 'Red Cluster')
ax8 = fig.add_subplot(338)
# AllocCPUS/Elapsed must be binned with the alloc/elapsed edges; the ReqMem/Alloc
# edges bin the Elapsed axis by cpu count and drop the longer-running jobs.
alloc_elapsed_red_hist = _blue_cluster_hist2d(
    ax8, df_red_2d2, 'AllocCPUS', 'Elapsed',
    [x_red_alloc_elapsed_bins, y_red_alloc_elapsed_bins], 'Red Cluster')
ax9 = fig.add_subplot(339)
reqmem_alloc_red_hist = _blue_cluster_hist2d(
    ax9, df_red_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_red_reqmem_alloc_bins, y_red_reqmem_alloc_bins], 'Red Cluster')
# sets the spacing
# top = space between title and graphs - increase number to bring title down and decrease to bring title up
# left = space to the left
# wspace = padding on both sides of graphs
# hspace = padding on top and bottom of graphs
# Applied to fig (this cell's figure), not to the scatter-plot figure of the earlier cell.
fig.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
fig.suptitle('Clusters', fontsize=20)
plt.show()
```
%% Cell type:code id: tags:
```
# Creating bins
#### Reqmem/Elapsed
x_rqmem_elapsed_min = np.min(LowerlimitGB)
x_rqmem_elapsed_max = np.max(UpperlimitGB)
y_rqmem_elapsed_min = np.min(LowerlimitElapsed)
y_rqmem_elapsed_max = np.max(UpperlimitElapsed)
x_rqmem_elapsed_bins = np.linspace(x_rqmem_elapsed_min, x_rqmem_elapsed_max, 50)
y_rqmem_elapsed_bins = np.linspace(y_rqmem_elapsed_min, y_rqmem_elapsed_max, 20)
####Alloc/Elapsed
x_alloc_elapsed_min = np.min(LowerlimitAllocCPU)
x_alloc_elapsed_max = np.max(UpperlimitAllocCPU)
y_alloc_elapsed_min = np.min(LowerlimitElapsed)
y_alloc_elapsed_max = np.max(UpperlimitElapsed)
x_alloc_elapsed_bins = np.linspace(x_alloc_elapsed_min, x_alloc_elapsed_max, 50)
y_alloc_elapsed_bins = np.linspace(y_alloc_elapsed_min, y_alloc_elapsed_max, 20)
# Elapsed/ReqMem 3d Graph
###Alloc/Reqmem
elapsed_rqmem_clustergraph_3d = figure.add_subplot(3,3,9, projection='3d')
x_reqmem_alloc_min = np.min(LowerlimitGB)
elapsed_rqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'],
x_reqmem_alloc_max = np.max(UpperlimitGB)
c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
elapsed_rqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
y_reqmem_alloc_min = np.min(LowerlimitAllocCPU)
y_reqmem_alloc_max = np.max(UpperlimitAllocCPU)
elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)')
x_reqmem_alloc_bins = np.linspace(x_reqmem_alloc_min, x_reqmem_alloc_max, 50)
elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS')
y_reqmem_alloc_bins = np.linspace(y_reqmem_alloc_min, y_reqmem_alloc_max, 20)
```
elapsed_rqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
%% Cell type:code id: tags:
elapsed_rqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
```
# Draws a 3x3 grid of 2d histograms: one row per cluster (purple, green, red) and one
# column per pair of variables (ReqMemCPU/Elapsed, AllocCPUS/Elapsed, ReqMemCPU/AllocCPUS).
# Every cluster uses the same bin edges for a given pair, so the rows are directly comparable.
_GREY_HIST_AXIS_LABELS = {'ReqMemCPU': 'ReqMemCPU(gigs)',
                          'Elapsed': 'Elapsed(hours)',
                          'AllocCPUS': 'AllocCPUS'}


def _grey_cluster_hist2d(axes, frame, x_col, y_col, bins, title):
    # Draws frame[x_col] against frame[y_col] on axes as a 2d histogram with the given
    # [x_edges, y_edges], labels the axes and returns what hist2d returns.
    drawn = axes.hist2d(frame[x_col], frame[y_col], bins=bins, cmap=plt.cm.Greys)
    axes.set_xlabel(_GREY_HIST_AXIS_LABELS[x_col])
    axes.set_ylabel(_GREY_HIST_AXIS_LABELS[y_col])
    axes.set_title(title)
    return drawn


fig = plt.figure()
fig.set_size_inches(20,20)
# add_subplot(33k): 3x3 grid (rows x cols); k is the panel number, counted row by row.
####Purple
ax = fig.add_subplot(331)
rqmem_elapsed_purple_hist = _grey_cluster_hist2d(
    ax, df_purlple_2d1, 'ReqMemCPU', 'Elapsed',
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins], 'Purple Cluster')
ax2 = fig.add_subplot(332)
alloc_elapsed_purple_hist = _grey_cluster_hist2d(
    ax2, df_purlple_2d2, 'AllocCPUS', 'Elapsed',
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins], 'Purple Cluster')
ax3 = fig.add_subplot(333)
reqmem_alloc_purple_hist = _grey_cluster_hist2d(
    ax3, df_purlple_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins], 'Purple Cluster')
#####Green
ax4 = fig.add_subplot(334)
rqmem_elapsed_green_hist = _grey_cluster_hist2d(
    ax4, df_green_2d1, 'ReqMemCPU', 'Elapsed',
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins], 'Green Cluster')
ax5 = fig.add_subplot(335)
alloc_elapsed_green_hist = _grey_cluster_hist2d(
    ax5, df_green_2d2, 'AllocCPUS', 'Elapsed',
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins], 'Green Cluster')
ax6 = fig.add_subplot(336)
reqmem_alloc_green_hist = _grey_cluster_hist2d(
    ax6, df_green_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins], 'Green Cluster')
#####Red
ax7 = fig.add_subplot(337)
rqmem_elapsed_red_hist = _grey_cluster_hist2d(
    ax7, df_red_2d1, 'ReqMemCPU', 'Elapsed',
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins], 'Red Cluster')
ax8 = fig.add_subplot(338)
# AllocCPUS/Elapsed must use the alloc/elapsed edges like the purple and green rows;
# the ReqMem/Alloc y edges stop at the cpu limit and would discard longer-running jobs.
alloc_elapsed_red_hist = _grey_cluster_hist2d(
    ax8, df_red_2d2, 'AllocCPUS', 'Elapsed',
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins], 'Red Cluster')
ax9 = fig.add_subplot(339)
reqmem_alloc_red_hist = _grey_cluster_hist2d(
    ax9, df_red_2d3, 'ReqMemCPU', 'AllocCPUS',
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins], 'Red Cluster')
# sets the spacing
# sets the spacing
# top = space between title and graphs - increase number to bring title down and decrease to bring title up
# top = space between title and graphs - increase number to bring title down and decrease to bring title up
# left = space to the left
# left = space to the left
# wspace = padding on both sides of graphs
# wspace = padding on both sides of graphs
# hspace = padding on top and bottom of graphs
# hspace = padding on top and bottom of graphs
figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
figure.suptitle('Clusters', fontsize=20)
figure.suptitle('Clusters', fontsize=20)
plt.show()
plt.show()
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
```
```
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment