Skip to content
Snippets Groups Projects
Commit d6c6d60e authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

added 2d histograms for clusters

parent af17830d
Branches ClusterAnalysis-Excluded-Data
No related tags found
1 merge request!4Cluster Analysis(ReqMemCPU, AllocCPUS, Elapsed)
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Data Setup Options # Data Setup Options
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# year-date-month # year-date-month
#start_date = '2020-10-09' #start_date = '2020-10-09'
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# sets min and max parameters for ReqMemCPU # sets min and max parameters for ReqMemCPU
LowerlimitGB = 0 LowerlimitGB = 0
UpperlimitGB = 50 UpperlimitGB = 50 # gigs per cpu
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# sets min and max parameters for AllocCPUS # sets min and max parameters for AllocCPUS
LowerlimitAllocCPU = 0 LowerlimitAllocCPU = 0
UpperlimitAllocCPU = 50 UpperlimitAllocCPU = 50 #cpus
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# sets min and max parameters for Elapsed # sets min and max parameters for Elapsed
LowerlimitElapsed = 0 LowerlimitElapsed = 0
UpperlimitElapsed = 150.02 UpperlimitElapsed = 150.02 #in hours - 6.25 days
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# Enter 'none', '0-1', or 'log' as achoice for data normalization # Enter 'none', '0-1', or 'log' as a choice for data normalization
Data_Normalization_Choice = 'none' Data_Normalization_Choice = 'none'
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Imports # Imports
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
import sqlite3 import sqlite3
import slurm2sql import slurm2sql
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
%matplotlib inline %matplotlib inline
import seaborn as sns import seaborn as sns
import seaborn as sb import seaborn as sb
import plotly.express as px import plotly.express as px
import matplotlib.ticker as ticker import matplotlib.ticker as ticker
import numpy as np import numpy as np
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
import os import os
from RC_styles import rc_styles as style from RC_styles import rc_styles as style
from sklearn.cluster import KMeans from sklearn.cluster import KMeans
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Database Creation # Database Creation
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# creates database of info from March 2020 using sqlite 3 # creates database of info from March 2020 using sqlite 3
db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3') db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# df is starting database # df is starting database
df = pd.read_sql('SELECT * FROM slurm', db) df = pd.read_sql('SELECT * FROM slurm', db)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# df_1 is dataframe of all completed jobs # df_1 is dataframe of all completed jobs
df_1 = df[df.State.str.contains('COMPLETED')] df_1 = df[df.State.str.contains('COMPLETED')]
#df_completed.head(5) #df_completed.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# dataset of needed columns for all graphs below # dataset of needed columns for all graphs below
df_completed = df_1.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']] df_completed = df_1.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]
#df_1.head(5) #df_1.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number # converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number
df_completed['ReqMemCPU'] = df_completed['ReqMemCPU'].div(1024**3).apply(np.ceil).apply(int) df_completed['ReqMemCPU'] = df_completed['ReqMemCPU'].div(1024**3).apply(np.ceil).apply(int)
#df_completed.head() #df_completed.head()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# converts Elapsed time to hours (from seconds) and rounds up to nearest 2 decimal places # converts Elapsed time to hours (from seconds) and rounds up to nearest 2 decimal places
df_completed['Elapsed'] = df_completed['Elapsed'].div(3600).round(2) df_completed['Elapsed'] = df_completed['Elapsed'].div(3600).round(2)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS for completed jobs using the min and max parameters created above # creates dataset of ReqMemCPU, Elapsed, and AllocCPUS for completed jobs using the min and max parameters created above
df_clustering = df_completed[(df_completed['ReqMemCPU'] <= UpperlimitGB) & df_clustering = df_completed[(df_completed['ReqMemCPU'] <= UpperlimitGB) &
(df_completed['ReqMemCPU'] >= LowerlimitGB) & (df_completed['ReqMemCPU'] >= LowerlimitGB) &
(df_completed['AllocCPUS'] <= UpperlimitAllocCPU) & (df_completed['AllocCPUS'] <= UpperlimitAllocCPU) &
(df_completed['AllocCPUS'] >= LowerlimitAllocCPU) (df_completed['AllocCPUS'] >= LowerlimitAllocCPU)
& &
(df_completed['Elapsed'] <= UpperlimitElapsed) & (df_completed['Elapsed'] <= UpperlimitElapsed) &
(df_completed['Elapsed'] >= LowerlimitElapsed)] (df_completed['Elapsed'] >= LowerlimitElapsed)]
df_clustering.head(5) df_clustering.head(5)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Normalizing the Data for ReqMem/Elapsed # Normalizing the Data for ReqMem/Elapsed
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
if Data_Normalization_Choice == '0-1': if Data_Normalization_Choice == '0-1':
column_max = df_clustering.max() column_max = df_clustering.max()
df_clustering_max = column_max.max() df_clustering_max = column_max.max()
fit = df_clustering / df_clustering_max fit = df_clustering / df_clustering_max
print("0-1") print("0-1")
elif Data_Normalization_Choice == 'log': elif Data_Normalization_Choice == 'log':
fit = np.log10(df_clustering+1) fit = np.log10(df_clustering+1)
print("log") print("log")
else: else:
fit = df_clustering fit = df_clustering
print("none") print("none")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# kmeans Clustering # kmeans Clustering
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
# sets to clusters and returns the cluster points # sets to clusters and returns the cluster points
kmeans_cluster = KMeans(n_clusters=3, random_state=111) kmeans_cluster = KMeans(n_clusters=3, random_state=111)
kmeans_cluster.fit(fit) kmeans_cluster.fit(fit)
print(kmeans_cluster.cluster_centers_) print(kmeans_cluster.cluster_centers_)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Reverting Cluster Points Back to align with UnNormalized data # Reverting Cluster Points Back to align with UnNormalized data
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
if Data_Normalization_Choice == '0-1': if Data_Normalization_Choice == '0-1':
clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max
print("0-1") print("0-1")
elif Data_Normalization_Choice == 'log': elif Data_Normalization_Choice == 'log':
clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1 clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1
print("log") print("log")
else: else:
clusterpoints = kmeans_cluster.cluster_centers_ clusterpoints = kmeans_cluster.cluster_centers_
print("none") print("none")
print(clusterpoints[:,0],clusterpoints[:,1]) print(clusterpoints[:,0],clusterpoints[:,1])
``` ```
%% Cell type:markdown id: tags:
# Separating the Clusters for 2d Histograms
%% Cell type:code id: tags:
```
# must run
# Splits df_clustering into one dataframe per k-means label by masking its
# rows with the fitted label array. The frames are named after the colours
# this notebook uses for the clusters: label 0 -> purple, 1 -> green, 2 -> red.
cluster_labels = kmeans_cluster.labels_
df_purple, df_green, df_red = (
    df_clustering[cluster_labels == label] for label in (0, 1, 2)
)
```
%% Cell type:code id: tags:
```
# voluntary
# Prints, for every cluster frame, the smallest and largest value of
# ReqMemCPU, Elapsed, and AllocCPUS.
# These are the parameters for the scatter plots of each cluster
cluster_frames = (("Purple", df_purple), ("Green", df_green), ("Red", df_red))
for position, (cluster_name, cluster_frame) in enumerate(cluster_frames):
    # every heading after the first is preceded by a blank line
    separator = "" if position == 0 else "\n"
    print(f"{separator}{cluster_name} Cluster")
    for column_name in ("ReqMemCPU", "Elapsed", "AllocCPUS"):
        column_values = cluster_frame[column_name]
        print(f"{column_name}:", "min =", column_values.min(), " ", "max =", column_values.max())
```
%% Cell type:code id: tags:
```
# must run
# Builds, for each cluster, three smaller frames keyed by one pair of columns
# each; these are the inputs for the 2d histograms of that cluster.
# groupby(pair).sum().reset_index() keeps one row per distinct value pair and
# sums the remaining column over the jobs sharing that pair.
# NOTE(review): because duplicate pairs are collapsed, anything that counts
# rows of these frames counts distinct pairs, not jobs - confirm that this is
# the intended weighting.
axis_pairs = (
    ['ReqMemCPU', 'Elapsed'],
    ['AllocCPUS', 'Elapsed'],
    ['ReqMemCPU', 'AllocCPUS'],
)

# for purple cluster (the "purlple" spelling is kept: later cells use these names)
df_purlple_2d1, df_purlple_2d2, df_purlple_2d3 = [
    df_purple.groupby(pair).sum().reset_index() for pair in axis_pairs
]

# for green cluster
df_green_2d1, df_green_2d2, df_green_2d3 = [
    df_green.groupby(pair).sum().reset_index() for pair in axis_pairs
]

# for red cluster
df_red_2d1, df_red_2d2, df_red_2d3 = [
    df_red.groupby(pair).sum().reset_index() for pair in axis_pairs
]
```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# must run # must run
figure = plt.figure() figure = plt.figure()
figure.set_size_inches(20,20) figure.set_size_inches(20,40)
# Elapsed/ReqMem 2d Graph # ReqMem/Elapsed 2d Graph
elapsed_rqmem_clustergraph = figure.add_subplot(3,3,1) rqmem_elapsed_clustergraph = figure.add_subplot(5,3,1)
#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)
elapsed_rqmem_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'], rqmem_elapsed_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['Elapsed'],
c=kmeans_cluster.labels_, cmap='rainbow') c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_rqmem_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black') rqmem_elapsed_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
plt.xlabel('ReqMemCPU(gigs)') plt.xlabel('ReqMemCPU(gigs)')
plt.ylabel('Elapsed(hours)') plt.ylabel('Elapsed(hours)')
plt.title('Runtime/Requested Gigs RAM')
# Elapsed/Alloc 2d Graph # Alloc/Elapsed 2d Graph
elapsed_alloc_clustergraph = figure.add_subplot(3,3,2) alloc_elapsed_clustergraph = figure.add_subplot(5,3,2)
#figure.suptitle('Runtime per Core %i cores or less'%UpperlimitAllocCPU) alloc_elapsed_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'],
elapsed_alloc_clustergraph.scatter(df_clustering['AllocCPUS'],df_clustering['Elapsed'],
c=kmeans_cluster.labels_, cmap='rainbow') c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_alloc_clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black') alloc_elapsed_clustergraph.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
plt.xlabel('AllocCPUS') plt.xlabel('AllocCPUS')
plt.ylabel('Elapsed(hours)') plt.ylabel('Elapsed(hours)')
plt.title('Runtime/Core')
# Alloc/ReqMem 2d Graph # ReqMem/Alloc 2d Graph
alloc_rqmem_clustergraph = figure.add_subplot(3,3,3) rqmem_alloc_clustergraph = figure.add_subplot(5,3,3)
#figure.suptitle('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB) rqmem_alloc_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'],
alloc_rqmem_clustergraph.scatter(df_clustering['ReqMemCPU'],df_clustering['AllocCPUS'],
c=kmeans_cluster.labels_, cmap='rainbow') c=kmeans_cluster.labels_, cmap='rainbow')
elapsed_rqmem_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black') rqmem_alloc_clustergraph.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
plt.xlabel('ReqMemCPU(gigs)') plt.xlabel('ReqMemCPU(gigs)')
plt.ylabel('AllocCPUS') plt.ylabel('AllocCPUS')
plt.title('Cores/Requested Gigs RAM')
########### ########### 3d Graphs
# Alloc/ReqMem 3d Graph # ReqMem/Alloc 3d Graph
alloc_reqmem_clustergraph_3d = figure.add_subplot(3,3,4, projection='3d') rqmem_alloc_clustergraph_3d = figure.add_subplot(5,3,4, projection='3d')
alloc_reqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'], rqmem_alloc_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow') c=kmeans_cluster.labels_ ,cmap='rainbow')
alloc_reqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black') rqmem_alloc_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,2], color='black')
alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs') rqmem_alloc_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')
alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS') rqmem_alloc_clustergraph_3d.set_ylabel('AllocCPUS')
alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)') rqmem_alloc_clustergraph_3d.set_zlabel('Elapsed(hours)')
# sets size and color for gridlines by axis # sets size and color for gridlines by axis
alloc_reqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_alloc_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_alloc_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_alloc_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
# Elapsed/Alloc 3d Graph # Alloc/Elapsed 3d Graph
elapsed_alloc_clustergraph_3d = figure.add_subplot(3,3,5, projection='3d') alloc_elapsed_clustergraph_3d = figure.add_subplot(5,3,5, projection='3d')
elapsed_alloc_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'], alloc_elapsed_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'],
c=kmeans_cluster.labels_ ,cmap='rainbow') c=kmeans_cluster.labels_ ,cmap='rainbow')
elapsed_alloc_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black') alloc_elapsed_clustergraph_3d.scatter(clusterpoints[:,2] ,clusterpoints[:,1], color='black')
elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS') alloc_elapsed_clustergraph_3d.set_xlabel('AllocCPUS')
elapsed_alloc_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)') alloc_elapsed_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')
elapsed_alloc_clustergraph_3d.set_zlabel('Elapsed(hours)') alloc_elapsed_clustergraph_3d.set_zlabel('Elapsed(hours)')
elapsed_alloc_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) alloc_elapsed_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) alloc_elapsed_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) alloc_elapsed_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
# Elapsed/ReqMem 3d Graph # ReqMem/Elapsed 3d Graph
elapsed_rqmem_clustergraph_3d = figure.add_subplot(3,3,6, projection='3d') rqmem_elapsed_clustergraph_3d = figure.add_subplot(5,3,6, projection='3d')
elapsed_rqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'], rqmem_elapsed_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'],
c=kmeans_cluster.labels_ ,cmap='rainbow') c=kmeans_cluster.labels_ ,cmap='rainbow')
elapsed_rqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black') rqmem_elapsed_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black')
elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)') rqmem_elapsed_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)') rqmem_elapsed_clustergraph_3d.set_ylabel('Elapsed(hours)')
elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS') rqmem_elapsed_clustergraph_3d.set_zlabel('AllocCPUS')
elapsed_rqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_elapsed_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_elapsed_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) rqmem_elapsed_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
##############
# Translucent (alpha = .08) versions of two 3d cluster scatters, drawn on
# subplots 7 and 8 of `figure`, with the cluster centres overlaid in black.
#
# The centres are stored in the column order the model was fitted on
# (ReqMemCPU, Elapsed, AllocCPUS), i.e. columns 0, 1 and 2 of clusterpoints.
# All three coordinates are passed, in the same axis order as the data of each
# plot; with only two coordinates the centres are drawn on the z = 0 plane.

# Alloc/ReqMem 3d Graph  (x = ReqMemCPU, y = AllocCPUS, z = Elapsed)
alloc_reqmem_clustergraph_3d = figure.add_subplot(3,3,7, projection='3d')
alloc_reqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['AllocCPUS'], df_clustering['Elapsed'],
                                     c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
alloc_reqmem_clustergraph_3d.scatter(clusterpoints[:,0], clusterpoints[:,2], clusterpoints[:,1], color='black')
# label previously lacked its closing parenthesis
alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')
alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')
# sets size and color for gridlines by axis
alloc_reqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
alloc_reqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})

# Elapsed/Alloc 3d Graph  (x = AllocCPUS, y = ReqMemCPU, z = Elapsed)
elapsed_alloc_clustergraph_3d = figure.add_subplot(3,3,8, projection='3d')
elapsed_alloc_clustergraph_3d.scatter(df_clustering['AllocCPUS'], df_clustering['ReqMemCPU'], df_clustering['Elapsed'],
                                      c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
# y is ReqMemCPU on this plot, so the centre's ReqMemCPU column (0) goes
# second and its Elapsed column (1) goes on z
elapsed_alloc_clustergraph_3d.scatter(clusterpoints[:,2], clusterpoints[:,0], clusterpoints[:,1], color='black')
elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')
elapsed_alloc_clustergraph_3d.set_ylabel('ReqMemCPU(gigs)')
elapsed_alloc_clustergraph_3d.set_zlabel('Elapsed(hours)')
# sets size and color for gridlines by axis
elapsed_alloc_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_alloc_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
plt.show()
```
%% Cell type:code id: tags:
```
# Creating bins
# Computes, per cluster, the min/max of each variable and the unit-width bin
# edges (one bin per gig, per hour, per cpu) for that cluster's 2d histograms.
def _unit_bin_edges(largest_value):
    """Return the integer bin edges 0, 1, ..., int(largest_value) + 1.

    Values beyond the last edge are not counted by a histogram, and every
    bin but the last is half-open, so the edges must run one past the
    largest value for that value to be counted in its own unit-wide bin.
    Stopping at ``largest_value - 1`` (as ``range(largest_value)`` does)
    drops the largest values and leaves fewer than two edges when the
    maximum is 0 or 1.
    """
    return list(range(int(largest_value) + 2))


####Purple
purple_rqmem_min = df_purple.ReqMemCPU.min()
purple_rqmem_max = df_purple.ReqMemCPU.max()
purple_elapsed_min = df_purple.Elapsed.min()
purple_elapsed_max = df_purple.Elapsed.max()
purple_alloc_min = df_purple.AllocCPUS.min()
purple_alloc_max = df_purple.AllocCPUS.max()

x_purple_rqmem_elapsed_bins = _unit_bin_edges(purple_rqmem_max)    # one bin per gig
y_purple_rqmem_elapsed_bins = _unit_bin_edges(purple_elapsed_max)  # one bin per hour
x_purple_alloc_elapsed_bins = _unit_bin_edges(purple_alloc_max)    # one bin per cpu
y_purple_alloc_elapsed_bins = _unit_bin_edges(purple_elapsed_max)
x_purple_reqmem_alloc_bins = _unit_bin_edges(purple_rqmem_max)
y_purple_reqmem_alloc_bins = _unit_bin_edges(purple_alloc_max)

####Green
green_rqmem_min = df_green.ReqMemCPU.min()
green_rqmem_max = df_green.ReqMemCPU.max()
green_elapsed_min = df_green.Elapsed.min()
green_elapsed_max = df_green.Elapsed.max()
green_alloc_min = df_green.AllocCPUS.min()
green_alloc_max = df_green.AllocCPUS.max()

x_green_rqmem_elapsed_bins = _unit_bin_edges(green_rqmem_max)
y_green_rqmem_elapsed_bins = _unit_bin_edges(green_elapsed_max)
x_green_alloc_elapsed_bins = _unit_bin_edges(green_alloc_max)
y_green_alloc_elapsed_bins = _unit_bin_edges(green_elapsed_max)
x_green_reqmem_alloc_bins = _unit_bin_edges(green_rqmem_max)
y_green_reqmem_alloc_bins = _unit_bin_edges(green_alloc_max)

####Red
red_rqmem_min = df_red.ReqMemCPU.min()
red_rqmem_max = df_red.ReqMemCPU.max()
red_elapsed_min = df_red.Elapsed.min()
red_elapsed_max = df_red.Elapsed.max()
red_alloc_min = df_red.AllocCPUS.min()
red_alloc_max = df_red.AllocCPUS.max()

x_red_rqmem_elapsed_bins = _unit_bin_edges(red_rqmem_max)
y_red_rqmem_elapsed_bins = _unit_bin_edges(red_elapsed_max)
x_red_alloc_elapsed_bins = _unit_bin_edges(red_alloc_max)
y_red_alloc_elapsed_bins = _unit_bin_edges(red_elapsed_max)
x_red_reqmem_alloc_bins = _unit_bin_edges(red_rqmem_max)  # one bin per gig
y_red_reqmem_alloc_bins = _unit_bin_edges(red_alloc_max)  # one bin per cpu
```
%% Cell type:code id: tags:
```
# Draws a 3x3 grid of 2d histograms on a new figure `fig`: one row per cluster
# (green, purple, red) and one column per pair of variables
# (ReqMemCPU/Elapsed, AllocCPUS/Elapsed, ReqMemCPU/AllocCPUS), binned with the
# per-cluster bin edges.
def _blue_hist2d_panel(axes, x_values, y_values, bins, x_label, y_label, title):
    """Draw one labelled 2d histogram on ``axes`` and return hist2d's result.

    Every panel uses the Blues colormap and the same fixed view window
    (x from 0 to 40, y from 0 to 140).
    """
    histogram = axes.hist2d(x_values, y_values, bins=bins, cmap=plt.cm.Blues)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    axes.set_xlim(0, 40)
    axes.set_ylim(0, 140)
    return histogram


fig = plt.figure()
fig.set_size_inches(20,20)

# add_subplot(33k): 3x3 grid, k-th panel, counted row by row

#####Green
ax = fig.add_subplot(331)
rqmem_elapsed_green_hist = _blue_hist2d_panel(
    ax, df_green_2d1['ReqMemCPU'], df_green_2d1['Elapsed'],
    [x_green_rqmem_elapsed_bins, y_green_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Green Cluster')

ax2 = fig.add_subplot(332)
alloc_elapsed_green_hist = _blue_hist2d_panel(
    ax2, df_green_2d2['AllocCPUS'], df_green_2d2['Elapsed'],
    [x_green_alloc_elapsed_bins, y_green_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Green Cluster')

ax3 = fig.add_subplot(333)
reqmem_alloc_green_hist = _blue_hist2d_panel(
    ax3, df_green_2d3['ReqMemCPU'], df_green_2d3['AllocCPUS'],
    [x_green_reqmem_alloc_bins, y_green_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Green Cluster')

####Purple
ax4 = fig.add_subplot(334)
rqmem_elapsed_purple_hist = _blue_hist2d_panel(
    ax4, df_purlple_2d1['ReqMemCPU'], df_purlple_2d1['Elapsed'],
    [x_purple_rqmem_elapsed_bins, y_purple_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Purple Cluster')

ax5 = fig.add_subplot(335)
alloc_elapsed_purple_hist = _blue_hist2d_panel(
    ax5, df_purlple_2d2['AllocCPUS'], df_purlple_2d2['Elapsed'],
    [x_purple_alloc_elapsed_bins, y_purple_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Purple Cluster')

ax6 = fig.add_subplot(336)
reqmem_alloc_purple_hist = _blue_hist2d_panel(
    ax6, df_purlple_2d3['ReqMemCPU'], df_purlple_2d3['AllocCPUS'],
    [x_purple_reqmem_alloc_bins, y_purple_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Purple Cluster')

#####Red
ax7 = fig.add_subplot(337)
rqmem_elapsed_red_hist = _blue_hist2d_panel(
    ax7, df_red_2d1['ReqMemCPU'], df_red_2d1['Elapsed'],
    [x_red_rqmem_elapsed_bins, y_red_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Red Cluster')

ax8 = fig.add_subplot(338)
# AllocCPUS vs Elapsed must use the alloc/elapsed edges; the reqmem/alloc
# edges put cpu-count edges on the Elapsed axis
alloc_elapsed_red_hist = _blue_hist2d_panel(
    ax8, df_red_2d2['AllocCPUS'], df_red_2d2['Elapsed'],
    [x_red_alloc_elapsed_bins, y_red_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Red Cluster')

ax9 = fig.add_subplot(339)
reqmem_alloc_red_hist = _blue_hist2d_panel(
    ax9, df_red_2d3['ReqMemCPU'], df_red_2d3['AllocCPUS'],
    [x_red_reqmem_alloc_bins, y_red_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Red Cluster')

# sets the spacing
# top = space between title and graphs - increase number to bring title down and decrease to bring title up
# left = space to the left
# wspace = padding on both sides of graphs
# hspace = padding on top and bottom of graphs
# Applied to `fig`, the figure created in this cell; `figure` is the earlier
# scatter-plot figure and would leave this grid without spacing or title.
fig.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
fig.suptitle('Clusters', fontsize=20)
plt.show()
```
%% Cell type:code id: tags:
```
# Creating bins
#### Reqmem/Elapsed
x_rqmem_elapsed_min = np.min(LowerlimitGB)
x_rqmem_elapsed_max = np.max(UpperlimitGB)
y_rqmem_elapsed_min = np.min(LowerlimitElapsed)
y_rqmem_elapsed_max = np.max(UpperlimitElapsed)
x_rqmem_elapsed_bins = np.linspace(x_rqmem_elapsed_min, x_rqmem_elapsed_max, 50)
y_rqmem_elapsed_bins = np.linspace(y_rqmem_elapsed_min, y_rqmem_elapsed_max, 20)
####Alloc/Elapsed
x_alloc_elapsed_min = np.min(LowerlimitAllocCPU)
x_alloc_elapsed_max = np.max(UpperlimitAllocCPU)
y_alloc_elapsed_min = np.min(LowerlimitElapsed)
y_alloc_elapsed_max = np.max(UpperlimitElapsed)
x_alloc_elapsed_bins = np.linspace(x_alloc_elapsed_min, x_alloc_elapsed_max, 50)
y_alloc_elapsed_bins = np.linspace(y_alloc_elapsed_min, y_alloc_elapsed_max, 20)
# Elapsed/ReqMem 3d Graph ###Alloc/Reqmem
elapsed_rqmem_clustergraph_3d = figure.add_subplot(3,3,9, projection='3d') x_reqmem_alloc_min = np.min(LowerlimitGB)
elapsed_rqmem_clustergraph_3d.scatter(df_clustering['ReqMemCPU'], df_clustering['Elapsed'], df_clustering['AllocCPUS'], x_reqmem_alloc_max = np.max(UpperlimitGB)
c=kmeans_cluster.labels_ ,cmap='rainbow', alpha = .08)
elapsed_rqmem_clustergraph_3d.scatter(clusterpoints[:,0] ,clusterpoints[:,1], color='black') y_reqmem_alloc_min = np.min(LowerlimitAllocCPU)
y_reqmem_alloc_max = np.max(UpperlimitAllocCPU)
elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')
elapsed_rqmem_clustergraph_3d.set_ylabel('Elapsed(hours)') x_reqmem_alloc_bins = np.linspace(x_reqmem_alloc_min, x_reqmem_alloc_max, 50)
elapsed_rqmem_clustergraph_3d.set_zlabel('AllocCPUS') y_reqmem_alloc_bins = np.linspace(y_reqmem_alloc_min, y_reqmem_alloc_max, 20)
```
elapsed_rqmem_clustergraph_3d.xaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
elapsed_rqmem_clustergraph_3d.yaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"}) %% Cell type:code id: tags:
elapsed_rqmem_clustergraph_3d.zaxis._axinfo["grid"].update({"linewidth":.5, "color" : "black"})
```
# Draws a 3x3 grid of 2d histograms on a new figure `fig`: one row per cluster
# (purple, green, red) and one column per pair of variables
# (ReqMemCPU/Elapsed, AllocCPUS/Elapsed, ReqMemCPU/AllocCPUS). All rows share
# the same bin edges, so the clusters can be compared panel by panel.
def _grey_hist2d_panel(axes, x_values, y_values, bins, x_label, y_label, title):
    """Draw one labelled Greys 2d histogram on ``axes`` and return hist2d's result."""
    histogram = axes.hist2d(x_values, y_values, bins=bins, cmap=plt.cm.Greys)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    return histogram


fig = plt.figure()
fig.set_size_inches(20,20)

# add_subplot(33k): 3x3 grid, k-th panel, counted row by row

####Purple
ax = fig.add_subplot(331)
rqmem_elapsed_purple_hist = _grey_hist2d_panel(
    ax, df_purlple_2d1['ReqMemCPU'], df_purlple_2d1['Elapsed'],
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Purple Cluster')

ax2 = fig.add_subplot(332)
alloc_elapsed_purple_hist = _grey_hist2d_panel(
    ax2, df_purlple_2d2['AllocCPUS'], df_purlple_2d2['Elapsed'],
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Purple Cluster')

ax3 = fig.add_subplot(333)
reqmem_alloc_purple_hist = _grey_hist2d_panel(
    ax3, df_purlple_2d3['ReqMemCPU'], df_purlple_2d3['AllocCPUS'],
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Purple Cluster')

#####Green
ax4 = fig.add_subplot(334)
rqmem_elapsed_green_hist = _grey_hist2d_panel(
    ax4, df_green_2d1['ReqMemCPU'], df_green_2d1['Elapsed'],
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Green Cluster')

ax5 = fig.add_subplot(335)
alloc_elapsed_green_hist = _grey_hist2d_panel(
    ax5, df_green_2d2['AllocCPUS'], df_green_2d2['Elapsed'],
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Green Cluster')

ax6 = fig.add_subplot(336)
reqmem_alloc_green_hist = _grey_hist2d_panel(
    ax6, df_green_2d3['ReqMemCPU'], df_green_2d3['AllocCPUS'],
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Green Cluster')

#####Red
ax7 = fig.add_subplot(337)
rqmem_elapsed_red_hist = _grey_hist2d_panel(
    ax7, df_red_2d1['ReqMemCPU'], df_red_2d1['Elapsed'],
    [x_rqmem_elapsed_bins, y_rqmem_elapsed_bins],
    'ReqMemCPU(gigs)', 'Elapsed(hours)', 'Red Cluster')

ax8 = fig.add_subplot(338)
# AllocCPUS vs Elapsed must use the alloc/elapsed edges, like the purple and
# green rows; the reqmem/alloc edges put cpu-count edges on the Elapsed axis
alloc_elapsed_red_hist = _grey_hist2d_panel(
    ax8, df_red_2d2['AllocCPUS'], df_red_2d2['Elapsed'],
    [x_alloc_elapsed_bins, y_alloc_elapsed_bins],
    'AllocCPUS', 'Elapsed(hours)', 'Red Cluster')

ax9 = fig.add_subplot(339)
reqmem_alloc_red_hist = _grey_hist2d_panel(
    ax9, df_red_2d3['ReqMemCPU'], df_red_2d3['AllocCPUS'],
    [x_reqmem_alloc_bins, y_reqmem_alloc_bins],
    'ReqMemCPU(gigs)', 'AllocCPUS', 'Red Cluster')

# NOTE(review): the spacing and title calls that follow this grid are made on
# `figure` (the earlier scatter-plot figure), not on `fig` - they probably
# should target `fig`; confirm and update.
# sets the spacing # sets the spacing
# top = space between title and graphs - increase number to bring title down and decrease to bring title up # top = space between title and graphs - increase number to bring title down and decrease to bring title up
# left = space to the left # left = space to the left
# wspace = padding on both sides of graphs # wspace = padding on both sides of graphs
# hspace = padding on top and bottom of graphs # hspace = padding on top and bottom of graphs
figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3) figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
figure.suptitle('Clusters', fontsize=20) figure.suptitle('Clusters', fontsize=20)
plt.show() plt.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment