diff --git a/slurm-2sql.ipynb b/slurm-2sql.ipynb index 1e70d597dc4c6ce0feb87e6cbfdd32f051e842cd..74d921283a92efdcf471ff89ad3179c3370ca85d 100644 --- a/slurm-2sql.ipynb +++ b/slurm-2sql.ipynb @@ -35,9 +35,23 @@ "metadata": {}, "outputs": [], "source": [ - "# creates database of allocation info from March 2020 using sqlite 3\n", - "# not using this right now, but is here as an option\n", - "#db_allocation = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3')" + "# must run\n", + "\n", + "# df is starting database\n", + "df = pd.read_sql('SELECT * FROM slurm', db)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " # voluntary\n", + "\n", + "# for displaying all available column options\n", + "pd.set_option('display.max_columns', None)\n", + "df.head(5)" ] }, { @@ -48,8 +62,9 @@ "source": [ "# must run\n", "\n", - "# df is starting database\n", - "df = pd.read_sql('SELECT * FROM slurm', db)" + "# df_1 is database with only JobID, JobName, and State\n", + "df_1 = df.loc[:,['JobID','JobName','State']]\n", + "df_1.head(5)" ] }, { @@ -58,11 +73,11 @@ "metadata": {}, "outputs": [], "source": [ - " # voluntary\n", + "# must run\n", "\n", - "# for displaying all available column options\n", - "pd.set_option('display.max_columns', None)\n", - "df.head(5)" + "# df_2 is database with only JobID, JobName, and Partition\n", + "df_2 = df.loc[:,['JobID','JobName','Partition']]\n", + "df_2.head(5)" ] }, { @@ -73,9 +88,39 @@ "source": [ " # must run\n", "\n", - "# df_1 is dataframe of all completed jobs\n", - "df_1 = df[df.State.str.contains('COMPLETED')]\n", - "df_1.head(5)" + "# df_batch is df_1 with only batch jobs\n", + "df_batch = df_1[df_1.JobName.str.contains('batch')]\n", + "df_batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# fills empty strings in Partition column with NaN and then filters 
them out to give a dataset of users with no empty strings \n", + "nan_value = float(\"NaN\")\n", + "\n", + "df_2.replace(\"\", nan_value, inplace=True)\n", + "\n", + "df_2.dropna(subset = [\"Partition\"], inplace=True)\n", + "df_2.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", + "\n", + "# df_state is df_batch grouped by state - shows number of jobs that resulted in each state\n", + "df_state = df_batch.groupby('State')['JobID'].describe().reset_index()\n", + "df_state" ] }, { @@ -86,9 +131,9 @@ "source": [ "# must run\n", "\n", - "# df_2 is database with only ReqMemCpu and ReqMemNode, and ArrayTaskID\n", - "df_2 = df_1.loc[:,['JobName','ReqMemCPU', 'ReqMemNode', 'ArrayJobID','ArrayTaskID']]\n", - "#df_2.head(5)" + "# df_partition is df_2 grouped by partition - shows number of jobs for each partition\n", + "df_partition = df_2.groupby('Partition')['JobID'].describe().reset_index()\n", + "df_partition" ] }, { @@ -97,17 +142,75 @@ "metadata": {}, "outputs": [], "source": [ - "# shows all user requested cpu memory for array and non array jobs\n", - "fig = sns.distplot(df_2['ReqMemCPU'], kde=False, label='ReqMemCPU', color = \"green\")\n", + "# must run\n", "\n", - "fig.set_yscale('log')\n", + "# Job State Colors\n", + "Completed = '#91cf60' #green\n", + "Pending = '#1a9850' #turquoise\n", + "Cancelled = '#ffffbf' #yellow\n", + "Cancelled_by_11618 = '#fee08b' #yellow\n", + "Timeout = '#fc8d59' #orange\n", + "Failed = '#d73027' #red\n", + "OUT_OF_MEMORY = '#3288bd' #blue" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", "\n", - "plt.legend(prop={'size': 12},loc='upper right',bbox_to_anchor=(2.25, 1.0),ncol=1)\n", + "# gives list of states possible in the batch jobs\n", + "state_colors = [Cancelled, Cancelled_by_11618, Completed, Failed, OUT_OF_MEMORY]" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_graph = df_state.plot.bar(x='State', y='count', rot=110, color = state_colors, logy=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", "\n", - "plt.title('title')\n", + "# Partition Colors\n", + "Express = '#1b9e77'\n", + "Short = '#d95f02' \n", + "Medium = '#7570b3' \n", + "Long = '#e7298a' \n", + "Interactive = '#66a61e' \n", + "PascalNodes = '#e6ab02' " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# must run\n", "\n", - "plt.xlabel('x axis')\n", - "plt.ylabel('y axis')" + "# gives list of partitions possible\n", + "partition_colors = [Express, Short, Medium, Long, Interactive, PascalNodes]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "partition_graph = df_partition.plot.bar(x='Partition', y='count', rot=110, color = partition_colors, logy=True)" ] }, {