diff --git a/power-stats.ipynb b/power-stats.ipynb index 9151e63733315c4c484cea94a2717626178ce030..b2512f6b038522028af0e707775906a086db8643 100644 --- a/power-stats.ipynb +++ b/power-stats.ipynb @@ -121,7 +121,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Some of data values report unrealistic power values. Any reading over 10kW is considered invalid." + "## Clean Data and Resample\n", + "\n", + "Some of the data values report unrealistic power values. Any reading over 10kW is considered invalid. \n", + "\n", + "This filter is deferred until later: the comparison `raw < 10000` is False for NaN, so applying it here would also implicitly filter out the NaN readings." ] }, { @@ -130,7 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = df.loc[df['raw'] < 10000]" + "#df = df.loc[df['raw'] < 10000]" ] }, { @@ -176,11 +180,12 @@ "# prepare data frame to append to, use zeros for default column \n", "m6_hourly_pwr=pd.DataFrame(np.zeros((1,len(hourly_idx))).T, index=hourly_idx, columns=['sum'])\n", "\n", - "for num, entity in enumerate(df.entity.unique()):\n", + "for num, entity in enumerate(sorted(df.entity.unique())):\n", " if entity not in ['c0108', 'c0009']:\n", " node_pwr=df[df.entity==entity].set_index(\"datetime\")\n", " node_pwr=node_pwr[['raw']].resample('H').mean()\n", " node_pwr=node_pwr[startdate:enddate].fillna(method=\"ffill\")\n", + " node_pwr=node_pwr[startdate:enddate].fillna(method=\"bfill\")\n", " if debug:\n", " print(node_pwr)\n", " missing = node_pwr['raw'].isnull().sum()\n", @@ -194,8 +199,64 @@ "source": [ "## Plot Per-node Hourly\n", "\n", - "This is just to see the data for each node in one plot and get a feel for how the nodes behave relative to each other. There is too much data to decern individual behavior of specific nodes\n", - "but it does give a sense of how the total power adds up." + "This is just to see the data for each node in one figure and get a feel for how the nodes behave relative to each other. Plot nodes in individual subplots to discern the individual behavior of specific nodes. 
It does give a sense of how the total power adds up. \n", + "\n", + "Inspect the nodes in the first rack." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_nodes=36\n", + "fig, axes = plt.subplots(num_nodes,1, figsize=(20,30))  # one stacked subplot per node\n", + "for i in range(num_nodes):\n", + " m6_hourly_pwr['2020-02-01':'2021-02-21'].iloc[:,i+1:i+2].plot(ax=axes[i], legend=True)  # i+1 skips column 0 ('sum')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Overview plot reveals missing power data for a number of nodes. Inspect one up close." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m6_hourly_pwr['2020-02-01':'2021-02-21'].iloc[:,1:2].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identify nodes that have missing data\n", + "\n", + "Flag every node that has at least one NaN hourly value in the inspection window selected below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nan_mask = m6_hourly_pwr['2021-02-01':'2021-02-02'].isna()  # True where an hourly value is missing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "power_missing = nan_mask.columns[nan_mask.any()]  # columns with at least one NaN anywhere in the window" ] }, { @@ -204,7 +265,10 @@ "metadata": {}, "outputs": [], "source": [ - "m6_hourly_pwr['2020-02-01':'2020-07-09'].iloc[:,1:].plot(legend=False)" + "num_nodes=len(power_missing)\n", + "fig, axes = plt.subplots(max(num_nodes,1),1, figsize=(20,30), squeeze=False)  # squeeze=False keeps axes 2-D for a single node; max() avoids a zero-row grid\n", + "for i, node in enumerate(power_missing):\n", + " m6_hourly_pwr[node].plot(ax=axes[i,0], legend=True)" ] }, {