From 8a286fa16f3160e939b192cbe8e218992a84e6fc Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu <snitish.iitk@gmail.com> Date: Fri, 6 Dec 2024 10:13:45 -0800 Subject: [PATCH] =?UTF-8?q?BUG:=20Fix=20bug=20in=20GroupBy=20that=20ignore?= =?UTF-8?q?s=20group=5Fkeys=20arg=20for=20empty=20datafra=E2=80=A6=20(#605?= =?UTF-8?q?05)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/generic.py | 4 ++++ pandas/tests/groupby/aggregate/test_aggregate.py | 1 + pandas/tests/groupby/test_all_methods.py | 2 +- pandas/tests/groupby/test_grouping.py | 13 ++++++++++++- 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bb9f48d17b..ab5746eca1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -733,6 +733,7 @@ Groupby/resample/rolling - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) +- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3fa34007a7..f4e3f3e8b1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,6 +583,8 @@ class SeriesGroupBy(GroupBy[Series]): if is_transform: # GH#47787 see test_group_on_empty_multiindex res_index = data.index + elif not self.group_keys: + res_index = None else: res_index = self._grouper.result_index @@ -1967,6 +1969,8 @@ class DataFrameGroupBy(GroupBy[DataFrame]): if is_transform: # GH#47787 see test_group_on_empty_multiindex res_index = data.index + elif not self.group_keys: + res_index = None else: res_index = self._grouper.result_index diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 64220f1d3d..b7e6e55739 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -159,6 +159,7 @@ def test_agg_apply_corner(ts, tsframe): tm.assert_frame_equal(grouped.agg("sum"), exp_df) res = grouped.apply(np.sum, axis=0) + exp_df = exp_df.reset_index(drop=True) tm.assert_frame_equal(res, exp_df) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index 945c3e421a..4625c5c27a 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -22,7 +22,7 @@ from pandas.tests.groupby import get_groupby_method_args def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"], group_keys=False) + gb = df.groupby(["a", "b", "c"], group_keys=True) method = getattr(gb, groupby_func) args = get_groupby_method_args(groupby_func, df) if groupby_func == "corrwith": diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 366eb59ee2..4e7c0acb12 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -777,10 +777,21 @@ class TestGrouping: # (not testing other agg fns, because they return # different index objects. df = DataFrame({1: [], 2: []}) - g = df.groupby(1, group_keys=False) + g = df.groupby(1, group_keys=True) result = getattr(g[2], func)(lambda x: x) tm.assert_series_equal(result, expected) + def test_groupby_apply_empty_with_group_keys_false(self): + # 60471 + # test apply'ing empty groups with group_keys False + # (not testing other agg fns, because they return + # different index objects. + df = DataFrame({"A": [], "B": [], "C": []}) + g = df.groupby("A", group_keys=False) + result = g.apply(lambda x: x / x.sum(), include_groups=False) + expected = DataFrame({"B": [], "C": []}, index=None) + tm.assert_frame_equal(result, expected) + def test_groupby_empty(self): # https://github.com/pandas-dev/pandas/issues/27190 s = Series([], name="name", dtype="float64") -- GitLab