From 8a286fa16f3160e939b192cbe8e218992a84e6fc Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Fri, 6 Dec 2024 10:13:45 -0800
Subject: [PATCH] =?UTF-8?q?BUG:=20Fix=20bug=20in=20GroupBy=20that=20ignore?=
 =?UTF-8?q?s=20group=5Fkeys=20arg=20for=20empty=20datafra=E2=80=A6=20(#605?=
 =?UTF-8?q?05)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series
---
 doc/source/whatsnew/v3.0.0.rst                   |  1 +
 pandas/core/groupby/generic.py                   |  4 ++++
 pandas/tests/groupby/aggregate/test_aggregate.py |  1 +
 pandas/tests/groupby/test_all_methods.py         |  2 +-
 pandas/tests/groupby/test_grouping.py            | 13 ++++++++++++-
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index bb9f48d17b..ab5746eca1 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -733,6 +733,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
 - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
+- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` where calling them on an empty :class:`DataFrame` or :class:`Series` with ``group_keys=False`` still built the output index from the group keys. (:issue:`60471`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 3fa34007a7..f4e3f3e8b1 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -583,6 +583,8 @@ class SeriesGroupBy(GroupBy[Series]):
             if is_transform:
                 # GH#47787 see test_group_on_empty_multiindex
                 res_index = data.index
+            elif not self.group_keys:
+                res_index = None
             else:
                 res_index = self._grouper.result_index
 
@@ -1967,6 +1969,8 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
             if is_transform:
                 # GH#47787 see test_group_on_empty_multiindex
                 res_index = data.index
+            elif not self.group_keys:
+                res_index = None
             else:
                 res_index = self._grouper.result_index
 
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 64220f1d3d..b7e6e55739 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -159,6 +159,7 @@ def test_agg_apply_corner(ts, tsframe):
     tm.assert_frame_equal(grouped.agg("sum"), exp_df)
 
     res = grouped.apply(np.sum, axis=0)
+    exp_df = exp_df.reset_index(drop=True)
     tm.assert_frame_equal(res, exp_df)
 
 
diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py
index 945c3e421a..4625c5c27a 100644
--- a/pandas/tests/groupby/test_all_methods.py
+++ b/pandas/tests/groupby/test_all_methods.py
@@ -22,7 +22,7 @@ from pandas.tests.groupby import get_groupby_method_args
 def test_multiindex_group_all_columns_when_empty(groupby_func):
     # GH 32464
     df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
-    gb = df.groupby(["a", "b", "c"], group_keys=False)
+    gb = df.groupby(["a", "b", "c"], group_keys=True)
     method = getattr(gb, groupby_func)
     args = get_groupby_method_args(groupby_func, df)
     if groupby_func == "corrwith":
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 366eb59ee2..4e7c0acb12 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -777,10 +777,21 @@ class TestGrouping:
         # (not testing other agg fns, because they return
         # different index objects.
         df = DataFrame({1: [], 2: []})
-        g = df.groupby(1, group_keys=False)
+        g = df.groupby(1, group_keys=True)
         result = getattr(g[2], func)(lambda x: x)
         tm.assert_series_equal(result, expected)
 
+    def test_groupby_apply_empty_with_group_keys_false(self):
+        # GH#60471
+        # apply on an empty frame with group_keys=False must not use the
+        # group keys as the result index; the expected frame is built
+        # with index=None, i.e. a default empty index.
+        df = DataFrame({"A": [], "B": [], "C": []})
+        g = df.groupby("A", group_keys=False)
+        result = g.apply(lambda x: x / x.sum(), include_groups=False)
+        expected = DataFrame({"B": [], "C": []}, index=None)
+        tm.assert_frame_equal(result, expected)
+
     def test_groupby_empty(self):
         # https://github.com/pandas-dev/pandas/issues/27190
         s = Series([], name="name", dtype="float64")
-- 
GitLab