Clean up ordered-categorical corr tests (parametrize loops, drop redundant Series wrappers)

pandeconscious · pandeconscious · commit 0ef7ce37630f · 2025-12-18T20:17:28.000Z
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -1,5 +1,3 @@
-from itertools import combinations
-
 import numpy as np
 import pytest
 
@@ -255,11 +253,10 @@ def test_corr_numeric_only(self, meth, numeric_only):
                 df.corr(meth, numeric_only=numeric_only)
 
     @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize("col1", ["ord_cat", "ord_cat_none", "ord_cat_shuff"])
+    @pytest.mark.parametrize("col2", ["ord_cat", "ord_cat_none", "ord_cat_shuff"])
     @td.skip_if_no("scipy")
-    def test_corr_rank_ordered_categorical(
-        self,
-        method,
-    ):
+    def test_corr_rank_ordered_categorical(self, method, col1, col2):
         # GH #60306
         df = DataFrame(
             {
@@ -281,15 +278,15 @@ def test_corr_rank_ordered_categorical(
             }
         )
         corr_calc = df.corr(method=method)
-        for col1, col2 in combinations(df.columns, r=2):
-            corr_expected = df[col1].corr(df[col2], method=method)
-            tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
+        corr_expected = df[col1].corr(df[col2], method=method)
+        tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
 
     @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize("col1_idx", [0, 1, 2, 3, 4])
+    @pytest.mark.parametrize("col2_idx", [0, 1, 2, 3, 4])
     @td.skip_if_no("scipy")
     def test_corr_rank_ordered_categorical_duplicate_columns(
-        self,
-        method,
+        self, method, col1_idx, col2_idx
     ):
         # GH #60306
         cat = pd.CategoricalDtype(categories=[4, 3, 2, 1], ordered=True)
@@ -305,11 +302,8 @@ def test_corr_rank_ordered_categorical_duplicate_columns(
         df.columns = ["a", "a", "c", "c", "e"]
 
         corr_calc = df.corr(method=method)
-        for col1_idx, col2_idx in combinations(range(len(df.columns)), r=2):
-            corr_expected = df.iloc[:, col1_idx].corr(
-                df.iloc[:, col2_idx], method=method
-            )
-            tm.assert_almost_equal(corr_calc.iloc[col1_idx, col2_idx], corr_expected)
+        corr_expected = df.iloc[:, col1_idx].corr(df.iloc[:, col2_idx], method=method)
+        tm.assert_almost_equal(corr_calc.iloc[col1_idx, col2_idx], corr_expected)
 
 
 class TestDataFrameCorrWith:
@@ -554,49 +548,40 @@ def test_cov_with_missing_values(self):
         tm.assert_frame_equal(result2, expected)
 
     @pytest.mark.parametrize("method", ["kendall", "spearman"])
-    def test_corr_rank_ordered_categorical(
-        self,
-        method,
-    ):
+    @pytest.mark.parametrize("col", ["a", "b", "c", "d"])
+    def test_corr_rank_ordered_categorical(self, method, col):
         # GH #60306
         pytest.importorskip("scipy")
         df1 = DataFrame(
             {
-                "a": Series(
-                    pd.Categorical(
-                        ["low", "m", "h", "vh"],
-                        categories=["low", "m", "h", "vh"],
-                        ordered=True,
-                    )
+                "a": pd.Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
                 ),
-                "b": Series(
-                    pd.Categorical(
-                        ["low", "m", "h", None],
-                        categories=["low", "m", "h"],
-                        ordered=True,
-                    )
+                "b": pd.Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
                 ),
-                "c": Series([0, 1, 2, 3]),
-                "d": Series([2.0, 3.0, 4.5, 6.5]),
+                "c": [0, 1, 2, 3],
+                "d": [2.0, 3.0, 4.5, 6.5],
             }
         )
 
         df2 = DataFrame(
             {
-                "a": Series([2.0, 3.0, 4.5, np.nan]),
-                "b": Series(
-                    pd.Categorical(
-                        ["m", "h", "vh", "low"],
-                        categories=["low", "m", "h", "vh"],
-                        ordered=True,
-                    )
+                "a": [2.0, 3.0, 4.5, np.nan],
+                "b": pd.Categorical(
+                    ["m", "h", "vh", "low"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
                 ),
-                "c": Series([2, 3, 0, 1]),
-                "d": Series([2.0, 3.0, 4.5, 6.5]),
+                "c": [2, 3, 0, 1],
+                "d": [2.0, 3.0, 4.5, 6.5],
             }
         )
 
         corr_calc = df1.corrwith(df2, method=method)
-        for col in df1.columns:
-            corr_expected = df1[col].corr(df2[col], method=method)
-            tm.assert_almost_equal(corr_calc.get(col), corr_expected)
+        corr_expected = df1[col].corr(df2[col], method=method)
+        tm.assert_almost_equal(corr_calc.get(col), corr_expected)
diff --git a/pandas/tests/methods/corr.py b/pandas/tests/methods/corr.py
@@ -15,136 +15,118 @@
 
 
 @pytest.mark.parametrize(
-    ("input_df", "expected_df"),
+    ("input_df_dict", "expected_df_dict"),
     [
         pytest.param(
             # 1) Simple: two ordered categorical columns (with and without None)
-            DataFrame(
-                {
-                    "ord_cat": Series(
-                        Categorical(
-                            ["low", "m", "h", "vh"],
-                            categories=["low", "m", "h", "vh"],
-                            ordered=True,
-                        )
-                    ),
-                    "ord_cat_none": Series(
-                        Categorical(
-                            ["low", "m", "h", None],
-                            categories=["low", "m", "h"],
-                            ordered=True,
-                        )
-                    ),
-                }
-            ),
-            DataFrame(
-                {
-                    # codes: low=0, m=1, h=2, vh=3
-                    "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
-                    # codes: low=0, m=1, h=2, None -> NaN
-                    "ord_cat_none": Series([0, 1.0, 2.0, np.nan]),
-                }
-            ),
+            {
+                "ord_cat": Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "ord_cat_none": Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+            },
+            {
+                # codes: low=0, m=1, h=2, vh=3
+                "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
+                # codes: low=0, m=1, h=2, None -> NaN
+                "ord_cat_none": [0, 1.0, 2.0, np.nan],
+            },
             id="ordered-categoricals-basic",
         ),
         pytest.param(
             # 2) Mixed dtypes: only the ordered categorical should change
-            DataFrame(
-                {
-                    "ordered": Series(
-                        Categorical(
-                            ["a", "c", "b"],
-                            categories=["a", "b", "c"],
-                            ordered=True,
-                        )
-                    ),
-                    "unordered": Series(Categorical(["x", "y", "x"], ordered=False)),
-                    "num": Series([10, 20, 30]),
-                    "text": Series(["u", "v", "w"]),
-                }
-            ),
-            DataFrame(
-                {
-                    # codes: a=0, c=2, b=1
-                    "ordered": Series([0, 2, 1], dtype="int8"),
-                    # unordered categorical should be untouched (still categorical)
-                    "unordered": Series(Categorical(["x", "y", "x"], ordered=False)),
-                    "num": Series([10, 20, 30]),
-                    "text": Series(["u", "v", "w"]),
-                }
-            ),
+            {
+                "ordered": Categorical(
+                    ["a", "c", "b"],
+                    categories=["a", "b", "c"],
+                    ordered=True,
+                ),
+                "unordered": Categorical(["x", "y", "x"], ordered=False),
+                "num": [10, 20, 30],
+                "text": ["u", "v", "w"],
+            },
+            {
+                # codes: a=0, c=2, b=1
+                "ordered": Series([0, 2, 1], dtype="int8"),
+                # unordered categorical should be untouched (still categorical)
+                "unordered": Categorical(["x", "y", "x"], ordered=False),
+                "num": [10, 20, 30],
+                "text": ["u", "v", "w"],
+            },
             id="mixed-types-only-ordered-changes",
         ),
-        pytest.param(
-            # 3 Duplicate column names: first 'dup' is ordered categorical,
-            # second 'dup' is non-categorical
-            DataFrame(
-                {
-                    "dup_1": Series(
-                        Categorical(
-                            ["low", "m", "h"],
-                            categories=["low", "m", "h"],
-                            ordered=True,
-                        )
-                    ),
-                    "dup_2": Series([5, 6, 7]),  # duplicate name, later column
-                }
-            ),
-            DataFrame(
-                {
-                    # After transform: position 0 (ordered cat) becomes codes [0,1,2],
-                    # position 1 remains untouched numbers [5,6,7].
-                    "dup_1": Series([0, 1, 2], dtype="int8"),
-                    "dup_2": Series([5, 6, 7]),
-                }
-            ),
-            id="duplicate-names-ordered-first",
-        ),
-        pytest.param(
-            # 4 Duplicate column names: first 'dup' is non-categorical,
-            # second 'dup' is ordered categorical, third 'dup' is ordered categorical
-            DataFrame(
-                {
-                    "dup_1": Series(["a", "b", "c"]),  # non-categorical (object)
-                    "dup_2": Series(
-                        Categorical(
-                            ["p", "q", None],
-                            categories=["p", "q"],
-                            ordered=True,
-                        )
-                    ),
-                    "dup_3": Series(
-                        Categorical(
-                            ["low", "m", "h"],
-                            categories=["low", "m", "h"],
-                            ordered=True,
-                        )
-                    ),
-                }
-            ),
-            DataFrame(
-                {
-                    # First stays object; second turns into codes [0, 1, NaN]
-                    # and third changes into codes [0, 1, 2]
-                    "dup_1": Series(["a", "b", "c"]),
-                    "dup_2": Series([0.0, 1.0, np.nan]),
-                    "dup_3": Series([0, 1, 2], dtype="int8"),
-                }
-            ),
-            id="duplicate-names-ordered-and-non-categorical-and-none",
-        ),
     ],
 )
 def test_transform_ord_cat_cols_to_coded_cols(
-    input_df: DataFrame, expected_df: DataFrame
+    input_df_dict: dict, expected_df_dict: dict
 ) -> None:
     # GH #60306
-    # duplicate columns creation for dup columns
-    if "dup_1" in input_df.columns:
-        input_df.columns = ["dup" for _ in range(len(input_df.columns))]
-        expected_df.columns = ["dup" for _ in range(len(expected_df.columns))]
-
+    input_df = DataFrame(input_df_dict)
+    expected_df = DataFrame(expected_df_dict)
     out_df = transform_ord_cat_cols_to_coded_cols(input_df)
     assert list(out_df.columns) == list(expected_df.columns)
-    for i, col in enumerate(out_df.columns):
-        tm.assert_series_equal(out_df.iloc[:, i], expected_df.iloc[:, i])
+    tm.assert_frame_equal(out_df, expected_df)
+
+
+def test_transform_ord_cat_cols_to_coded_cols_duplicated_col() -> None:
+    # GH #60306: columns sharing the label "dup" are still checked by position
+    input_df_1 = DataFrame(
+        {
+            "dup_1": Categorical(
+                ["low", "m", "h"],
+                categories=["low", "m", "h"],
+                ordered=True,
+            ),
+            "dup_2": [5, 6, 7],
+        }
+    )
+    expected_df_1 = DataFrame(
+        {
+            # After transform: position 0 (ordered cat) becomes codes [0, 1, 2],
+            # position 1 keeps its original numbers [5, 6, 7].
+            "dup_1": Series([0, 1, 2], dtype="int8"),
+            "dup_2": [5, 6, 7],
+        }
+    )
+    input_df_1.columns = ["dup"] * len(input_df_1.columns)
+    expected_df_1.columns = ["dup"] * len(expected_df_1.columns)
+
+    out_df_1 = transform_ord_cat_cols_to_coded_cols(input_df_1)
+    tm.assert_frame_equal(out_df_1, expected_df_1)
+
+    input_df_2 = DataFrame(
+        {
+            "dup_1": ["a", "b", "c"],  # non-categorical
+            "dup_2": Categorical(
+                ["p", "q", None],
+                categories=["p", "q"],
+                ordered=True,
+            ),
+            "dup_3": Categorical(
+                ["low", "m", "h"],
+                categories=["low", "m", "h"],
+                ordered=True,
+            ),
+        }
+    )
+
+    expected_df_2 = DataFrame(
+        {
+            # First is non-categorical and stays as-is; second turns into codes
+            # [0, 1, NaN] (None -> NaN) and third turns into codes [0, 1, 2]
+            "dup_1": ["a", "b", "c"],
+            "dup_2": [0.0, 1.0, np.nan],
+            "dup_3": Series([0, 1, 2], dtype="int8"),
+        }
+    )
+    input_df_2.columns = ["dup"] * len(input_df_2.columns)
+    expected_df_2.columns = ["dup"] * len(expected_df_2.columns)
+
+    out_df_2 = transform_ord_cat_cols_to_coded_cols(input_df_2)
+    tm.assert_frame_equal(out_df_2, expected_df_2)
diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py