Skip to content

Commit 910baf5

Browse files
fangchenli and claude authored
Perf: avoid fallback to NumPy in value_counts for Arrow-backed array (#63389)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0901352 commit 910baf5

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

pandas/core/algorithms.py

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -898,18 +898,15 @@ def value_counts_internal(
898898
result = result.iloc[0:0]
899899

900900
# normalizing is by len of all (regardless of dropna)
901-
counts = np.array([len(ii)])
901+
normalize_denominator = len(ii)
902902

903903
else:
904+
normalize_denominator = None
904905
if is_extension_array_dtype(values):
905906
# handle Categorical and sparse,
906907
result = Series(values, copy=False)._values.value_counts(dropna=dropna)
907908
result.name = name
908909
result.index.name = index_name
909-
counts = result._values
910-
if not isinstance(counts, np.ndarray):
911-
# e.g. ArrowExtensionArray
912-
counts = np.asarray(counts)
913910

914911
elif isinstance(values, ABCMultiIndex):
915912
# GH49558
@@ -920,10 +917,6 @@ def value_counts_internal(
920917
.size()
921918
)
922919
result.index.names = values.names
923-
# error: Incompatible types in assignment (expression has type
924-
# "ndarray[Any, Any] | DatetimeArray | TimedeltaArray | PeriodArray | Any",
925-
# variable has type "ndarray[tuple[int, ...], dtype[Any]]")
926-
counts = result._values # type: ignore[assignment]
927920

928921
else:
929922
values = _ensure_arraylike(values, func_name="value_counts")
@@ -936,8 +929,7 @@ def value_counts_internal(
936929
idx = Index(keys, dtype=keys.dtype, name=index_name)
937930

938931
if (
939-
bins is None
940-
and not sort
932+
not sort
941933
and isinstance(values, (DatetimeIndex, TimedeltaIndex))
942934
and idx.equals(values)
943935
and values.inferred_freq is not None
@@ -951,7 +943,10 @@ def value_counts_internal(
951943
result = result.sort_values(ascending=ascending, kind="stable")
952944

953945
if normalize:
954-
result = result / counts.sum()
946+
if normalize_denominator is not None:
947+
result = result / normalize_denominator
948+
else:
949+
result = result / result.sum()
955950

956951
return result
957952

0 commit comments

Comments
 (0)