Skip to content

Commit 9b8598a

Browse files
API: Copy inputs in Index subclass constructors by default (GH#63388) (#63398)
1 parent b95f65b commit 9b8598a

File tree

12 files changed

+177
-20
lines changed

12 files changed

+177
-20
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,7 @@ Other API changes
820820
:meth:`~DataFrame.ffill`, :meth:`~DataFrame.bfill`, :meth:`~DataFrame.interpolate`,
821821
:meth:`~DataFrame.where`, :meth:`~DataFrame.mask`, :meth:`~DataFrame.clip`) now return
822822
the modified DataFrame or Series (``self``) instead of ``None`` when ``inplace=True`` (:issue:`63207`)
823+
- All Index constructors now copy ``numpy.ndarray`` and ``ExtensionArray`` inputs by default when ``copy=None``, consistent with :class:`Series` behavior (:issue:`63388`)
823824

824825
.. ---------------------------------------------------------------------------
825826
.. _whatsnew_300.deprecations:

pandas/core/indexes/base.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -505,12 +505,8 @@ def __new__(
505505
if not copy and isinstance(data, (ABCSeries, Index)):
506506
refs = data._references
507507

508-
if isinstance(data, (ExtensionArray, np.ndarray)):
509-
# GH 63306
510-
if copy is not False:
511-
if dtype is None or astype_is_view(data.dtype, dtype):
512-
data = data.copy()
513-
copy = False
508+
# GH 63306, GH 63388
509+
data, copy = cls._maybe_copy_array_input(data, copy, dtype)
514510

515511
# range
516512
if isinstance(data, (range, RangeIndex)):
@@ -5197,6 +5193,21 @@ def _raise_scalar_data_error(cls, data):
51975193
"was passed"
51985194
)
51995195

5196+
@classmethod
def _maybe_copy_array_input(
    cls, data, copy: bool | None, dtype
) -> tuple[Any, bool]:
    """
    Copy array input up front when the caller has not opted out.

    GH#63388

    Parameters
    ----------
    data : object
        Constructor input. Only ``ExtensionArray`` and ``np.ndarray``
        instances can be copied here; any other input is returned as-is.
    copy : bool or None
        Requested copy behavior. Any value other than ``False`` (that is,
        ``True`` or ``None``) allows the copy to happen.
    dtype : dtype-like or None
        Requested dtype; normalized with ``pandas_dtype`` before being
        compared against ``data.dtype``.

    Returns
    -------
    tuple
        ``(data, copy)`` where ``data`` is either the original object or a
        copy of it, and ``copy`` is always a plain ``bool``. ``copy`` is
        ``False`` whenever the copy was made here.
    """
    # Non-array input is never copied here.
    if not isinstance(data, (ExtensionArray, np.ndarray)):
        return data, bool(copy)

    # Explicit opt-out: hand the array back untouched.
    if copy is False:
        return data, False

    # Copy only when no dtype is requested or when ``astype_is_view``
    # reports True for the requested conversion; otherwise the flag is
    # passed on unchanged for the caller to act on.
    if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
        return data.copy(), False

    return data, bool(copy)
5210+
52005211
def _validate_fill_value(self, value):
52015212
"""
52025213
Check if the value can be inserted into our array without casting,

pandas/core/indexes/datetimes.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,13 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
181181
If True parse dates in `data` with the year first order.
182182
dtype : numpy.dtype or DatetimeTZDtype or str, default None
183183
Note that the only NumPy dtype allowed is `datetime64[ns]`.
184-
copy : bool, default False
185-
Make a copy of input ndarray.
184+
copy : bool, default None
185+
Whether to copy input data, only relevant for array, Series, and Index
186+
inputs (for other input, e.g. a list, a new array is created anyway).
187+
Defaults to True for array input and False for Index/Series.
188+
Set to False to avoid copying array input at your own risk (if you
189+
know the input data won't be modified elsewhere).
190+
Set to True to force copying Series/Index input up front.
186191
name : label, default None
187192
Name to be stored in the index.
188193
@@ -669,7 +674,7 @@ def __new__(
669674
dayfirst: bool = False,
670675
yearfirst: bool = False,
671676
dtype: Dtype | None = None,
672-
copy: bool = False,
677+
copy: bool | None = None,
673678
name: Hashable | None = None,
674679
) -> Self:
675680
if is_scalar(data):
@@ -679,6 +684,9 @@ def __new__(
679684

680685
name = maybe_extract_name(name, data, cls)
681686

687+
# GH#63388
688+
data, copy = cls._maybe_copy_array_input(data, copy, dtype)
689+
682690
if (
683691
isinstance(data, DatetimeArray)
684692
and freq is lib.no_default

pandas/core/indexes/interval.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,13 @@ class IntervalIndex(ExtensionIndex):
169169
neither.
170170
dtype : dtype or None, default None
171171
If None, dtype will be inferred.
172-
copy : bool, default False
173-
Copy the input data.
172+
copy : bool, default None
173+
Whether to copy input data, only relevant for array, Series, and Index
174+
inputs (for other input, e.g. a list, a new array is created anyway).
175+
Defaults to True for array input and False for Index/Series.
176+
Set to False to avoid copying array input at your own risk (if you
177+
know the input data won't be modified elsewhere).
178+
Set to True to force copying Series/Index input up front.
174179
name : object, optional
175180
Name to be stored in the index.
176181
verify_integrity : bool, default True
@@ -252,12 +257,15 @@ def __new__(
252257
data,
253258
closed: IntervalClosedType | None = None,
254259
dtype: Dtype | None = None,
255-
copy: bool = False,
260+
copy: bool | None = None,
256261
name: Hashable | None = None,
257262
verify_integrity: bool = True,
258263
) -> Self:
259264
name = maybe_extract_name(name, data, cls)
260265

266+
# GH#63388
267+
data, copy = cls._maybe_copy_array_input(data, copy, dtype)
268+
261269
with rewrite_exception("IntervalArray", cls.__name__):
262270
array = IntervalArray(
263271
data,

pandas/core/indexes/period.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,13 @@ class PeriodIndex(DatetimeIndexOpsMixin):
101101
One of pandas period strings or corresponding objects.
102102
dtype : str or PeriodDtype, default None
103103
A dtype from which to extract a freq.
104-
copy : bool
105-
Make a copy of input ndarray.
104+
copy : bool, default None
105+
Whether to copy input data, only relevant for array, Series, and Index
106+
inputs (for other input, e.g. a list, a new array is created anyway).
107+
Defaults to True for array input and False for Index/Series.
108+
Set to False to avoid copying array input at your own risk (if you
109+
know the input data won't be modified elsewhere).
110+
Set to True to force copying Series/Index input up front.
106111
name : str, default None
107112
Name of the resulting PeriodIndex.
108113
@@ -220,7 +225,7 @@ def __new__(
220225
data=None,
221226
freq=None,
222227
dtype: Dtype | None = None,
223-
copy: bool = False,
228+
copy: bool | None = None,
224229
name: Hashable | None = None,
225230
) -> Self:
226231
refs = None
@@ -231,6 +236,9 @@ def __new__(
231236

232237
freq = validate_dtype_freq(dtype, freq)
233238

239+
# GH#63388
240+
data, copy = cls._maybe_copy_array_input(data, copy, dtype)
241+
234242
# PeriodIndex allow PeriodIndex(period_index, freq=different)
235243
# Let's not encourage that kind of behavior in PeriodArray.
236244

pandas/core/indexes/timedeltas.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ class TimedeltaIndex(DatetimeTimedeltaMixin):
8181
dtype : numpy.dtype or str, default None
8282
Valid ``numpy`` dtypes are ``timedelta64[ns]``, ``timedelta64[us]``,
8383
``timedelta64[ms]``, and ``timedelta64[s]``.
84-
copy : bool
85-
Make a copy of input array.
84+
copy : bool, default None
85+
Whether to copy input data, only relevant for array, Series, and Index
86+
inputs (for other input, e.g. a list, a new array is created anyway).
87+
Defaults to True for array input and False for Index/Series.
88+
Set to False to avoid copying array input at your own risk (if you
89+
know the input data won't be modified elsewhere).
90+
Set to True to force copying Series/Index input up front.
8691
name : object
8792
Name to be stored in the index.
8893
@@ -158,11 +163,14 @@ def __new__(
158163
data=None,
159164
freq=lib.no_default,
160165
dtype=None,
161-
copy: bool = False,
166+
copy: bool | None = None,
162167
name=None,
163168
):
164169
name = maybe_extract_name(name, data, cls)
165170

171+
# GH#63388
172+
data, copy = cls._maybe_copy_array_input(data, copy, dtype)
173+
166174
if is_scalar(data):
167175
cls._raise_scalar_data_error(data)
168176

pandas/tests/arrays/test_datetimelike.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ def test_array_object_dtype(self, arr1d):
707707
def test_array_tz(self, arr1d):
708708
# GH#23524
709709
arr = arr1d
710-
dti = self.index_cls(arr1d)
710+
dti = self.index_cls(arr1d, copy=False)
711711
copy_false = None if np_version_gt2 else False
712712

713713
expected = dti.asi8.view("M8[ns]")

pandas/tests/copy_view/index/test_datetimeindex.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import (
45
DatetimeIndex,
56
Series,
67
Timestamp,
8+
array,
79
date_range,
810
)
911
import pandas._testing as tm
12+
from pandas.tests.copy_view.util import get_array
1013

1114
pytestmark = pytest.mark.filterwarnings(
1215
"ignore:Setting a value on a view:FutureWarning"
@@ -54,3 +57,30 @@ def test_index_values():
5457
idx = date_range("2019-12-31", periods=3, freq="D")
5558
result = idx.values
5659
assert result.flags.writeable is False
60+
61+
62+
def test_constructor_copy_input_datetime_ndarray_default():
    """Default-constructed DatetimeIndex must not share memory with its ndarray."""
    # GH 63388
    values = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    index = DatetimeIndex(values)
    shared = np.shares_memory(values, get_array(index))
    assert not shared
67+
68+
69+
def test_constructor_copy_input_datetime_ea_default():
    """Default-constructed DatetimeIndex must not share memory with its EA input."""
    # GH 63388
    values = array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    index = DatetimeIndex(values)
    shared = tm.shares_memory(values, index.array)
    assert not shared
74+
75+
76+
def test_series_from_temporary_datetimeindex_readonly_data():
    """
    A Series built from a temporary DatetimeIndex over read-only data is writable.

    The input ndarray is flagged read-only, wrapped in a DatetimeIndex that is
    passed straight to Series, and the Series is then modified in place.
    """
    # GH 63388
    arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    arr.flags.writeable = False
    ser = Series(DatetimeIndex(arr))
    assert not np.shares_memory(arr, get_array(ser))
    # Assign a value that differs from the one already stored so that the
    # comparison below fails if the write were silently dropped.
    ser.iloc[0] = Timestamp("2022-01-01")
    expected = Series(
        [Timestamp("2022-01-01"), Timestamp("2020-01-02")], dtype="datetime64[ns]"
    )
    tm.assert_series_equal(ser, expected)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
3+
from pandas import (
4+
Interval,
5+
IntervalIndex,
6+
Series,
7+
array,
8+
)
9+
import pandas._testing as tm
10+
from pandas.tests.copy_view.util import get_array
11+
12+
13+
def test_constructor_copy_input_interval_ea_default():
    """Default-constructed IntervalIndex must not share memory with its EA input."""
    # GH 63388
    intervals = array([Interval(0, 1), Interval(1, 2)])
    index = IntervalIndex(intervals)
    shared = tm.shares_memory(intervals, index.array)
    assert not shared
18+
19+
20+
def test_series_from_temporary_intervalindex_readonly_data():
    """
    A Series built from a temporary IntervalIndex over read-only data is writable.

    Both bound arrays of the input are flagged read-only, the array is wrapped
    in an IntervalIndex that is passed straight to Series, and the Series is
    then modified in place.
    """
    # GH 63388
    arr = array([Interval(0, 1), Interval(1, 2)])
    arr._left.flags.writeable = False
    arr._right.flags.writeable = False
    ser = Series(IntervalIndex(arr))
    result_arr = get_array(ser)
    # Both bounds were made read-only above, so check both for shared memory.
    assert not np.shares_memory(arr._left, result_arr._left)
    assert not np.shares_memory(arr._right, result_arr._right)
    ser.iloc[0] = Interval(5, 6)
    expected = Series([Interval(5, 6), Interval(1, 2)], dtype="interval[int64, right]")
    tm.assert_series_equal(ser, expected)

pandas/tests/copy_view/index/test_periodindex.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import (
45
Period,
56
PeriodIndex,
67
Series,
8+
array,
79
period_range,
810
)
911
import pandas._testing as tm
12+
from pandas.tests.copy_view.util import get_array
1013

1114
pytestmark = pytest.mark.filterwarnings(
1215
"ignore:Setting a value on a view:FutureWarning"
@@ -21,3 +24,24 @@ def test_periodindex(box):
2124
expected = idx.copy(deep=True)
2225
ser.iloc[0] = Period("2020-12-31")
2326
tm.assert_index_equal(idx, expected)
27+
28+
29+
def test_constructor_copy_input_period_ea_default():
    """Default-constructed PeriodIndex must not share memory with its EA input."""
    # GH 63388
    periods = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
    index = PeriodIndex(periods)
    shared = tm.shares_memory(periods, index.array)
    assert not shared
34+
35+
36+
def test_series_from_temporary_periodindex_readonly_data():
    """
    A Series built from a temporary PeriodIndex over read-only data is writable.

    The backing ndarray of the input is flagged read-only, the array is
    wrapped in a PeriodIndex that is passed straight to Series, and the Series
    is then modified in place.
    """
    # GH 63388
    periods = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
    periods._ndarray.flags.writeable = False
    ser = Series(PeriodIndex(periods))
    shared = np.shares_memory(periods._ndarray, get_array(ser))
    assert not shared
    ser.iloc[0] = Period("2022-01-01", freq="D")
    expected_values = [
        Period("2022-01-01", freq="D"),
        Period("2020-01-02", freq="D"),
    ]
    expected = Series(expected_values, dtype="period[D]")
    tm.assert_series_equal(ser, expected)

0 commit comments

Comments
 (0)