diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 8bad206eea028..3506a8e3a5279 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -210,6 +210,26 @@ def _from_sequence( result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) pa_arr = pa.array(result, mask=na_values, type=pa.large_string()) + elif isinstance(scalars, ArrowExtensionArray): + pa_type = scalars._pa_array.type + # Use PyArrow's native cast for integer, string, and boolean types. + # Float has different representation in PyArrow: 1.0 -> "1" instead + # of "1.0", and uses different scientific notation (1e+10 vs 1e10). + # Boolean needs capitalize (true -> True, false -> False). + if ( + pa.types.is_integer(pa_type) + or pa.types.is_large_string(pa_type) + or pa.types.is_string(pa_type) + or pa.types.is_boolean(pa_type) + ): + pa_arr = pc.cast(scalars._pa_array, pa.large_string()) + if pa.types.is_boolean(pa_type): + pa_arr = pc.utf8_capitalize(pa_arr) + else: + # Fall back for types where PyArrow's string representation + # differs from Python's str() + result = lib.ensure_string_array(scalars, copy=copy) + pa_arr = pa.array(result, type=pa.large_string(), from_pandas=True) elif isinstance(scalars, (pa.Array, pa.ChunkedArray)): pa_arr = pc.cast(scalars, pa.large_string()) else: diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 0075a7ed59795..c436391739ab2 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import HAS_PYARROW - from pandas import ( DataFrame, Series, @@ -218,10 +216,12 @@ def test_convert_dtypes(using_infer_string): df_orig = df.copy() df2 = df.convert_dtypes() - if HAS_PYARROW: - assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: + if using_infer_string: + # String column is already Arrow-backed, so memory is shared assert tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + # String column converts from object to Arrow, no memory sharing + assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) assert tm.shares_memory(get_array(df2, "d"), get_array(df, "d")) assert tm.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert tm.shares_memory(get_array(df2, "c"), get_array(df, "c"))