Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,

Notes
-----
Multidimensional numpy arrays are supported and will be converted to
nested list arrays. For example, a 2D array of shape (2, 3) will be
converted to an array of 2 lists, each containing 3 elements.
For C-contiguous arrays (default numpy layout), the data is flattened to
1D, converted as a regular 1D array and wrapped in nested fixed-size list
arrays, which avoids a round trip through Python objects. For
non-contiguous arrays (e.g., transposed, sliced, or Fortran-ordered), a
conversion via Python lists is used instead, as for any nested Python
sequence.
Note that the mask and size parameters are not supported for
multidimensional arrays.

Timezone will be preserved in the returned array for timezone-aware data,
else no timezone will be returned for naive timestamps.
Internally, UTC values are stored for timezone-aware data with the
Expand Down Expand Up @@ -229,6 +238,24 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
>>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64()))
>>> arr.type.index_type
DataType(int16)

Multidimensional numpy arrays are supported:

>>> np_2d = np.arange(6).reshape(2, 3)
>>> pa.array(np_2d)
<pyarrow.lib.FixedSizeListArray object at ...>
[
[
0,
1,
2
],
[
3,
4,
5
]
]
"""
cdef:
CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
Expand Down Expand Up @@ -299,6 +326,47 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
mask = None if values.mask is np.ma.nomask else values.mask
values = values.data

# Handle multidimensional numpy arrays by converting to nested lists
if isinstance(values, np.ndarray) and values.ndim > 1:
if mask is not None:
raise NotImplementedError(
"mask is not supported for multidimensional arrays")
if size is not None:
raise NotImplementedError(
"size is not supported for multidimensional arrays")

# For efficiency, build nested FixedSizeListArray layers in a loop
# for C-contiguous arrays (zero-copy), otherwise use .tolist()
if values.flags['C_CONTIGUOUS']:
# Efficient path: flatten to 1D, convert with zero-copy,
# then wrap in nested FixedSizeListArray layers
shape = values.shape
flat = values.ravel()

# Convert flattened 1D array to Arrow (zero-copy)
base_arr = _ndarray_to_array(flat, None, None, c_from_pandas,
safe, pool)

# Build nested FixedSizeListArray from innermost to outermost
# For shape (2, 3, 4), we create:
# FixedSizeList[4] -> FixedSizeList[3] -> 2 elements
result = base_arr
for dim_size in reversed(shape[1:]):
result = FixedSizeListArray.from_arrays(result, int(dim_size))

# Apply explicit type if provided
if type is not None:
result = result.cast(type, safe=safe, memory_pool=memory_pool)
else:
# Non-contiguous arrays: fallback to .tolist()
# This handles transposed, sliced, and F-contiguous arrays
result = _sequence_to_array(values.tolist(), None, None, type, pool,
c_from_pandas)

if extension_type is not None:
result = ExtensionArray.from_storage(extension_type, result)
return result

if mask is not None:
if mask.dtype != np.bool_:
raise TypeError("Mask must be boolean dtype")
Expand Down
66 changes: 66 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2478,6 +2478,72 @@ def test_array_from_numpy_datetime(dtype, type):
assert arr.equals(expected)


def _assert_values_match_numpy(pa_arr, np_arr):
    """Assert that ``pa_arr`` holds the same nested values as ``np_arr``.

    The comparison goes through Python lists, so it does not depend on
    which Arrow list type (fixed-size or variable-size) wraps the values.
    """
    assert len(pa_arr) == np_arr.shape[0]
    assert pa_arr.to_pylist() == np_arr.tolist()


@pytest.mark.numpy
def test_array_from_numpy_multidimensional():
    """Check conversion of multidimensional numpy arrays by ``pa.array``.

    Covers the C-contiguous path (nested fixed-size lists built from the
    flattened data), the non-contiguous fallback (conversion via
    ``tolist()``), an explicit target type, and the rejection of the
    ``mask`` and ``size`` arguments.
    """
    # C-contiguous inputs: 2D, 3D and a float dtype.
    contiguous_cases = [
        np.arange(6).reshape(2, 3),
        np.arange(24).reshape(2, 3, 4),
        np.array([[1.5, 2.5], [3.5, 4.5]]),
    ]
    for np_arr in contiguous_cases:
        assert np_arr.flags['C_CONTIGUOUS']
        pa_arr = pa.array(np_arr)

        # Array.equals() also compares types, so comparing against
        # pa.array(np_arr.tolist()) (a variable-size list array) would not
        # match the fixed-size list result; compare the values instead.
        _assert_values_match_numpy(pa_arr, np_arr)

        # Every dimension after the first becomes one fixed-size list
        # level, outermost first.
        level_type = pa_arr.type
        for dim_size in np_arr.shape[1:]:
            assert pa.types.is_fixed_size_list(level_type)
            assert level_type.list_size == dim_size
            level_type = level_type.value_type

    # An explicit type is applied by casting the nested result.
    np_arr_typed = np.array([[1, 2], [3, 4]], dtype=np.int32)
    list_int32 = pa.list_(pa.int32())
    pa_arr_typed = pa.array(np_arr_typed, type=list_int32)
    expected_typed = pa.array(np_arr_typed.tolist(), type=list_int32)
    assert pa_arr_typed.type == list_int32
    assert pa_arr_typed.equals(expected_typed)

    # mask and size are rejected for multidimensional input.
    np_arr_2d = contiguous_cases[0]
    with pytest.raises(NotImplementedError, match="mask is not supported"):
        pa.array(np_arr_2d, mask=np.array([True, False]))
    with pytest.raises(NotImplementedError, match="size is not supported"):
        pa.array(np_arr_2d, size=2)

    # Non-contiguous inputs take the tolist() fallback, so the result is
    # expected to equal the conversion of the equivalent nested list.
    np_arr_fortran = np.asfortranarray(np.arange(6).reshape(2, 3))
    assert np_arr_fortran.flags['F_CONTIGUOUS']
    non_contiguous_cases = [
        np_arr_2d.T,                              # transposed
        np.arange(12).reshape(3, 4)[:, ::2],      # strided slice
        np_arr_fortran,                           # Fortran order
    ]
    for np_arr in non_contiguous_cases:
        assert not np_arr.flags['C_CONTIGUOUS']
        pa_arr = pa.array(np_arr)
        _assert_values_match_numpy(pa_arr, np_arr)
        assert pa_arr.equals(pa.array(np_arr.tolist()))


@pytest.mark.numpy
def test_array_from_different_numpy_datetime_units_raises():
data = [
Expand Down