From a15e73060c01b94c99376e6ec7bc5d44bb173ba4 Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Thu, 18 Dec 2025 14:48:17 -0800
Subject: [PATCH] [slimtensor migration 4/n] use ET ArrayRef and IntArrayRef

This stack aims to migrate SlimTensor into the ExecuTorch stack so that
it can serve as the internal tensor representation of the CUDA backend.
This diff makes SlimTensor leverage the ExecuTorch (ET) ArrayRef and
IntArrayRef types, and introduces utility functions to support the
migration.

Differential Revision: [D89494978](https://our.internmc.facebook.com/intern/diff/D89494978/)

[ghstack-poisoned]
---
 backends/aoti/slim/c10/core/Contiguity.h      |   6 +-
 backends/aoti/slim/c10/core/MemoryFormat.h    |  24 +-
 backends/aoti/slim/c10/core/SizesAndStrides.h |   4 +-
 backends/aoti/slim/c10/targets.bzl            |   1 +
 backends/aoti/slim/c10/util/ArrayRef.h        | 376 ------------------
 backends/aoti/slim/c10/util/Half-inl.h        |  12 +-
 backends/aoti/slim/core/SlimTensor.h          |  69 ++--
 .../aoti/slim/core/SlimTensorResize-incl.h    |  13 +-
 backends/aoti/slim/core/SlimTensorView-incl.h |  49 ++-
 backends/aoti/slim/core/Storage.h             |   6 +-
 backends/aoti/slim/factory/Empty.h            |  20 +-
 backends/aoti/slim/factory/Factory.h          |  19 +-
 backends/aoti/slim/factory/FromBlob.h         |  38 +-
 backends/aoti/slim/factory/Pad.h              |  10 +-
 backends/aoti/slim/util/ArrayRefUtil.h        |  38 ++
 backends/aoti/slim/util/SizeUtil.h            |  32 +-
 16 files changed, 217 insertions(+), 500 deletions(-)
 delete mode 100644 backends/aoti/slim/c10/util/ArrayRef.h
 create mode 100644 backends/aoti/slim/util/ArrayRefUtil.h

diff --git a/backends/aoti/slim/c10/core/Contiguity.h b/backends/aoti/slim/c10/core/Contiguity.h
index 80db87eb588..bba29a7355c 100644
--- a/backends/aoti/slim/c10/core/Contiguity.h
+++ b/backends/aoti/slim/c10/core/Contiguity.h
@@ -1,13 +1,17 @@
 #pragma once
 
-#include
 #include
+#include
 
 #include
 #include
+#include
 
 namespace executorch::backends::aoti::slim::c10 {
 
+using ::executorch::runtime::ArrayRef;
+using ::executorch::runtime::IntArrayRef;
+
 template <typename T>
 bool _compute_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
   if (numel == 0) {
diff --git a/backends/aoti/slim/c10/core/MemoryFormat.h b/backends/aoti/slim/c10/core/MemoryFormat.h
index 956e3c81703..683885d3ce8 100644
--- a/backends/aoti/slim/c10/core/MemoryFormat.h
+++ b/backends/aoti/slim/c10/core/MemoryFormat.h
@@ -1,31 +1,17 @@
 #pragma once
 
-#include
+#include
 
 #include
 #include
 #include
 #include
 
-// Memory format is not the property of a Tensor. It is the way to tell an
-// operator how the result should be organized in memory and nothing more. That
-// means memory format should never be used as return value for any tensor state
-// interrogation functions (internally and externally).
-//
-// Possible options are:
-//  Preserve:
-//    If any of the input tensors is in channels_last format, operator output
-//    should be in channels_last format
-//
-//  Contiguous:
-//    Regardless of input tensors format, the output should be contiguous
-//    Tensor.
-//
-//  ChannelsLast:
-//    Regardless of input tensors format, the output should be in channels_last
-//    format.
-
 namespace executorch::backends::aoti::slim::c10 {
+
+using ::executorch::runtime::ArrayRef;
+using ::executorch::runtime::IntArrayRef;
+
 enum class MemoryFormat : int8_t {
   Contiguous,
   Preserve,
diff --git a/backends/aoti/slim/c10/core/SizesAndStrides.h b/backends/aoti/slim/c10/core/SizesAndStrides.h
index 28d24555d1d..284997b8034 100644
--- a/backends/aoti/slim/c10/core/SizesAndStrides.h
+++ b/backends/aoti/slim/c10/core/SizesAndStrides.h
@@ -6,13 +6,15 @@
 #include
 #include
 
-#include
+#include
 #include
 
 #define STANDALONE_SIZES_AND_STRIDES_MAX_INLINE_SIZE 5
 
 namespace executorch::backends::aoti::slim::c10 {
 
+using ::executorch::runtime::IntArrayRef;
+
 // Packed container for TensorImpl sizes and strides.
 // This design improves on the previous approach of using a pair of
 // c10::SmallVector by specializing for the operations we
diff --git a/backends/aoti/slim/c10/targets.bzl b/backends/aoti/slim/c10/targets.bzl
index d65e0f5aa8b..c2f28bdfb45 100644
--- a/backends/aoti/slim/c10/targets.bzl
+++ b/backends/aoti/slim/c10/targets.bzl
@@ -16,6 +16,7 @@ def define_common_targets():
         ),
         visibility = ["@EXECUTORCH_CLIENTS"],
         exported_deps = [
+            "//executorch/runtime/core:core",
             "//executorch/runtime/platform:platform",
         ],
     )
diff --git a/backends/aoti/slim/c10/util/ArrayRef.h b/backends/aoti/slim/c10/util/ArrayRef.h
deleted file mode 100644
index 62727455aaa..00000000000
--- a/backends/aoti/slim/c10/util/ArrayRef.h
+++ /dev/null
@@ -1,376 +0,0 @@
-//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// ATen: modified from llvm::ArrayRef.
-// removed llvm-specific functionality
-// removed some implicit const -> non-const conversions that rely on
-// complicated std::enable_if meta-programming
-// removed a bunch of slice variants for simplicity...
-
-#pragma once
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace executorch::backends::aoti::slim::c10 {
-/// ArrayRef - Represent a constant reference to an array (0 or more elements
-/// consecutively in memory), i.e. a start pointer and a length. It allows
-/// various APIs to take consecutive elements easily and conveniently.
-///
-/// This class does not own the underlying data, it is expected to be used in
-/// situations where the data resides in some other buffer, whose lifetime
-/// extends past that of the ArrayRef. For this reason, it is not in general
-/// safe to store an ArrayRef.
-///
-/// This is intended to be trivially copyable, so it should be passed by
-/// value.
-template <typename T>
-class ArrayRef final {
- public:
-  using iterator = const T*;
-  using const_iterator = const T*;
-  using size_type = size_t;
-  using value_type = T;
-
-  using reverse_iterator = std::reverse_iterator<iterator>;
-
- private:
-  /// The start of the array, in an external buffer.
-  const T* Data;
-
-  /// The number of elements.
-  size_type Length;
-
-  void debugCheckNullptrInvariant() {
-    ET_DCHECK_MSG(
-        Data != nullptr || Length == 0,
-        "created ArrayRef with nullptr and non-zero length! std::optional "
-        "relies on this being illegal");
-  }
-
- public:
-  /// @name Constructors
-  /// @{
-
-  /// Construct an empty ArrayRef.
-  /* implicit */ constexpr ArrayRef() : Data(nullptr), Length(0) {}
-
-  /// Construct an ArrayRef from a single element.
-  // TODO Make this explicit
-  constexpr ArrayRef(const T& OneElt) : Data(&OneElt), Length(1) {}
-
-  /// Construct an ArrayRef from a pointer and length.
-  constexpr ArrayRef(const T* data, size_t length)
-      : Data(data), Length(length) {
-    debugCheckNullptrInvariant();
-  }
-
-  /// Construct an ArrayRef from a range.
-  constexpr ArrayRef(const T* begin, const T* end)
-      : Data(begin), Length(end - begin) {
-    debugCheckNullptrInvariant();
-  }
-
-  template <
-      typename Container,
-      typename U = decltype(std::declval<Container>().data()),
-      typename = std::enable_if_t<
-          (std::is_same_v<U, T*> || std::is_same_v<U, T const*>)>>
-  /* implicit */ ArrayRef(const Container& container)
-      : Data(container.data()), Length(container.size()) {
-    debugCheckNullptrInvariant();
-  }
-
-  /// Construct an ArrayRef from a std::vector.
-  // The enable_if stuff here makes sure that this isn't used for
-  // std::vector<bool>, because ArrayRef can't work on a std::vector<bool>
-  // bitfield.
-  template <typename A>
-  /* implicit */ ArrayRef(const std::vector<T, A>& Vec)
-      : Data(Vec.data()), Length(Vec.size()) {
-    static_assert(
-        !std::is_same_v<T, bool>,
-        "ArrayRef<bool> cannot be constructed from a "
-        "std::vector<bool> bitfield.");
-  }
-
-  /// Construct an ArrayRef from a std::array
-  template <size_t N>
-  /* implicit */ constexpr ArrayRef(const std::array<T, N>& Arr)
-      : Data(Arr.data()), Length(N) {}
-
-  /// Construct an ArrayRef from a C array.
-  template <size_t N>
-  // NOLINTNEXTLINE(*c-arrays*)
-  /* implicit */ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
-
-  /// Construct an ArrayRef from a std::initializer_list.
-  /* implicit */ constexpr ArrayRef(const std::initializer_list<T>& Vec)
-      : Data(
-            std::begin(Vec) == std::end(Vec) ? static_cast<T*>(nullptr)
-                                             : std::begin(Vec)),
-        Length(Vec.size()) {}
-
-  /// @}
-  /// @name Simple Operations
-  /// @{
-
-  constexpr iterator begin() const {
-    return Data;
-  }
-  constexpr iterator end() const {
-    return Data + Length;
-  }
-
-  // These are actually the same as iterator, since ArrayRef only
-  // gives you const iterators.
-  constexpr const_iterator cbegin() const {
-    return Data;
-  }
-  constexpr const_iterator cend() const {
-    return Data + Length;
-  }
-
-  constexpr reverse_iterator rbegin() const {
-    return reverse_iterator(end());
-  }
-  constexpr reverse_iterator rend() const {
-    return reverse_iterator(begin());
-  }
-
-  /// Check if all elements in the array satisfy the given expression
-  constexpr bool allMatch(const std::function<bool(const T&)>& pred) const {
-    return std::all_of(cbegin(), cend(), pred);
-  }
-
-  /// empty - Check if the array is empty.
-  constexpr bool empty() const {
-    return Length == 0;
-  }
-
-  constexpr const T* data() const {
-    return Data;
-  }
-
-  /// size - Get the array size.
-  constexpr size_t size() const {
-    return Length;
-  }
-
-  /// front - Get the first element.
-  constexpr const T& front() const {
-    ET_CHECK_MSG(
-        !empty(), "ArrayRef: attempted to access front() of empty list");
-    return Data[0];
-  }
-
-  /// back - Get the last element.
-  constexpr const T& back() const {
-    ET_CHECK_MSG(
-        !empty(), "ArrayRef: attempted to access back() of empty list");
-    return Data[Length - 1];
-  }
-
-  /// equals - Check for element-wise equality.
-  constexpr bool equals(ArrayRef RHS) const {
-    return Length == RHS.Length && std::equal(begin(), end(), RHS.begin());
-  }
-
-  /// slice(n, m) - Take M elements of the array starting at element N
-  constexpr ArrayRef<T> slice(size_t N, size_t M) const {
-    ET_CHECK_MSG(
-        N + M <= size(),
-        "ArrayRef: invalid slice, N = %zu; M = %zu; size = %zu",
-        N,
-        M,
-        size());
-    return ArrayRef<T>(data() + N, M);
-  }
-
-  /// slice(n) - Chop off the first N elements of the array.
-  constexpr ArrayRef<T> slice(size_t N) const {
-    ET_CHECK_MSG(
-        N <= size(), "ArrayRef: invalid slice, N = %zu; size = %zu", N, size());
-    return slice(N, size() - N);
-  }
-
-  /// @}
-  /// @name Operator Overloads
-  /// @{
-  constexpr const T& operator[](size_t Index) const {
-    return Data[Index];
-  }
-
-  /// Vector compatibility
-  constexpr const T& at(size_t Index) const {
-    ET_CHECK_MSG(
-        Index < Length,
-        "ArrayRef: invalid index Index = %zu; Length = %zu",
-        Index,
-        Length);
-    return Data[Index];
-  }
-
-  /// Disallow accidental assignment from a temporary.
-  ///
-  /// The declaration here is extra complicated so that "arrayRef = {}"
-  /// continues to select the move assignment operator.
-  template <typename U>
-  std::enable_if_t<std::is_same_v<U, T>, ArrayRef<T>>& operator=(
-      // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward)
-      U&& Temporary) = delete;
-
-  /// Disallow accidental assignment from a temporary.
-  ///
-  /// The declaration here is extra complicated so that "arrayRef = {}"
-  /// continues to select the move assignment operator.
-  template <typename U>
-  std::enable_if_t<std::is_same_v<U, T>, ArrayRef<T>>& operator=(
-      std::initializer_list<U>) = delete;
-
-  /// @}
-  /// @name Expensive Operations
-  /// @{
-  std::vector<T> vec() const {
-    return std::vector<T>(Data, Data + Length);
-  }
-
-  /// @}
-};
-
-template <typename T>
-std::ostream& operator<<(std::ostream& out, ArrayRef<T> list) {
-  int i = 0;
-  out << "[";
-  for (const auto& e : list) {
-    if (i++ > 0)
-      out << ", ";
-    out << e;
-  }
-  out << "]";
-  return out;
-}
-
-/// @name ArrayRef Convenience constructors
-/// @{
-
-/// Construct an ArrayRef from a single element.
-template <typename T>
-ArrayRef<T> makeArrayRef(const T& OneElt) {
-  return OneElt;
-}
-
-/// Construct an ArrayRef from a pointer and length.
-template <typename T>
-ArrayRef<T> makeArrayRef(const T* data, size_t length) {
-  return ArrayRef<T>(data, length);
-}
-
-/// Construct an ArrayRef from a range.
-template <typename T>
-ArrayRef<T> makeArrayRef(const T* begin, const T* end) {
-  return ArrayRef<T>(begin, end);
-}
-
-/// Construct an ArrayRef from a std::vector.
-template <typename T>
-ArrayRef<T> makeArrayRef(const std::vector<T>& Vec) {
-  return Vec;
-}
-
-/// Construct an ArrayRef from a std::array.
-template <typename T, size_t N>
-ArrayRef<T> makeArrayRef(const std::array<T, N>& Arr) {
-  return Arr;
-}
-
-/// Construct an ArrayRef from an ArrayRef (no-op) (const)
-template <typename T>
-ArrayRef<T> makeArrayRef(const ArrayRef<T>& Vec) {
-  return Vec;
-}
-
-/// Construct an ArrayRef from an ArrayRef (no-op)
-template <typename T>
-ArrayRef<T>& makeArrayRef(ArrayRef<T>& Vec) {
-  return Vec;
-}
-
-/// Construct an ArrayRef from a C array.
-template <typename T, size_t N>
-// NOLINTNEXTLINE(*c-arrays*)
-ArrayRef<T> makeArrayRef(const T (&Arr)[N]) {
-  return ArrayRef<T>(Arr);
-}
-
-// WARNING: Template instantiation will NOT be willing to do an implicit
-// conversions to get you to an executorch::backends::aoti::slim::c10::ArrayRef,
-// which is why we need so many overloads.
-
-template <typename T>
-bool operator==(
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
-  return a1.equals(a2);
-}
-
-template <typename T>
-bool operator!=(
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
-  return !a1.equals(a2);
-}
-
-template <typename T>
-bool operator==(
-    const std::vector<T>& a1,
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
-  return executorch::backends::aoti::slim::c10::ArrayRef<T>(a1).equals(a2);
-}
-
-template <typename T>
-bool operator!=(
-    const std::vector<T>& a1,
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
-  return !executorch::backends::aoti::slim::c10::ArrayRef<T>(a1).equals(a2);
-}
-
-template <typename T>
-bool operator==(
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
-    const std::vector<T>& a2) {
-  return a1.equals(executorch::backends::aoti::slim::c10::ArrayRef<T>(a2));
-}
-
-template <typename T>
-bool operator!=(
-    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
-    const std::vector<T>& a2) {
-  return !a1.equals(executorch::backends::aoti::slim::c10::ArrayRef<T>(a2));
-}
-
-using IntArrayRef = ArrayRef<int64_t>;
-
-using IntList
-    [[deprecated("This alias is deprecated because it doesn't make ownership "
-                 "semantics obvious. Use IntArrayRef instead!")]] =
-        ArrayRef<int64_t>;
-
-} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/Half-inl.h b/backends/aoti/slim/c10/util/Half-inl.h
index f7b25c0ebe0..7dbddd47d61 100644
--- a/backends/aoti/slim/c10/util/Half-inl.h
+++ b/backends/aoti/slim/c10/util/Half-inl.h
@@ -48,10 +48,10 @@ inline STANDALONE_HOST_DEVICE Half::Half(float value)
 #elif defined(__SYCL_DEVICE_ONLY__)
     x(executorch::backends::aoti::slim::c10::bit_cast<uint16_t>(
         sycl::half(value)))
-#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-    x(at::vec::float2half_scalar(value))
 #else
+    // Use software fallback for all CPU targets in standalone SlimTensor.
+    // The ATen vectorization functions (at::vec::float2half_scalar) are not
+    // available in standalone builds.
     x(detail::fp16_ieee_from_fp32_value(value))
 #endif
 {
@@ -64,12 +64,12 @@ inline STANDALONE_HOST_DEVICE Half::operator float() const {
   return __half2float(*reinterpret_cast<const __half*>(&x));
 #elif defined(__SYCL_DEVICE_ONLY__)
   return float(executorch::backends::aoti::slim::c10::bit_cast<sycl::half>(x));
-#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-  return at::vec::half2float_scalar(x);
 #elif defined(__aarch64__) && !defined(__CUDACC__)
   return detail::native_fp16_to_fp32_value(x);
 #else
+  // Use software fallback for all CPU targets in standalone SlimTensor.
+  // The ATen vectorization functions (at::vec::half2float_scalar) are not
+  // available in standalone builds.
   return detail::fp16_ieee_to_fp32_value(x);
 #endif
 }
diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/SlimTensor.h
index 3eea629d26b..9d21b5a8288 100644
--- a/backends/aoti/slim/core/SlimTensor.h
+++ b/backends/aoti/slim/core/SlimTensor.h
@@ -24,8 +24,8 @@ class SlimTensor {
  public:
   SlimTensor(
       Storage&& storage,
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      IntArrayRef sizes,
+      IntArrayRef strides,
       executorch::backends::aoti::slim::c10::ScalarType dtype,
       int64_t storage_offset = 0)
       : storage_(std::move(storage)),
@@ -70,7 +70,7 @@ class SlimTensor {
     return executorch::backends::aoti::slim::c10::elementSize(dtype_);
   }
 
-  executorch::backends::aoti::slim::c10::IntArrayRef sizes() const {
+  IntArrayRef sizes() const {
     return sizes_and_strides_.sizes_arrayref();
   }
 
@@ -80,7 +80,7 @@ class SlimTensor {
     return sizes_and_strides_.size_at(static_cast<size_t>(wrapped_dim));
   }
 
-  executorch::backends::aoti::slim::c10::IntArrayRef strides() const {
+  IntArrayRef strides() const {
     return sizes_and_strides_.strides_arrayref();
   }
 
@@ -149,8 +149,8 @@ class SlimTensor {
   }
 
   void set_sizes_and_strides(
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      IntArrayRef sizes,
+      IntArrayRef strides,
       std::optional<int64_t> storage_offset = std::nullopt) {
     const int64_t new_dim = static_cast<int64_t>(sizes.size());
     ET_CHECK_MSG(
         static_cast(new_dim),
         strides.size());
 
-    std::vector<int64_t> new_sizes = sizes.vec();
-    std::vector<int64_t> new_strides = strides.vec();
+    std::vector<int64_t> new_sizes = toVec(sizes);
+    std::vector<int64_t> new_strides = toVec(strides);
 
     // stride calculation logic
     bool overflowed = false;
@@ -183,8 +183,8 @@ class SlimTensor {
     }
     ET_CHECK_MSG(!overflowed, "Stride calculation overflowed");
 
-    sizes_and_strides_.set_sizes(new_sizes);
-    sizes_and_strides_.set_strides(new_strides);
+    sizes_and_strides_.set_sizes(makeArrayRef(new_sizes));
+    sizes_and_strides_.set_strides(makeArrayRef(new_strides));
     if (storage_offset.has_value()) {
       storage_offset_ = *storage_offset;
     }
@@ -193,8 +193,7 @@ class SlimTensor {
   }
 
-  void set_sizes_contiguous(
-      executorch::backends::aoti::slim::c10::IntArrayRef new_size) {
+  void set_sizes_contiguous(IntArrayRef new_size) {
     sizes_and_strides_.set_sizes(new_size);
     refresh_numel();
     empty_tensor_restride(
@@ -205,7 +204,7 @@ class SlimTensor {
       executorch::backends::aoti::slim::c10::MemoryFormat memory_format);
 
   SlimTensor resize_(
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      IntArrayRef sizes,
       std::optional<executorch::backends::aoti::slim::c10::MemoryFormat> optional_memory_format);
 
   // Conversion operations
@@ -461,35 +460,41 @@ class SlimTensor {
         executorch::backends::aoti::slim::compute_contiguous_strides(
             this->sizes());
     return _clone_impl(
-        this->sizes(), contig_strides, this->dtype(), this->device());
+        this->sizes(),
+        makeArrayRef(contig_strides),
+        this->dtype(),
+        this->device());
   }
 
   // View operations
   SlimTensor as_strided(
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      IntArrayRef sizes,
+      IntArrayRef strides,
       int64_t storage_offset) const;
-  SlimTensor as_strided_(
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-      executorch::backends::aoti::slim::c10::IntArrayRef strides,
-      int64_t storage_offset);
+  SlimTensor
+  as_strided_(IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset);
 
-  SlimTensor permute(
-      executorch::backends::aoti::slim::c10::IntArrayRef dims) const;
+  SlimTensor permute(IntArrayRef dims) const;
+
+  SlimTensor permute(std::initializer_list<int64_t> dims) const {
+    return permute(makeArrayRef(dims));
+  }
 
   // Transpose operations
   SlimTensor transpose() const;
   SlimTensor transpose(int64_t dim0, int64_t dim1) const;
   SlimTensor t() const;
 
-  SlimTensor reshape(
-      executorch::backends::aoti::slim::c10::IntArrayRef proposed_shape) const;
+  SlimTensor reshape(IntArrayRef proposed_shape) const;
+
+  SlimTensor reshape(std::initializer_list<int64_t> proposed_shape) const {
+    return reshape(makeArrayRef(proposed_shape));
+  }
 
   SlimTensor narrow(int64_t dim, int64_t start, int64_t length) const;
 
   // Generic element access returning SlimTensor
-  SlimTensor operator[](
-      executorch::backends::aoti::slim::c10::IntArrayRef indices) const {
+  SlimTensor operator[](IntArrayRef indices) const {
     ET_CHECK_MSG(
         indices.size() <= this->dim(),
         "Number of indices (%zu) cannot exceed tensor dimensions (%zu)",
@@ -532,8 +537,8 @@ class SlimTensor {
     int64_t new_storage_offset = this->storage_offset_ + offset_adjustment;
     return SlimTensor(
         Storage(this->storage_),
-        new_sizes,
-        new_strides,
+        makeArrayRef(new_sizes),
+        makeArrayRef(new_strides),
         this->dtype_,
         new_storage_offset);
   }
 
   // Convenience overload for single index
   SlimTensor operator[](int64_t index) const {
-    return (*this)[executorch::backends::aoti::slim::c10::IntArrayRef{index}];
+    return (*this)[IntArrayRef{index}];
   }
 
   // Convenience overloads for common multi-dimensional cases
   SlimTensor operator[](std::initializer_list<int64_t> indices) const {
-    return (*this)[executorch::backends::aoti::slim::c10::IntArrayRef(indices)];
+    return (*this)[makeArrayRef(indices)];
   }
 
   // Extract scalar value from 0-dimensional tensor
@@ -599,8 +604,8 @@ class SlimTensor {
 
  private:
   SlimTensor _clone_impl(
-      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      IntArrayRef sizes,
+      IntArrayRef strides,
       executorch::backends::aoti::slim::c10::ScalarType dtype,
       const executorch::backends::aoti::slim::c10::Device& device) const {
     Storage storage = new_storage(sizes, strides, dtype, device);
diff --git a/backends/aoti/slim/core/SlimTensorResize-incl.h b/backends/aoti/slim/core/SlimTensorResize-incl.h
index ac52df109f8..eb93f2bfac2 100644
--- a/backends/aoti/slim/core/SlimTensorResize-incl.h
+++ b/backends/aoti/slim/core/SlimTensorResize-incl.h
@@ -4,6 +4,7 @@
 
 #include
 #include
+#include
 #include
 
 namespace executorch::backends::aoti::slim {
@@ -35,13 +36,15 @@ inline void SlimTensor::empty_tensor_restride(
     case executorch::backends::aoti::slim::c10::MemoryFormat::ChannelsLast: {
       ET_CHECK_MSG(
           dim() == 4, "required rank 4 tensor to use channels_last format");
-      set_sizes_and_strides(sizes(), get_channels_last_strides_2d(sizes()));
+      auto strides = c10::get_channels_last_strides_2d(sizes());
+      set_sizes_and_strides(sizes(), makeArrayRef(strides));
       break;
     }
     case executorch::backends::aoti::slim::c10::MemoryFormat::ChannelsLast3d: {
       ET_CHECK_MSG(
           dim() == 5, "required rank 5 tensor to use channels_last_3d format");
-      set_sizes_and_strides(sizes(), get_channels_last_strides_3d(sizes()));
+      auto strides = c10::get_channels_last_strides_3d(sizes());
+      set_sizes_and_strides(sizes(), makeArrayRef(strides));
       break;
     }
     case executorch::backends::aoti::slim::c10::MemoryFormat::Preserve:
@@ -122,8 +125,8 @@ inline void _maybe_resize_storage(SlimTensor* self, int64_t new_size_bytes) {
 inline SlimTensor* _resize_impl_(
     SlimTensor* self,
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    std::optional<executorch::backends::aoti::slim::c10::IntArrayRef> strides,
+    IntArrayRef sizes,
+    std::optional<IntArrayRef> strides,
     bool resize_storage) {
   if (self->sizes() == sizes &&
       (!strides || self->strides() == strides.value())) {
@@ -151,7 +154,7 @@
 }
 
 inline SlimTensor SlimTensor::resize_(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     std::optional<executorch::backends::aoti::slim::c10::MemoryFormat> optional_memory_format) {
   _resize_impl_(this, sizes, /*strides=*/std::nullopt, true);
diff --git a/backends/aoti/slim/core/SlimTensorView-incl.h b/backends/aoti/slim/core/SlimTensorView-incl.h
index c352a531d2c..8d018f25657 100644
--- a/backends/aoti/slim/core/SlimTensorView-incl.h
+++ b/backends/aoti/slim/core/SlimTensorView-incl.h
@@ -3,13 +3,13 @@
 
 #include
 #include
-#include
+#include
 #include
 
 namespace executorch::backends::aoti::slim {
 inline SlimTensor SlimTensor::as_strided(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     int64_t storage_offset) const {
   SlimTensor result = *this;
   result.as_strided_(sizes, strides, storage_offset);
@@ -17,8 +17,8 @@
 }
 
 inline SlimTensor SlimTensor::as_strided_(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     int64_t storage_offset) {
   ET_CHECK_MSG(
       sizes.size() == strides.size(),
@@ -41,17 +41,16 @@
   return *this;
 }
 
-inline SlimTensor SlimTensor::permute(
-    executorch::backends::aoti::slim::c10::IntArrayRef dims) const {
+inline SlimTensor SlimTensor::permute(IntArrayRef dims) const {
   const size_t ndim = this->dim();
   ET_CHECK_MSG(
       ndim == static_cast<size_t>(dims.size()),
       "permute: dims length must be equal to tensor.dim()");
 
-  executorch::backends::aoti::slim::c10::ArrayRef<int64_t> old_sizes = this->sizes();
-  executorch::backends::aoti::slim::c10::ArrayRef<int64_t> old_strides = this->strides();
-  std::vector<int64_t> new_sizes = old_sizes.vec();
-  std::vector<int64_t> new_strides = old_strides.vec();
+  IntArrayRef old_sizes = this->sizes();
+  IntArrayRef old_strides = this->strides();
+  std::vector<int64_t> new_sizes = toVec(old_sizes);
+  std::vector<int64_t> new_strides = toVec(old_strides);
 
   std::vector<bool> seen_dims(ndim, false);
   for (size_t i = 0; i < ndim; i++) {
@@ -64,13 +63,17 @@
   }
 
   SlimTensor result = *this;
-  result.as_strided_(new_sizes, new_strides, this->storage_offset());
+  result.as_strided_(
+      makeArrayRef(new_sizes),
+      makeArrayRef(new_strides),
+      this->storage_offset());
   return result;
 }
 
 inline SlimTensor SlimTensor::transpose() const {
   ET_CHECK_MSG(dim() == 2, "transpose() can only be called on 2D tensors");
-  return permute({1, 0});
+  std::vector<int64_t> dims_vec = {1, 0};
+  return permute(makeArrayRef(dims_vec));
 }
 
 inline SlimTensor SlimTensor::transpose(int64_t dim0, int64_t dim1) const {
@@ -85,28 +88,29 @@ inline SlimTensor SlimTensor::transpose(int64_t dim0, int64_t dim1) const {
   dim1 = executorch::backends::aoti::slim::c10::maybe_wrap_dim(dim1, ndim);
   std::swap(dims[dim0], dims[dim1]);
 
-  return permute(dims);
+  return permute(makeArrayRef(dims));
 }
 
 inline SlimTensor SlimTensor::t() const {
   return transpose();
 }
 
-inline SlimTensor SlimTensor::reshape(
-    executorch::backends::aoti::slim::c10::IntArrayRef proposed_shape) const {
+inline SlimTensor SlimTensor::reshape(IntArrayRef proposed_shape) const {
   std::vector<int64_t> final_shape_vec =
       infer_size(proposed_shape, this->numel());
 
   // `compute_stride` return the proper strides to use if this
   // `reshape` can be just a view.
-  std::optional<std::vector<int64_t>> new_strides_opt =
-      compute_stride(this->sizes(), this->strides(), final_shape_vec);
+  std::optional<std::vector<int64_t>> new_strides_opt = compute_stride(
+      this->sizes(), this->strides(), makeArrayRef(final_shape_vec));
 
   // create a view if possible
   if (new_strides_opt.has_value()) {
     SlimTensor result = *this;
     result.as_strided_(
-        final_shape_vec, new_strides_opt.value(), this->storage_offset());
+        makeArrayRef(final_shape_vec),
+        makeArrayRef(new_strides_opt.value()),
+        this->storage_offset());
     return result;
   }
 
@@ -115,7 +119,7 @@
   // after cloning, the tensor is already contiguous. We just need to update
   // its metadata to reflect the new shape. This is effectively a view of
   // the new contiguous clone
-  contiguous_clone.set_sizes_contiguous(final_shape_vec);
+  contiguous_clone.set_sizes_contiguous(makeArrayRef(final_shape_vec));
   return contiguous_clone;
 }
 
@@ -140,9 +144,10 @@ inline SlimTensor SlimTensor::narrow(int64_t dim, int64_t start, int64_t length)
   SlimTensor result = *this;
   int64_t new_storage_offset =
       this->storage_offset() + start * this->stride(dim);
-  std::vector<int64_t> new_sizes = this->sizes().vec();
+  std::vector<int64_t> new_sizes = toVec(this->sizes());
   new_sizes[dim] = length;
-  result.as_strided_(new_sizes, this->strides(), new_storage_offset);
+  result.as_strided_(
+      makeArrayRef(new_sizes), this->strides(), new_storage_offset);
   return result;
 }
diff --git a/backends/aoti/slim/core/Storage.h b/backends/aoti/slim/core/Storage.h
index ffcaa48b36b..be607fd53cf 100644
--- a/backends/aoti/slim/core/Storage.h
+++ b/backends/aoti/slim/core/Storage.h
@@ -10,9 +10,9 @@
 #include
 #include
 
-#include
 #include
 #include
+#include
 #include
 #include
 
@@ -310,8 +310,8 @@ class MaybeOwningStorage {
 using Storage = SharedPtr<MaybeOwningStorage>;
 
 inline Storage new_storage(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   size_t nbytes = compute_storage_nbytes(
diff --git a/backends/aoti/slim/factory/Empty.h b/backends/aoti/slim/factory/Empty.h
index 20dd89fe1e6..a8aa8ddc414 100644
--- a/backends/aoti/slim/factory/Empty.h
+++ b/backends/aoti/slim/factory/Empty.h
@@ -5,13 +5,14 @@
 
 #include
 #include
+#include
 #include
 
 namespace executorch::backends::aoti::slim {
 // The returned SlimTensor owns the underlying storage
 inline SlimTensor empty_strided(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   Storage storage = new_storage(sizes, strides, dtype, device);
@@ -19,13 +20,22 @@
 }
 
 inline SlimTensor empty(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   std::vector<int64_t> contig_strides =
       executorch::backends::aoti::slim::compute_contiguous_strides(sizes);
-  Storage storage = new_storage(sizes, contig_strides, dtype, device);
-  return SlimTensor(std::move(storage), sizes, contig_strides, dtype, 0);
+  Storage storage =
+      new_storage(sizes, makeArrayRef(contig_strides), dtype, device);
+  return SlimTensor(
+      std::move(storage), sizes, makeArrayRef(contig_strides), dtype, 0);
+}
+
+inline SlimTensor empty(
+    std::initializer_list<int64_t> sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
+  return empty(makeArrayRef(sizes), dtype, device);
 }
 
 inline SlimTensor empty_like(const SlimTensor& other) {
diff --git a/backends/aoti/slim/factory/Factory.h b/backends/aoti/slim/factory/Factory.h
index f0d26041ad3..b96df87ad45 100644
--- a/backends/aoti/slim/factory/Factory.h
+++ b/backends/aoti/slim/factory/Factory.h
@@ -1,10 +1,11 @@
 #pragma once
 
 #include
+#include
 
 namespace executorch::backends::aoti::slim {
 inline SlimTensor zeros(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   SlimTensor tensor = empty(sizes, dtype, device);
@@ -12,12 +13,19 @@
   return tensor;
 }
 
+inline SlimTensor zeros(
+    std::initializer_list<int64_t> sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
+  return zeros(makeArrayRef(sizes), dtype, device);
+}
+
 inline SlimTensor zeros_like(const SlimTensor& other) {
   return zeros(other.sizes(), other.dtype(), other.device());
 }
 
 inline SlimTensor ones(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   SlimTensor tensor = empty(sizes, dtype, device);
@@ -25,6 +33,13 @@
   return tensor;
 }
 
+inline SlimTensor ones(
+    std::initializer_list<int64_t> sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
+  return ones(makeArrayRef(sizes), dtype, device);
+}
+
 inline SlimTensor ones_like(const SlimTensor& other) {
   return ones(other.sizes(), other.dtype(), other.device());
 }
diff --git a/backends/aoti/slim/factory/FromBlob.h b/backends/aoti/slim/factory/FromBlob.h
index 5f58987d750..5c66129457b 100644
--- a/backends/aoti/slim/factory/FromBlob.h
+++ b/backends/aoti/slim/factory/FromBlob.h
@@ -1,14 +1,16 @@
 #pragma once
 
-#include
+#include
+#include
+#include
 
 namespace executorch::backends::aoti::slim {
 
 // The returned SlimTensor does not own the underlying storage
 inline SlimTensor from_blob(
     void* data,
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
     int64_t storage_offset = 0) {
@@ -24,13 +26,39 @@
 }
 
 inline SlimTensor from_blob(
     void* data,
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     executorch::backends::aoti::slim::c10::ScalarType dtype,
     const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
     int64_t storage_offset = 0) {
   std::vector<int64_t> contig_strides =
       executorch::backends::aoti::slim::compute_contiguous_strides(sizes);
-  return from_blob(data, sizes, contig_strides, dtype, device, storage_offset);
+  return from_blob(
+      data, sizes, makeArrayRef(contig_strides), dtype, device, storage_offset);
+}
+
+inline SlimTensor from_blob(
+    void* data,
+    std::initializer_list<int64_t> sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
+    int64_t storage_offset = 0) {
+  return from_blob(data, makeArrayRef(sizes), dtype, device, storage_offset);
+}
+
+inline SlimTensor from_blob(
+    void* data,
+    std::initializer_list<int64_t> sizes,
+    std::initializer_list<int64_t> strides,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
+    int64_t storage_offset = 0) {
+  return from_blob(
+      data,
+      makeArrayRef(sizes),
+      makeArrayRef(strides),
+      dtype,
+      device,
+      storage_offset);
 }
 
 } // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/factory/Pad.h b/backends/aoti/slim/factory/Pad.h
index 18e262c372b..b71b8b595c0 100644
--- a/backends/aoti/slim/factory/Pad.h
+++ b/backends/aoti/slim/factory/Pad.h
@@ -1,16 +1,17 @@
 #pragma once
 
 #include
+#include
 
 namespace executorch::backends::aoti::slim {
 
 inline SlimTensor constant_pad_nd(
     const SlimTensor& self,
-    executorch::backends::aoti::slim::c10::IntArrayRef pad,
+    IntArrayRef pad,
     const executorch::backends::aoti::slim::c10::Scalar& value) {
   ET_CHECK_MSG(pad.size() % 2 == 0, "Length of pad must be even");
 
-  executorch::backends::aoti::slim::c10::IntArrayRef input_sizes = self.sizes();
+  IntArrayRef input_sizes = self.sizes();
   int64_t l_inp = self.dim();
   int64_t l_pad = static_cast<int64_t>(pad.size()) / 2;
   int64_t l_diff = l_inp - l_pad;
@@ -61,7 +62,8 @@
     new_shape.emplace_back(new_dim);
   }
 
-  SlimTensor output = empty(new_shape, self.dtype(), self.device());
+  SlimTensor output =
+      empty(makeArrayRef(new_shape), self.dtype(), self.device());
   output.fill_(value);
 
   // create a view into the center of the output tensor
@@ -84,7 +86,7 @@
 
 inline SlimTensor pad(
     const SlimTensor& self,
-    executorch::backends::aoti::slim::c10::IntArrayRef pad,
+    IntArrayRef pad,
     std::string_view mode,
     std::optional<double> value) {
   if (mode == "constant") {
diff --git a/backends/aoti/slim/util/ArrayRefUtil.h b/backends/aoti/slim/util/ArrayRefUtil.h
new file mode 100644
index 00000000000..61dc48ff0a1
--- /dev/null
+++ b/backends/aoti/slim/util/ArrayRefUtil.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Utilities for working with ExecuTorch's ArrayRef in SlimTensor code.
+// This header provides helper functions for creating ArrayRefs from
+// std::vector and std::initializer_list, and for converting ArrayRefs
+// back to std::vector.
+
+#include
+
+#include
+#include
+
+namespace executorch::backends::aoti::slim {
+
+// Bring ExecuTorch's ArrayRef types into the SlimTensor namespace
+using ::executorch::runtime::ArrayRef;
+using ::executorch::runtime::IntArrayRef;
+using ::executorch::runtime::makeArrayRef;
+
+/// Helper function to construct an ArrayRef from a std::vector.
+template <typename T>
+inline ArrayRef<T> makeArrayRef(const std::vector<T>& Vec) {
+  return ArrayRef<T>(Vec.data(), Vec.size());
+}
+
+/// Helper function to construct an ArrayRef from a std::initializer_list.
+template <typename T>
+inline ArrayRef<T> makeArrayRef(std::initializer_list<T> list) {
+  return ArrayRef<T>(list.begin(), list.size());
+}
+
+/// Helper function to convert ArrayRef to std::vector.
+template <typename T>
+inline std::vector<T> toVec(ArrayRef<T> arr) {
+  return std::vector<T>(arr.begin(), arr.end());
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/util/SizeUtil.h b/backends/aoti/slim/util/SizeUtil.h
index 234fc2e9457..3e18d98577e 100644
--- a/backends/aoti/slim/util/SizeUtil.h
+++ b/backends/aoti/slim/util/SizeUtil.h
@@ -3,12 +3,11 @@
 #include
 #include
 #include
-#include
-#include
 
 #include
 #include
 #include
+#include
 
 namespace executorch::backends::aoti::slim {
 #ifndef STANDALONE_MOBILE
@@ -27,8 +26,7 @@
  * tensor. Catches integer overflow that may occur when a tensor
 * using a sparse layout has multiple dimensions with large sizes.
 */
-inline int64_t safe_compute_numel(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes) {
+inline int64_t safe_compute_numel(IntArrayRef sizes) {
   uint64_t n = 1;
   bool overflowed =
       executorch::backends::aoti::slim::c10::safe_multiplies_u64(sizes, &n);
@@ -37,8 +35,7 @@
   return static_cast<int64_t>(n);
 }
 
-inline std::vector<int64_t> safe_compute_contiguous_strides(
-    c10::IntArrayRef sizes) {
+inline std::vector<int64_t> safe_compute_contiguous_strides(IntArrayRef sizes) {
   int64_t ndim = static_cast<int64_t>(sizes.size());
   std::vector<int64_t> strides(ndim);
   if (ndim > 0) {
@@ -60,8 +57,7 @@
 }
 #endif // STANDALONE_MOBILE
 
-inline int64_t compute_numel(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes) {
+inline int64_t compute_numel(IntArrayRef sizes) {
 #ifndef STANDALONE_MOBILE
   // Use overflow checks if supported by the compiler
   return safe_compute_numel(sizes);
@@ -72,7 +68,7 @@
 
 // named computeStorageNbytesContiguous in c10
 inline size_t compute_storage_nbytes_contiguous(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    IntArrayRef sizes,
     size_t itemsize_bytes,
     size_t storage_offset) {
   // Ignore overflow checks on mobile
@@ -95,8 +91,8 @@
 
 // named computeStorageNbytes in c10
 inline size_t compute_storage_nbytes(
-    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    IntArrayRef sizes,
+    IntArrayRef strides,
     size_t itemsize_bytes,
     size_t storage_offset) {
   ET_CHECK_MSG(
@@ -144,7 +140,7 @@
 #endif
 }
 
-inline std::vector<int64_t> compute_contiguous_strides(c10::IntArrayRef sizes) {
+inline std::vector<int64_t> compute_contiguous_strides(IntArrayRef sizes) {
 #ifndef STANDALONE_MOBILE
   return safe_compute_contiguous_strides(sizes);
 #else
@@ -165,9 +161,7 @@
 
 // calculates the final concrete shape by also filling in at most one '-1'
 // dimension.
-inline std::vector<int64_t> infer_size(
-    executorch::backends::aoti::slim::c10::IntArrayRef shape,
-    int64_t numel) {
+inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) {
   int64_t new_size = 1;
   std::optional<int64_t> infer_dim;
   std::vector<int64_t> result_shape;
@@ -215,9 +209,9 @@
 // If so, it returns the new strides
 // If not, it returns an empty optional
 inline std::optional<std::vector<int64_t>> compute_stride(
-    executorch::backends::aoti::slim::c10::IntArrayRef old_sizes,
-    executorch::backends::aoti::slim::c10::IntArrayRef old_strides,
-    executorch::backends::aoti::slim::c10::IntArrayRef new_sizes) {
+    IntArrayRef old_sizes,
+    IntArrayRef old_strides,
+    IntArrayRef new_sizes) {
   if (old_sizes.empty()) {
     return std::vector<int64_t>(new_sizes.size(), 1);
  }
@@ -229,7 +223,7 @@
   // didn't seem worth it.
   size_t numel = compute_numel(old_sizes);
   if (numel == 0 && old_sizes == new_sizes) {
-    return old_strides.vec();
+    return toVec(old_strides);
  }
 
   int64_t new_sizes_len = static_cast<int64_t>(new_sizes.size());
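
Reviewer note (not part of the patch): below is a minimal, illustrative sketch of how the new ArrayRefUtil.h helpers compose. Only the makeArrayRef/toVec signatures and the IntArrayRef alias come from the diff above; the include set, the using-declarations, and the main() harness are assumptions added for illustration, not code from this change.

#include <cstdint>
#include <vector>

#include <executorch/backends/aoti/slim/util/ArrayRefUtil.h>

using executorch::backends::aoti::slim::IntArrayRef;
using executorch::backends::aoti::slim::makeArrayRef;
using executorch::backends::aoti::slim::toVec;

int main() {
  // makeArrayRef(vector) builds a non-owning view: sizes_vec must outlive
  // every use of `sizes`.
  std::vector<int64_t> sizes_vec = {2, 3, 4};
  IntArrayRef sizes = makeArrayRef(sizes_vec);

  // toVec() stands in for the removed ArrayRef::vec(): copy back into an
  // owning std::vector when the values need to be mutated, mirroring the
  // sizes.vec() -> toVec(sizes) rewrites in this diff.
  std::vector<int64_t> new_sizes = toVec(sizes);
  new_sizes[0] = 8;

  // The initializer_list overload is what backs the new empty({...}),
  // zeros({...}), ones({...}) and from_blob(..., {...}) conveniences. The
  // view only lives for the enclosing full expression, so consume it
  // immediately rather than storing it.
  int64_t first = makeArrayRef<int64_t>({8, 3, 4})[0];

  return (first == 8 && new_sizes[0] == 8) ? 0 : 1;
}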