diff --git a/backends/aoti/slim/c10/core/Contiguity.h b/backends/aoti/slim/c10/core/Contiguity.h
index d5ff49561ab..80db87eb588 100644
--- a/backends/aoti/slim/c10/core/Contiguity.h
+++ b/backends/aoti/slim/c10/core/Contiguity.h
@@ -6,7 +6,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 template <typename T>
 bool _compute_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
@@ -148,4 +148,4 @@ bool _compute_non_overlapping_and_dense(
   return true;
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/Device.h b/backends/aoti/slim/c10/core/Device.h
index a9a6d3a8136..02e88a30b1e 100644
--- a/backends/aoti/slim/c10/core/Device.h
+++ b/backends/aoti/slim/c10/core/Device.h
@@ -17,7 +17,7 @@
 // Copied from c10/core/DeviceType.h with some modifications
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 namespace detail {
 enum class DeviceStringParsingState {
   kSTART,
@@ -341,18 +341,21 @@ inline std::ostream& operator<<(std::ostream& stream, const Device& device) {
   stream << device.str();
   return stream;
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 template <>
-struct hash<standalone::c10::Device> {
-  size_t operator()(standalone::c10::Device d) const noexcept {
+struct hash<executorch::backends::aoti::slim::c10::Device> {
+  size_t operator()(
+      executorch::backends::aoti::slim::c10::Device d) const noexcept {
     // Are you here because this static assert failed? Make sure you ensure
     // that the bitmasking code below is updated accordingly!
     static_assert(
-        sizeof(standalone::c10::DeviceType) == 1, "DeviceType is not 8-bit");
+        sizeof(executorch::backends::aoti::slim::c10::DeviceType) == 1,
+        "DeviceType is not 8-bit");
     static_assert(
-        sizeof(standalone::c10::DeviceIndex) == 1, "DeviceIndex is not 8-bit");
+        sizeof(executorch::backends::aoti::slim::c10::DeviceIndex) == 1,
+        "DeviceIndex is not 8-bit");
     // Note [Hazard when concatenating signed integers]
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // We must first convert to a same-sized unsigned type, before promoting to
diff --git a/backends/aoti/slim/c10/core/DeviceType.h b/backends/aoti/slim/c10/core/DeviceType.h
index f2631a48f2d..eb024a3595d 100644
--- a/backends/aoti/slim/c10/core/DeviceType.h
+++ b/backends/aoti/slim/c10/core/DeviceType.h
@@ -15,7 +15,7 @@
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 enum class DeviceType : int8_t {
   CPU = 0,
   CUDA = 1, // CUDA.
@@ -122,12 +122,13 @@ inline std::ostream& operator<<(std::ostream& stream, DeviceType type) {
   stream << DeviceTypeName(type, /* lower case */ true);
   return stream;
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 template <>
-struct hash<standalone::c10::DeviceType> {
-  std::size_t operator()(standalone::c10::DeviceType k) const {
+struct hash<executorch::backends::aoti::slim::c10::DeviceType> {
+  std::size_t operator()(
+      executorch::backends::aoti::slim::c10::DeviceType k) const {
     return std::hash<int>()(static_cast<int>(k));
   }
 };
diff --git a/backends/aoti/slim/c10/core/Layout.h b/backends/aoti/slim/c10/core/Layout.h
index 79230f23bb7..4d7b5499088 100644
--- a/backends/aoti/slim/c10/core/Layout.h
+++ b/backends/aoti/slim/c10/core/Layout.h
@@ -5,7 +5,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 enum class Layout : int8_t {
   Strided,
   Sparse,
@@ -50,4 +50,4 @@ inline std::ostream& operator<<(std::ostream& stream, c10::Layout layout) {
   }
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/MemoryFormat.h b/backends/aoti/slim/c10/core/MemoryFormat.h
index 756caf64f26..68f1a6d7357 100644
--- a/backends/aoti/slim/c10/core/MemoryFormat.h
+++ b/backends/aoti/slim/c10/core/MemoryFormat.h
@@ -25,7 +25,7 @@
 // Regardless of input tensors format, the output should be in channels_last
 // format.
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 enum class MemoryFormat : int8_t {
   Contiguous,
   Preserve,
@@ -38,7 +38,7 @@ enum class MemoryFormat : int8_t {
 // the memory format could be preserved, and it was switched to old default
 // behaviour of contiguous
 #define LEGACY_CONTIGUOUS_MEMORY_FORMAT \
-  ::standalone::c10::get_contiguous_memory_format()
+  ::executorch::backends::aoti::slim::c10::get_contiguous_memory_format()
 inline MemoryFormat get_contiguous_memory_format() {
   return MemoryFormat::Contiguous;
@@ -288,4 +288,4 @@ inline bool is_channels_last_strides_3d(
   return is_channels_last_strides_3d(sizes, strides);
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/Scalar.h b/backends/aoti/slim/c10/core/Scalar.h
index 1c61ecb4704..b46add34946 100644
--- a/backends/aoti/slim/c10/core/Scalar.h
+++ b/backends/aoti/slim/c10/core/Scalar.h
@@ -15,7 +15,7 @@
 // Copy-pasted from c10/core/Scalar.h, but dropping SymScalar support
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 /**
  * Scalar represents a 0-dimensional tensor which contains a single element.
@@ -86,22 +86,23 @@ class Scalar {
     v.i = convert(vv);
   }
-#define DEFINE_ACCESSOR(type, name) \
-  type to##name() const { \
-    if (Tag::HAS_d == tag) { \
-      return checked_convert<type, double>(v.d, #type); \
-    } else if (Tag::HAS_z == tag) { \
-      return checked_convert<type, standalone::c10::complex<double>>( \
-          v.z, #type); \
-    } \
-    if (Tag::HAS_b == tag) { \
-      return checked_convert<type, bool>(v.i, #type); \
-    } else if (Tag::HAS_i == tag) { \
-      return checked_convert<type, int64_t>(v.i, #type); \
-    } else if (Tag::HAS_u == tag) { \
-      return checked_convert<type, uint64_t>(v.u, #type); \
-    } \
-    STANDALONE_CHECK(false) \
+#define DEFINE_ACCESSOR(type, name) \
+  type to##name() const { \
+    if (Tag::HAS_d == tag) { \
+      return checked_convert<type, double>(v.d, #type); \
+    } else if (Tag::HAS_z == tag) { \
+      return checked_convert< \
+          type, \
+          executorch::backends::aoti::slim::c10::complex<double>>(v.z, #type); \
+    } \
+    if (Tag::HAS_b == tag) { \
+      return checked_convert<type, bool>(v.i, #type); \
+    } else if (Tag::HAS_i == tag) { \
+      return checked_convert<type, int64_t>(v.i, #type); \
+    } else if (Tag::HAS_u == tag) { \
+      return checked_convert<type, uint64_t>(v.u, #type); \
+    } \
+    STANDALONE_CHECK(false) \
   }
 // TODO: Support ComplexHalf accessor
@@ -193,8 +194,9 @@ class Scalar {
   template <
       typename T,
-      typename std::enable_if_t<!standalone::c10::is_complex<T>::value, int> =
-          0>
+      typename std::enable_if_t<
+          !executorch::backends::aoti::slim::c10::is_complex<T>::value,
+          int> = 0>
   bool equal(T num) const {
     if (isComplex()) {
       auto val = v.z;
@@ -223,7 +225,9 @@ class Scalar {
   template <
       typename T,
-      typename std::enable_if_t<standalone::c10::is_complex<T>::value, int> = 0>
+      typename std::enable_if_t<
+          executorch::backends::aoti::slim::c10::is_complex<T>::value,
+          int> = 0>
   bool equal(T num) const {
     if (isComplex()) {
       return v.z == num;
@@ -257,20 +261,20 @@ class Scalar {
     }
   }
-  standalone::c10::ScalarType type() const {
+  executorch::backends::aoti::slim::c10::ScalarType type() const {
     if (isComplex()) {
-      return standalone::c10::ScalarType::ComplexDouble;
+      return executorch::backends::aoti::slim::c10::ScalarType::ComplexDouble;
     } else if (isFloatingPoint()) {
-      return standalone::c10::ScalarType::Double;
+      return executorch::backends::aoti::slim::c10::ScalarType::Double;
     } else if (isIntegral(/*includeBool=*/false)) {
       // Represent all integers as long, UNLESS it is unsigned and therefore
       // unrepresentable as long
       if (Tag::HAS_u == tag) {
-        return standalone::c10::ScalarType::UInt64;
+        return executorch::backends::aoti::slim::c10::ScalarType::UInt64;
       }
-      return standalone::c10::ScalarType::Long;
+      return executorch::backends::aoti::slim::c10::ScalarType::Long;
     } else if (isBoolean()) {
-      return standalone::c10::ScalarType::Bool;
+      return executorch::backends::aoti::slim::c10::ScalarType::Bool;
     } else {
       throw std::runtime_error("Unknown scalar type.");
     }
@@ -313,7 +317,7 @@ class Scalar {
     int64_t i;
     // See Note [Meaning of HAS_u]
     uint64_t u;
-    standalone::c10::complex<double> z;
+    executorch::backends::aoti::slim::c10::complex<double> z;
     // NOLINTNEXTLINE(modernize-use-equals-default)
     v_t() {} // default constructor
   } v;
@@ -330,7 +334,8 @@ class Scalar {
   template <
       typename T,
       typename std::enable_if_t<
-          !std::is_integral_v<T> && !standalone::c10::is_complex<T>::value,
+          !std::is_integral_v<T> &&
+              !executorch::backends::aoti::slim::c10::is_complex<T>::value,
           bool>* = nullptr>
   Scalar(T vv, bool) : tag(Tag::HAS_d) {
     v.d = convert(vv);
  }
@@ -338,8 +343,9 @@ class Scalar {
   template <
       typename T,
-      typename std::enable_if_t<standalone::c10::is_complex<T>::value, bool>* =
-          nullptr>
+      typename std::enable_if_t<
+          executorch::backends::aoti::slim::c10::is_complex<T>::value,
+          bool>* = nullptr>
   Scalar(T vv, bool) : tag(Tag::HAS_z) {
     v.z = convert(vv);
   }
@@ -357,4 +363,4 @@ DEFINE_TO(uint32_t, UInt32)
 DEFINE_TO(uint64_t, UInt64)
 #undef DEFINE_TO
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/ScalarType.h b/backends/aoti/slim/c10/core/ScalarType.h
index 6daeaad5f2c..6481b3d2c4b 100644
--- a/backends/aoti/slim/c10/core/ScalarType.h
+++ b/backends/aoti/slim/c10/core/ScalarType.h
@@ -26,7 +26,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 // dummy struct for uint1 to uint7, actual functionality
 // of these dtypes will be implemented in python with Tensor subclass
@@ -60,53 +60,62 @@ struct dummy_int1_7_t {};
 // NB: Order matters for this macro; it is relied upon in
 // _promoteTypesLookup and the serialization format.
-#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(_) \
-  _(uint8_t, Byte) /* 0 */ \
-  _(int8_t, Char) /* 1 */ \
-  _(int16_t, Short) /* 2 */ \
-  _(int, Int) /* 3 */ \
-  _(int64_t, Long) /* 4 */ \
-  _(standalone::c10::Half, Half) /* 5 */ \
-  _(float, Float) /* 6 */ \
-  _(double, Double) /* 7 */ \
-  _(standalone::c10::complex<standalone::c10::Half>, ComplexHalf) /* 8 */ \
-  _(standalone::c10::complex<float>, ComplexFloat) /* 9 */ \
-  _(standalone::c10::complex<double>, ComplexDouble) /* 10 */ \
-  _(bool, Bool) /* 11 */ \
-  _(standalone::c10::qint8, QInt8) /* 12 */ \
-  _(standalone::c10::quint8, QUInt8) /* 13 */ \
-  _(standalone::c10::qint32, QInt32) /* 14 */ \
-  _(standalone::c10::BFloat16, BFloat16) /* 15 */ \
-  _(standalone::c10::quint4x2, QUInt4x2) /* 16 */ \
-  _(standalone::c10::quint2x4, QUInt2x4) /* 17 */ \
-  _(standalone::c10::bits1x8, Bits1x8) /* 18 */ \
-  _(standalone::c10::bits2x4, Bits2x4) /* 19 */ \
-  _(standalone::c10::bits4x2, Bits4x2) /* 20 */ \
-  _(standalone::c10::bits8, Bits8) /* 21 */ \
-  _(standalone::c10::bits16, Bits16) /* 22 */ \
-  _(standalone::c10::Float8_e5m2, Float8_e5m2) /* 23 */ \
-  _(standalone::c10::Float8_e4m3fn, Float8_e4m3fn) /* 24 */ \
-  _(standalone::c10::Float8_e5m2fnuz, Float8_e5m2fnuz) /* 25 */ \
-  _(standalone::c10::Float8_e4m3fnuz, Float8_e4m3fnuz) /* 26 */ \
-  _(uint16_t, UInt16) /* 27 */ \
-  _(uint32_t, UInt32) /* 28 */ \
-  _(uint64_t, UInt64) /* 29 */ \
-  _(standalone::c10::dummy_uint1_7_t<1>, UInt1) /* 30 */ \
-  _(standalone::c10::dummy_uint1_7_t<2>, UInt2) /* 31 */ \
-  _(standalone::c10::dummy_uint1_7_t<3>, UInt3) /* 32 */ \
-  _(standalone::c10::dummy_uint1_7_t<4>, UInt4) /* 33 */ \
-  _(standalone::c10::dummy_uint1_7_t<5>, UInt5) /* 34 */ \
-  _(standalone::c10::dummy_uint1_7_t<6>, UInt6) /* 35 */ \
-  _(standalone::c10::dummy_uint1_7_t<7>, UInt7) /* 36 */ \
-  _(standalone::c10::dummy_int1_7_t<1>, Int1) /* 37 */ \
-  _(standalone::c10::dummy_int1_7_t<2>, Int2) /* 38 */ \
-  _(standalone::c10::dummy_int1_7_t<3>, Int3) /* 39 */ \
-  _(standalone::c10::dummy_int1_7_t<4>, Int4) /* 40 */ \
-  _(standalone::c10::dummy_int1_7_t<5>, Int5) /* 41 */ \
-  _(standalone::c10::dummy_int1_7_t<6>, Int6) /* 42 */ \
-  _(standalone::c10::dummy_int1_7_t<7>, Int7) /* 43 */ \
-  _(standalone::c10::Float8_e8m0fnu, Float8_e8m0fnu) /* 44 */ \
-  _(standalone::c10::Float4_e2m1fn_x2, Float4_e2m1fn_x2) /* 45 */
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(_) \
+  _(uint8_t, Byte) /* 0 */ \
+  _(int8_t, Char) /* 1 */ \
+  _(int16_t, Short) /* 2 */ \
+  _(int, Int) /* 3 */ \
+  _(int64_t, Long) /* 4 */ \
+  _(executorch::backends::aoti::slim::c10::Half, Half) /* 5 */ \
+  _(float, Float) /* 6 */ \
+  _(double, Double) /* 7 */ \
+  _(executorch::backends::aoti::slim::c10::complex< \
+        executorch::backends::aoti::slim::c10::Half>, \
+    ComplexHalf) /* 8 */ \
+  _(executorch::backends::aoti::slim::c10::complex<float>, \
+    ComplexFloat) /* 9 */ \
+  _(executorch::backends::aoti::slim::c10::complex<double>, \
+    ComplexDouble) /* 10 */ \
+  _(bool, Bool) /* 11 */ \
+  _(executorch::backends::aoti::slim::c10::qint8, QInt8) /* 12 */ \
+  _(executorch::backends::aoti::slim::c10::quint8, QUInt8) /* 13 */ \
+  _(executorch::backends::aoti::slim::c10::qint32, QInt32) /* 14 */ \
+  _(executorch::backends::aoti::slim::c10::BFloat16, BFloat16) /* 15 */ \
+  _(executorch::backends::aoti::slim::c10::quint4x2, QUInt4x2) /* 16 */ \
+  _(executorch::backends::aoti::slim::c10::quint2x4, QUInt2x4) /* 17 */ \
+  _(executorch::backends::aoti::slim::c10::bits1x8, Bits1x8) /* 18 */ \
+  _(executorch::backends::aoti::slim::c10::bits2x4, Bits2x4) /* 19 */ \
+  _(executorch::backends::aoti::slim::c10::bits4x2, Bits4x2) /* 20 */ \
+  _(executorch::backends::aoti::slim::c10::bits8, Bits8) /* 21 */ \
+  _(executorch::backends::aoti::slim::c10::bits16, Bits16) /* 22 */ \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2, Float8_e5m2) /* 23 */ \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fn, \
+    Float8_e4m3fn) /* 24 */ \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2fnuz, \
+    Float8_e5m2fnuz) /* 25 */ \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fnuz, \
+    Float8_e4m3fnuz) /* 26 */ \
+  _(uint16_t, UInt16) /* 27 */ \
+  _(uint32_t, UInt32) /* 28 */ \
+  _(uint64_t, UInt64) /* 29 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<1>, UInt1) /* 30 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<2>, UInt2) /* 31 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<3>, UInt3) /* 32 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<4>, UInt4) /* 33 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<5>, UInt5) /* 34 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<6>, UInt6) /* 35 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_uint1_7_t<7>, UInt7) /* 36 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<1>, Int1) /* 37 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<2>, Int2) /* 38 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<3>, Int3) /* 39 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<4>, Int4) /* 40 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<5>, Int5) /* 41 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<6>, Int6) /* 42 */ \
+  _(executorch::backends::aoti::slim::c10::dummy_int1_7_t<7>, Int7) /* 43 */ \
+  _(executorch::backends::aoti::slim::c10::Float8_e8m0fnu, \
+    Float8_e8m0fnu) /* 44 */ \
+  _(executorch::backends::aoti::slim::c10::Float4_e2m1fn_x2, \
+    Float4_e2m1fn_x2) /* 45 */
 // If you want to support ComplexHalf for real, add ComplexHalf
 // into this macro (and change the name). But beware: convert()
@@ -115,43 +124,45 @@ struct dummy_int1_7_t {};
 // TODO: To add unsigned int types here, we must define accumulate type.
 // But uint8 currently accumulates into int64, so we would have to make
 // an inconsistent choice for the larger types. Difficult.
-#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF_F8NZ(_) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(standalone::c10::Half, Half) \
-  _(float, Float) \
-  _(double, Double) \
-  _(standalone::c10::complex<float>, ComplexFloat) \
-  _(standalone::c10::complex<double>, ComplexDouble) \
-  _(bool, Bool) \
-  _(standalone::c10::BFloat16, BFloat16) \
-  _(standalone::c10::Float8_e5m2, Float8_e5m2) \
-  _(standalone::c10::Float8_e4m3fn, Float8_e4m3fn)
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF_F8NZ(_) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(executorch::backends::aoti::slim::c10::Half, Half) \
+  _(float, Float) \
+  _(double, Double) \
+  _(executorch::backends::aoti::slim::c10::complex<float>, ComplexFloat) \
+  _(executorch::backends::aoti::slim::c10::complex<double>, ComplexDouble) \
+  _(bool, Bool) \
+  _(executorch::backends::aoti::slim::c10::BFloat16, BFloat16) \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2, Float8_e5m2) \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fn, Float8_e4m3fn)
 // This macro controls many of our C++ APIs, including constructors
 // for Scalar as well as the data() and item() accessors on Tensor
-#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(standalone::c10::Half, Half) \
-  _(float, Float) \
-  _(double, Double) \
-  _(standalone::c10::complex<standalone::c10::Half>, ComplexHalf) \
-  _(standalone::c10::complex<float>, ComplexFloat) \
-  _(standalone::c10::complex<double>, ComplexDouble) \
-  _(bool, Bool) \
-  _(standalone::c10::BFloat16, BFloat16) \
-  _(standalone::c10::Float8_e5m2, Float8_e5m2) \
-  _(standalone::c10::Float8_e4m3fn, Float8_e4m3fn) \
-  _(standalone::c10::Float8_e5m2fnuz, Float8_e5m2fnuz) \
-  _(standalone::c10::Float8_e4m3fnuz, Float8_e4m3fnuz) \
-  _(standalone::c10::Float8_e8m0fnu, Float8_e8m0fnu)
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(executorch::backends::aoti::slim::c10::Half, Half) \
+  _(float, Float) \
+  _(double, Double) \
+  _(executorch::backends::aoti::slim::c10::complex< \
+        executorch::backends::aoti::slim::c10::Half>, \
+    ComplexHalf) \
+  _(executorch::backends::aoti::slim::c10::complex<float>, ComplexFloat) \
+  _(executorch::backends::aoti::slim::c10::complex<double>, ComplexDouble) \
+  _(bool, Bool) \
+  _(executorch::backends::aoti::slim::c10::BFloat16, BFloat16) \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2, Float8_e5m2) \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fn, Float8_e4m3fn) \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2fnuz, Float8_e5m2fnuz) \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fnuz, Float8_e4m3fnuz) \
+  _(executorch::backends::aoti::slim::c10::Float8_e8m0fnu, Float8_e8m0fnu)
 enum class ScalarType : int8_t {
 #define DEFINE_ST_ENUM_VAL_(_1, n) n,
@@ -168,19 +179,20 @@ namespace impl {
 // These are used to map ScalarTypes to C++ types.
-template <standalone::c10::ScalarType N>
+template <executorch::backends::aoti::slim::c10::ScalarType N>
 struct ScalarTypeToCPPType;
 #define SPECIALIZE_ScalarTypeToCPPType(cpp_type, scalar_type) \
   template <> \
-  struct ScalarTypeToCPPType<standalone::c10::ScalarType::scalar_type> { \
+  struct ScalarTypeToCPPType< \
+      executorch::backends::aoti::slim::c10::ScalarType::scalar_type> { \
     using type = cpp_type; \
 \
     /* This is a workaround for the CUDA bug which prevents */ \
     /* ::detail::ScalarTypeToCType<T>::type being used directly due to */ \
     /* ambiguous reference which can't to be resolved. For some reason it */ \
-    /* can't pick between standalone::c10::detail and \
-     * standalone::c10::cuda::detail. */ \
+    /* can't pick between executorch::backends::aoti::slim::c10::detail and \
+     * executorch::backends::aoti::slim::c10::cuda::detail. */ \
     /* For repro example, please see: */ \
     /* https://gist.github.com/izdeby/952ae7cf256ddb740a73776d39a7e7ba */ \
     /* TODO: remove once the bug is fixed. */ \
@@ -191,7 +203,7 @@ AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_ScalarTypeToCPPType)
 #undef SPECIALIZE_ScalarTypeToCPPType
-template <standalone::c10::ScalarType N>
+template <executorch::backends::aoti::slim::c10::ScalarType N>
 using ScalarTypeToCPPTypeT = typename ScalarTypeToCPPType<N>::type;
 } // namespace impl
@@ -199,12 +211,13 @@ using ScalarTypeToCPPTypeT = typename ScalarTypeToCPPType<N>::type;
 template <typename T>
 struct CppTypeToScalarType;
-#define SPECIALIZE_CppTypeToScalarType(cpp_type, scalar_type) \
-  template <> \
-  struct CppTypeToScalarType<cpp_type> \
-      : std::integral_constant< \
-            standalone::c10::ScalarType, \
-            standalone::c10::ScalarType::scalar_type> {};
+#define SPECIALIZE_CppTypeToScalarType(cpp_type, scalar_type) \
+  template <> \
+  struct CppTypeToScalarType<cpp_type> \
+      : std::integral_constant< \
+            executorch::backends::aoti::slim::c10::ScalarType, \
+            executorch::backends::aoti::slim::c10::ScalarType::scalar_type> { \
+  };
 AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_CppTypeToScalarType)
@@ -233,106 +246,119 @@ AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_CppTypeToScalarType)
 // instead, new types should be added to use sites on a case-by-case basis.
 // We generally are not accepting new dtypes due to binary size concerns.
-#define AT_FORALL_SCALAR_TYPES_AND(SCALARTYPE, _) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(float, Float) \
-  _(double, Double) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE>::t), \
-    SCALARTYPE)
+#define AT_FORALL_SCALAR_TYPES_AND(SCALARTYPE, _) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(float, Float) \
+  _(double, Double) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE>:: \
+        t), \
+    SCALARTYPE)
-#define AT_FORALL_SCALAR_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, _) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(float, Float) \
-  _(double, Double) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE1>::t), \
-    SCALARTYPE1) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE2>::t), \
-    SCALARTYPE2)
+#define AT_FORALL_SCALAR_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, _) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(float, Float) \
+  _(double, Double) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE1>:: \
+        t), \
+    SCALARTYPE1) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE2>:: \
+        t), \
+    SCALARTYPE2)
-#define AT_FORALL_SCALAR_TYPES_AND3(SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, _) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(float, Float) \
-  _(double, Double) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE1>::t), \
-    SCALARTYPE1) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE2>::t), \
-    SCALARTYPE2) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE3>::t), \
-    SCALARTYPE3)
+#define AT_FORALL_SCALAR_TYPES_AND3(SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, _) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(float, Float) \
+  _(double, Double) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE1>:: \
+        t), \
+    SCALARTYPE1) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE2>:: \
+        t), \
+    SCALARTYPE2) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE3>:: \
+        t), \
+    SCALARTYPE3)
-#define AT_FORALL_SCALAR_TYPES_AND7( \
-    SCALARTYPE1, \
-    SCALARTYPE2, \
-    SCALARTYPE3, \
-    SCALARTYPE4, \
-    SCALARTYPE5, \
-    SCALARTYPE6, \
-    SCALARTYPE7, \
-    _) \
-  _(uint8_t, Byte) \
-  _(int8_t, Char) \
-  _(int16_t, Short) \
-  _(int, Int) \
-  _(int64_t, Long) \
-  _(float, Float) \
-  _(double, Double) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE1>::t), \
-    SCALARTYPE1) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE2>::t), \
-    SCALARTYPE2) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE3>::t), \
-    SCALARTYPE3) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE4>::t), \
-    SCALARTYPE4) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE5>::t), \
-    SCALARTYPE5) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE6>::t), \
-    SCALARTYPE6) \
-  _(decltype(standalone::c10::impl::ScalarTypeToCPPType< \
-             standalone::c10::ScalarType::SCALARTYPE7>::t), \
-    SCALARTYPE7)
+#define AT_FORALL_SCALAR_TYPES_AND7( \
+    SCALARTYPE1, \
+    SCALARTYPE2, \
+    SCALARTYPE3, \
+    SCALARTYPE4, \
+    SCALARTYPE5, \
+    SCALARTYPE6, \
+    SCALARTYPE7, \
+    _) \
+  _(uint8_t, Byte) \
+  _(int8_t, Char) \
+  _(int16_t, Short) \
+  _(int, Int) \
+  _(int64_t, Long) \
+  _(float, Float) \
+  _(double, Double) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE1>:: \
+        t), \
+    SCALARTYPE1) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE2>:: \
+        t), \
+    SCALARTYPE2) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE3>:: \
+        t), \
+    SCALARTYPE3) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE4>:: \
+        t), \
+    SCALARTYPE4) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE5>:: \
+        t), \
+    SCALARTYPE5) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE6>:: \
+        t), \
+    SCALARTYPE6) \
+  _(decltype(executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPType< \
+        executorch::backends::aoti::slim::c10::ScalarType::SCALARTYPE7>:: \
+        t), \
+    SCALARTYPE7)
-#define AT_FORALL_QINT_TYPES(_) \
-  _(standalone::c10::qint8, QInt8) \
-  _(standalone::c10::quint8, QUInt8) \
-  _(standalone::c10::qint32, QInt32) \
-  _(standalone::c10::quint4x2, QUInt4x2) \
-  _(standalone::c10::quint2x4, QUInt2x4)
+#define AT_FORALL_QINT_TYPES(_) \
+  _(executorch::backends::aoti::slim::c10::qint8, QInt8) \
+  _(executorch::backends::aoti::slim::c10::quint8, QUInt8) \
+  _(executorch::backends::aoti::slim::c10::qint32, QInt32) \
+  _(executorch::backends::aoti::slim::c10::quint4x2, QUInt4x2) \
+  _(executorch::backends::aoti::slim::c10::quint2x4, QUInt2x4)
-#define AT_FORALL_FLOAT8_TYPES(_) \
-  _(standalone::c10::Float8_e5m2, Float8_e5m2) \
-  _(standalone::c10::Float8_e4m3fn, Float8_e4m3fn) \
-  _(standalone::c10::Float8_e5m2fnuz, Float8_e5m2fnuz) \
-  _(standalone::c10::Float8_e4m3fnuz, Float8_e4m3fnuz) \
-  _(standalone::c10::Float8_e8m0fnu, Float8_e8m0fnu)
+#define AT_FORALL_FLOAT8_TYPES(_) \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2, Float8_e5m2) \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fn, Float8_e4m3fn) \
+  _(executorch::backends::aoti::slim::c10::Float8_e5m2fnuz, Float8_e5m2fnuz) \
+  _(executorch::backends::aoti::slim::c10::Float8_e4m3fnuz, Float8_e4m3fnuz) \
+  _(executorch::backends::aoti::slim::c10::Float8_e8m0fnu, Float8_e8m0fnu)
-#define AT_FORALL_COMPLEX_TYPES(_) \
-  _(standalone::c10::complex<float>, ComplexFloat) \
-  _(standalone::c10::complex<double>, ComplexDouble)
+#define AT_FORALL_COMPLEX_TYPES(_) \
+  _(executorch::backends::aoti::slim::c10::complex<float>, ComplexFloat) \
+  _(executorch::backends::aoti::slim::c10::complex<double>, ComplexDouble)
 #define DEFINE_CONSTANT(_, name) \
   constexpr ScalarType k##name = ScalarType::name;
@@ -450,10 +476,11 @@ inline ScalarType toUnderlying(ScalarType t) {
 }
 inline bool isSignedType(ScalarType t) {
-#define CASE_ISSIGNED(name) \
-  case ScalarType::name: \
-    return std::numeric_limits<::standalone::c10::impl::ScalarTypeToCPPTypeT< \
-        ScalarType::name>>::is_signed;
+#define CASE_ISSIGNED(name) \
+  case ScalarType::name: \
+    return std::numeric_limits< \
+        ::executorch::backends::aoti::slim::c10::impl::ScalarTypeToCPPTypeT< \
+            ScalarType::name>>::is_signed;
 // TODO(#146647): If we expect to have numeric_limits for everything,
 // let's just have a big macro for the whole thing.
@@ -605,20 +632,21 @@ constexpr auto b1 = ScalarType::Bool;
 constexpr auto bf = ScalarType::BFloat16;
 constexpr auto ud = ScalarType::Undefined;
-constexpr auto index2dtype = array_of<standalone::c10::ScalarType>(
-    u1,
-    i1,
-    i2,
-    i4,
-    i8,
-    f2,
-    f4,
-    f8,
-    c2,
-    c4,
-    c8,
-    b1,
-    bf);
+constexpr auto index2dtype =
+    array_of<executorch::backends::aoti::slim::c10::ScalarType>(
+        u1,
+        i1,
+        i2,
+        i4,
+        i8,
+        f2,
+        f4,
+        f8,
+        c2,
+        c4,
+        c8,
+        b1,
+        bf);
 constexpr std::array<int64_t, static_cast<size_t>(ScalarType::NumOptions)>
 calculate_dtype2index() {
@@ -728,8 +756,8 @@ inline ScalarType promoteTypes(ScalarType a, ScalarType b) {
 inline std::ostream& operator<<(
     std::ostream& stream,
-    standalone::c10::ScalarType scalar_type) {
+    executorch::backends::aoti::slim::c10::ScalarType scalar_type) {
   return stream << toString(scalar_type);
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/SizesAndStrides.h b/backends/aoti/slim/c10/core/SizesAndStrides.h
index aef0ddab171..0b9edaccde7 100644
--- a/backends/aoti/slim/c10/core/SizesAndStrides.h
+++ b/backends/aoti/slim/c10/core/SizesAndStrides.h
@@ -10,7 +10,7 @@
 #define STANDALONE_SIZES_AND_STRIDES_MAX_INLINE_SIZE 5
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 // Packed container for TensorImpl sizes and strides.
 // This design improves on the previous approach of using a pair of
@@ -399,4 +399,4 @@ class SizesAndStrides {
   };
 };
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/WrapDimMinimal.h b/backends/aoti/slim/c10/core/WrapDimMinimal.h
index 651421e6d89..68c80a4abc3 100644
--- a/backends/aoti/slim/c10/core/WrapDimMinimal.h
+++ b/backends/aoti/slim/c10/core/WrapDimMinimal.h
@@ -7,7 +7,7 @@
 // Different from the original implementation in c10, we don't need
 // to support SymInt here.
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 namespace detail {
 template <typename T>
 T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar);
@@ -25,7 +25,7 @@ T _maybe_wrap_dim(T dim, T dim_post_expr, bool wrap_scalar = true) {
     return dim;
   }
   // Check edge-cases out-of-line (wrapping scalars and out-of-bounds errors)
-  return standalone::c10::detail::maybe_wrap_dim_slow(
+  return executorch::backends::aoti::slim::c10::detail::maybe_wrap_dim_slow(
       std::move(dim), std::move(dim_post_expr), wrap_scalar);
 }
@@ -48,7 +48,7 @@ T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar) {
       "Dimension specified as ",
       dim,
       " but tensor has no dimensions");
-  return standalone::c10::maybe_wrap_dim(
+  return executorch::backends::aoti::slim::c10::maybe_wrap_dim(
       std::move(dim),
       /*dim_post_expr=*/1,
       /*wrap_scalar=*/false);
@@ -70,4 +70,4 @@ T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar) {
       false, "should never reach here as dim should be out-of-bounds");
 }
 } // namespace detail
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/Array.h b/backends/aoti/slim/c10/util/Array.h
index 39eabc830d1..d093d26c51a 100644
--- a/backends/aoti/slim/c10/util/Array.h
+++ b/backends/aoti/slim/c10/util/Array.h
@@ -3,7 +3,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 // This helper function creates a constexpr std::array
 // From a compile time list of values, without requiring you to explicitly
@@ -15,4 +15,4 @@ inline constexpr auto array_of(T&&... t) -> std::array<V, sizeof...(T)> {
   return {{std::forward<T>(t)...}};
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/ArrayRef.h b/backends/aoti/slim/c10/util/ArrayRef.h
index 4a09f7a9335..9c7c6cd781d 100644
--- a/backends/aoti/slim/c10/util/ArrayRef.h
+++ b/backends/aoti/slim/c10/util/ArrayRef.h
@@ -29,7 +29,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 /// ArrayRef - Represent a constant reference to an array (0 or more elements
 /// consecutively in memory), i.e. a start pointer and a length. It allows
 /// various APIs to take consecutive elements easily and conveniently.
@@ -324,41 +324,49 @@ ArrayRef<T> makeArrayRef(const T (&Arr)[N]) {
 }
 // WARNING: Template instantiation will NOT be willing to do an implicit
-// conversions to get you to an standalone::c10::ArrayRef, which is why we
-// need so many overloads.
+// conversions to get you to an executorch::backends::aoti::slim::c10::ArrayRef,
+// which is why we need so many overloads.
 template <typename T>
 bool operator==(
-    standalone::c10::ArrayRef<T> a1,
-    standalone::c10::ArrayRef<T> a2) {
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
   return a1.equals(a2);
 }
 template <typename T>
 bool operator!=(
-    standalone::c10::ArrayRef<T> a1,
-    standalone::c10::ArrayRef<T> a2) {
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
   return !a1.equals(a2);
 }
 template <typename T>
-bool operator==(const std::vector<T>& a1, standalone::c10::ArrayRef<T> a2) {
-  return standalone::c10::ArrayRef<T>(a1).equals(a2);
+bool operator==(
+    const std::vector<T>& a1,
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
+  return executorch::backends::aoti::slim::c10::ArrayRef<T>(a1).equals(a2);
 }
 template <typename T>
-bool operator!=(const std::vector<T>& a1, standalone::c10::ArrayRef<T> a2) {
-  return !standalone::c10::ArrayRef<T>(a1).equals(a2);
+bool operator!=(
+    const std::vector<T>& a1,
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a2) {
+  return !executorch::backends::aoti::slim::c10::ArrayRef<T>(a1).equals(a2);
 }
 template <typename T>
-bool operator==(standalone::c10::ArrayRef<T> a1, const std::vector<T>& a2) {
-  return a1.equals(standalone::c10::ArrayRef<T>(a2));
+bool operator==(
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
+    const std::vector<T>& a2) {
+  return a1.equals(executorch::backends::aoti::slim::c10::ArrayRef<T>(a2));
 }
 template <typename T>
-bool operator!=(standalone::c10::ArrayRef<T> a1, const std::vector<T>& a2) {
-  return !a1.equals(standalone::c10::ArrayRef<T>(a2));
+bool operator!=(
+    executorch::backends::aoti::slim::c10::ArrayRef<T> a1,
+    const std::vector<T>& a2) {
+  return !a1.equals(executorch::backends::aoti::slim::c10::ArrayRef<T>(a2));
 }
 using IntArrayRef = ArrayRef<int64_t>;
@@ -368,4 +376,4 @@ using IntList
     "semantics obvious. Use IntArrayRef instead!")]] = ArrayRef<int64_t>;
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/BFloat16-inl.h b/backends/aoti/slim/c10/util/BFloat16-inl.h
index 4608d9a6c54..5c41d4aaad0 100644
--- a/backends/aoti/slim/c10/util/BFloat16-inl.h
+++ b/backends/aoti/slim/c10/util/BFloat16-inl.h
@@ -16,7 +16,7 @@ STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
 #include // for SYCL 2020
 #endif
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 /// Constructors
 inline STANDALONE_HOST_DEVICE BFloat16::BFloat16(float value)
@@ -26,7 +26,8 @@ inline STANDALONE_HOST_DEVICE BFloat16::BFloat16(float value)
       x(__bfloat16_as_ushort(__float2bfloat16(value)))
 #elif defined(__SYCL_DEVICE_ONLY__) && \
     defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS)
-      x(standalone::c10::bit_cast<uint16_t>(sycl::ext::oneapi::bfloat16(value)))
+      x(executorch::backends::aoti::slim::c10::bit_cast<uint16_t>(
+          sycl::ext::oneapi::bfloat16(value)))
 #else
       // RNE by default
       x(detail::round_to_nearest_even(value))
@@ -289,12 +290,12 @@ inline STANDALONE_HOST_DEVICE bool operator<(BFloat16& lhs, BFloat16& rhs) {
   return float(lhs) < float(rhs);
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 template <>
-class numeric_limits<standalone::c10::BFloat16> {
+class numeric_limits<executorch::backends::aoti::slim::c10::BFloat16> {
  public:
   static constexpr bool is_signed = true;
   static constexpr bool is_specialized = true;
@@ -322,41 +323,44 @@ class numeric_limits<standalone::c10::BFloat16> {
   static constexpr auto tinyness_before =
       numeric_limits<float>::tinyness_before;
-  static constexpr standalone::c10::BFloat16 min() {
-    return standalone::c10::BFloat16(
-        0x0080, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 min() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x0080, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 lowest() {
-    return standalone::c10::BFloat16(
-        0xFF7F, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 lowest() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0xFF7F, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 max() {
-    return standalone::c10::BFloat16(
-        0x7F7F, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 max() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x7F7F, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 epsilon() {
-    return standalone::c10::BFloat16(
-        0x3C00, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 epsilon() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x3C00, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 round_error() {
-    return standalone::c10::BFloat16(
-        0x3F00, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16
+  round_error() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x3F00, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 infinity() {
-    return standalone::c10::BFloat16(
-        0x7F80, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 infinity() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x7F80, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 quiet_NaN() {
-    return standalone::c10::BFloat16(
-        0x7FC0, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16 quiet_NaN() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x7FC0, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 signaling_NaN() {
-    return standalone::c10::BFloat16(
-        0x7F80, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16
+  signaling_NaN() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x7F80, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
-  static constexpr standalone::c10::BFloat16 denorm_min() {
-    return standalone::c10::BFloat16(
-        0x0001, standalone::c10::BFloat16::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::BFloat16
+  denorm_min() {
+    return executorch::backends::aoti::slim::c10::BFloat16(
+        0x0001, executorch::backends::aoti::slim::c10::BFloat16::from_bits());
   }
 };
diff --git a/backends/aoti/slim/c10/util/BFloat16-math.h b/backends/aoti/slim/c10/util/BFloat16-math.h
index f036f309e26..ad67d81fa23 100644
--- a/backends/aoti/slim/c10/util/BFloat16-math.h
+++ b/backends/aoti/slim/c10/util/BFloat16-math.h
@@ -8,243 +8,276 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH()
 STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion")
 #endif
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 template <typename T>
 struct is_reduced_floating_point
     : std::integral_constant<
           bool,
-          std::is_same_v<T, standalone::c10::Half> ||
-              std::is_same_v<T, standalone::c10::BFloat16>> {};
+          std::is_same_v<T, executorch::backends::aoti::slim::c10::Half> ||
+              std::is_same_v<
+                  T,
+                  executorch::backends::aoti::slim::c10::BFloat16>> {};
 template <typename T>
 constexpr bool is_reduced_floating_point_v =
     is_reduced_floating_point<T>::value;
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 #if !defined(FBCODE_CAFFE2) && !defined(STANDALONE_NODEPRECATED)
-using standalone::c10::is_reduced_floating_point;
-using standalone::c10::is_reduced_floating_point_v;
+using executorch::backends::aoti::slim::c10::is_reduced_floating_point;
+using executorch::backends::aoti::slim::c10::is_reduced_floating_point_v;
 #endif // !defined(FBCODE_CAFFE2) && !defined(STANDALONE_NODEPRECATED)
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T acos(T a) {
   return std::acos(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T asin(T a) {
   return std::asin(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T atan(T a) {
   return std::atan(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T atanh(T a) {
   return std::atanh(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T erf(T a) {
   return std::erf(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T erfc(T a) {
   return std::erfc(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T exp(T a) {
   return std::exp(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T expm1(T a) {
   return std::expm1(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline bool isfinite(T a) {
   return std::isfinite(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T log(T a) {
   return std::log(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T log10(T a) {
   return std::log10(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T log1p(T a) {
   return std::log1p(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T log2(T a) {
   return std::log2(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T ceil(T a) {
   return std::ceil(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T cos(T a) {
   return std::cos(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T floor(T a) {
   return std::floor(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T nearbyint(T a) {
   return std::nearbyint(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T sin(T a) {
   return std::sin(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T tan(T a) {
   return std::tan(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T sinh(T a) {
   return std::sinh(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T cosh(T a) {
   return std::cosh(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T tanh(T a) {
   return std::tanh(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T trunc(T a) {
   return std::trunc(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T lgamma(T a) {
   return std::lgamma(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T sqrt(T a) {
   return std::sqrt(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T rsqrt(T a) {
   return 1.0 / std::sqrt(float(a));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T abs(T a) {
   return std::abs(float(a));
 }
 #if defined(_MSC_VER) && defined(__CUDACC__)
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T pow(T a, double b) {
   return std::pow(float(a), float(b));
 }
 #else
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T pow(T a, double b) {
   return std::pow(float(a), b);
 }
 #endif
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T pow(T a, T b) {
   return std::pow(float(a), float(b));
 }
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 inline T fmod(T a, T b) {
   return std::fmod(float(a), float(b));
 }
@@ -277,8 +310,9 @@ inline T fmod(T a, T b) {
  */
 template <
     typename T,
-    typename std::
-        enable_if_t<standalone::c10::is_reduced_floating_point_v<T>, int> = 0>
+    typename std::enable_if_t<
+        executorch::backends::aoti::slim::c10::is_reduced_floating_point_v<T>,
+        int> = 0>
 STANDALONE_HOST_DEVICE inline T nextafter(T from, T to) {
   // Reference:
   // https://git.musl-libc.org/cgit/musl/tree/src/math/nextafter.c
diff --git a/backends/aoti/slim/c10/util/BFloat16.h b/backends/aoti/slim/c10/util/BFloat16.h
index ed6d07f53d0..d1b2a5baeb2 100644
--- a/backends/aoti/slim/c10/util/BFloat16.h
+++ b/backends/aoti/slim/c10/util/BFloat16.h
@@ -20,7 +20,7 @@
 #include // for SYCL 2020
 #endif
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 namespace detail {
 inline STANDALONE_HOST_DEVICE float f32_from_bits(uint16_t src) {
@@ -118,6 +118,6 @@ inline std::ostream& operator<<(std::ostream& out, const BFloat16& value) {
   return out;
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 #include // IWYU pragma: keep
diff --git a/backends/aoti/slim/c10/util/Exception.h b/backends/aoti/slim/c10/util/Exception.h
index 6ab2bd8aae6..f83bf3f074a 100644
--- a/backends/aoti/slim/c10/util/Exception.h
+++ b/backends/aoti/slim/c10/util/Exception.h
@@ -6,8 +6,8 @@
 #include
 // In the standalone version, STANDALONE_CHECK throws std::runtime_error
-// instead of standalone::c10::Error.
-namespace standalone::c10::detail {
+// instead of executorch::backends::aoti::slim::c10::Error.
+namespace executorch::backends::aoti::slim::c10::detail {
 template <typename... Args>
 std::string torchCheckMsgImpl(const char* /*msg*/, const Args&... args) {
   // This is similar to the one in c10/util/Exception.h, but does
@@ -25,14 +25,14 @@ inline const char* torchCheckMsgImpl(const char* msg) {
 inline const char* torchCheckMsgImpl(const char* /*msg*/, const char* args) {
   return args;
 }
-} // namespace standalone::c10::detail
+} // namespace executorch::backends::aoti::slim::c10::detail
-#define STANDALONE_CHECK_MSG(cond, type, ...) \
-  (::standalone::c10::detail::torchCheckMsgImpl( \
-      "Expected " #cond \
-      " to be true, but got false. " \
-      "(Could this error message be improved? If so, " \
-      "please report an enhancement request to PyTorch.)", \
+#define STANDALONE_CHECK_MSG(cond, type, ...) \
+  (::executorch::backends::aoti::slim::c10::detail::torchCheckMsgImpl( \
+      "Expected " #cond \
+      " to be true, but got false. " \
+      "(Could this error message be improved? If so, " \
+      "please report an enhancement request to PyTorch.)", \
       ##__VA_ARGS__))
 #define STANDALONE_CHECK(cond, ...) \
   if (STANDALONE_UNLIKELY_OR_CONST(!(cond))) { \
@@ -63,8 +63,9 @@ inline const char* torchCheckMsgImpl(const char* /*msg*/, const char* args) {
           ##__VA_ARGS__)); \
   }
-#define WARNING_MESSAGE_STRING(...) \
-  ::standalone::c10::detail::torchCheckMsgImpl(__VA_ARGS__)
+#define WARNING_MESSAGE_STRING(...) \
+  ::executorch::backends::aoti::slim::c10::detail::torchCheckMsgImpl( \
+      __VA_ARGS__)
 #ifdef DISABLE_WARN
 #define _STANDALONE_WARN_WITH(...) ((void)0);
diff --git a/backends/aoti/slim/c10/util/Float4_e2m1fn_x2.h b/backends/aoti/slim/c10/util/Float4_e2m1fn_x2.h
index 600e281b583..182163b9ca2 100644
--- a/backends/aoti/slim/c10/util/Float4_e2m1fn_x2.h
+++ b/backends/aoti/slim/c10/util/Float4_e2m1fn_x2.h
@@ -17,7 +17,7 @@
 /// sign/exponent/mantissa | seem : seem
 ///
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 struct alignas(1) Float4_e2m1fn_x2 {
   uint8_t val_;
@@ -25,4 +25,4 @@ struct alignas(1) Float4_e2m1fn_x2 {
   STANDALONE_HOST_DEVICE explicit Float4_e2m1fn_x2(uint8_t val) : val_(val) {}
 };
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/Float8_e4m3fn-inl.h b/backends/aoti/slim/c10/util/Float8_e4m3fn-inl.h
index cc31b82e699..a0cb1db2888 100644
--- a/backends/aoti/slim/c10/util/Float8_e4m3fn-inl.h
+++ b/backends/aoti/slim/c10/util/Float8_e4m3fn-inl.h
@@ -9,7 +9,7 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH()
 STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
 #endif
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 /// Constructors
@@ -229,14 +229,15 @@ operator/(int64_t a, Float8_e4m3fn b) {
 }
 /// NOTE: we do not define comparisons directly and instead rely on the implicit
-/// conversion from standalone::c10::Float8_e4m3fn to float.
+/// conversion from executorch::backends::aoti::slim::c10::Float8_e4m3fn to
+/// float.
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 template <>
-class numeric_limits<standalone::c10::Float8_e4m3fn> {
+class numeric_limits<executorch::backends::aoti::slim::c10::Float8_e4m3fn> {
  public:
   static constexpr bool is_specialized = true;
   static constexpr bool is_signed = true;
@@ -262,33 +263,45 @@ class numeric_limits<standalone::c10::Float8_e4m3fn> {
   static constexpr auto traps = numeric_limits<float>::traps;
   static constexpr auto tinyness_before = false;
-  static constexpr standalone::c10::Float8_e4m3fn min() {
-    return standalone::c10::Float8_e4m3fn(
-        0x08, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn min() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x08,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn lowest() {
-    return standalone::c10::Float8_e4m3fn(
-        0xFE, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn
+  lowest() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0xFE,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn max() {
-    return standalone::c10::Float8_e4m3fn(
-        0x7E, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn max() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x7E,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn epsilon() {
-    return standalone::c10::Float8_e4m3fn(
-        0x20, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn
+  epsilon() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x20,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn round_error() {
-    return standalone::c10::Float8_e4m3fn(
-        0x30, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn
+  round_error() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x30,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn quiet_NaN() {
-    return standalone::c10::Float8_e4m3fn(
-        0x7F, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn
+  quiet_NaN() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x7F,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fn denorm_min() {
-    return standalone::c10::Float8_e4m3fn(
-        0x01, standalone::c10::Float8_e4m3fn::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fn
+  denorm_min() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fn(
+        0x01,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fn::from_bits());
   }
 };
diff --git a/backends/aoti/slim/c10/util/Float8_e4m3fn.h b/backends/aoti/slim/c10/util/Float8_e4m3fn.h
index 320a677cbbb..22118007289 100644
--- a/backends/aoti/slim/c10/util/Float8_e4m3fn.h
+++ b/backends/aoti/slim/c10/util/Float8_e4m3fn.h
@@ -32,7 +32,7 @@
 #include
 #include
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 namespace detail {
@@ -233,6 +233,6 @@ inline std::ostream& operator<<(std::ostream& out, const
     Float8_e4m3fn& value) {
   return out;
 }
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 #include // IWYU pragma: keep
diff --git a/backends/aoti/slim/c10/util/Float8_e4m3fnuz-inl.h b/backends/aoti/slim/c10/util/Float8_e4m3fnuz-inl.h
index 55a6ce73972..51f7c017504 100644
--- a/backends/aoti/slim/c10/util/Float8_e4m3fnuz-inl.h
+++ b/backends/aoti/slim/c10/util/Float8_e4m3fnuz-inl.h
@@ -10,7 +10,7 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH()
 STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
 #endif
-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {
 /// Constructors
@@ -239,14 +239,15 @@ operator/(int64_t a, Float8_e4m3fnuz b) {
 }
 /// NOTE: we do not define comparisons directly and instead rely on the implicit
-/// conversion from standalone::c10::Float8_e4m3fnuz to float.
+/// conversion from executorch::backends::aoti::slim::c10::Float8_e4m3fnuz to
+/// float.
-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
 namespace std {
 template <>
-class numeric_limits<standalone::c10::Float8_e4m3fnuz> {
+class numeric_limits<executorch::backends::aoti::slim::c10::Float8_e4m3fnuz> {
  public:
   static constexpr bool is_specialized = true;
   static constexpr bool is_signed = true;
@@ -272,38 +273,54 @@ class numeric_limits<standalone::c10::Float8_e4m3fnuz> {
   static constexpr auto traps = numeric_limits<float>::traps;
   static constexpr auto tinyness_before = false;
-  static constexpr standalone::c10::Float8_e4m3fnuz min() {
-    return standalone::c10::Float8_e4m3fnuz(
-        0x08, standalone::c10::Float8_e4m3fnuz::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  min() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz(
+        0x08,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fnuz lowest() {
-    return standalone::c10::Float8_e4m3fnuz(
-        0xFF, standalone::c10::Float8_e4m3fnuz::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  lowest() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz(
+        0xFF,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fnuz max() {
-    return standalone::c10::Float8_e4m3fnuz(
-        0x7F, standalone::c10::Float8_e4m3fnuz::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  max() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz(
+        0x7F,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fnuz epsilon() {
-    return standalone::c10::Float8_e4m3fnuz(
-        0x28, standalone::c10::Float8_e4m3fnuz::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  epsilon() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz(
+        0x28,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fnuz round_error() {
-    return standalone::c10::Float8_e4m3fnuz(
-        0x38, standalone::c10::Float8_e4m3fnuz::from_bits());
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  round_error() {
+    return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz(
+        0x38,
+        executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits());
   }
-  static constexpr standalone::c10::Float8_e4m3fnuz infinity() {
+  static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz
+  infinity() {
     // NaN (no infinities)
-    return standalone::c10::Float8_e4m3fnuz(
0x80, standalone::c10::Float8_e4m3fnuz::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz( + 0x80, + executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e4m3fnuz quiet_NaN() { - return standalone::c10::Float8_e4m3fnuz( - 0x80, standalone::c10::Float8_e4m3fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz + quiet_NaN() { + return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz( + 0x80, + executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e4m3fnuz denorm_min() { - return standalone::c10::Float8_e4m3fnuz( - 0x01, standalone::c10::Float8_e4m3fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e4m3fnuz + denorm_min() { + return executorch::backends::aoti::slim::c10::Float8_e4m3fnuz( + 0x01, + executorch::backends::aoti::slim::c10::Float8_e4m3fnuz::from_bits()); } }; diff --git a/backends/aoti/slim/c10/util/Float8_e4m3fnuz.h b/backends/aoti/slim/c10/util/Float8_e4m3fnuz.h index ff3c050f018..b9c8ae582f4 100644 --- a/backends/aoti/slim/c10/util/Float8_e4m3fnuz.h +++ b/backends/aoti/slim/c10/util/Float8_e4m3fnuz.h @@ -31,7 +31,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { namespace detail { @@ -133,6 +133,6 @@ inline std::ostream& operator<<( return out; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 #include // IWYU pragma: keep diff --git a/backends/aoti/slim/c10/util/Float8_e5m2-inl.h b/backends/aoti/slim/c10/util/Float8_e5m2-inl.h index c8e90a8aa0d..bdc80613015 100644 --- a/backends/aoti/slim/c10/util/Float8_e5m2-inl.h +++ b/backends/aoti/slim/c10/util/Float8_e5m2-inl.h @@ -13,7 +13,7 @@ STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #define MAN_WIDTH_FP8 2 #define EXP_BIAS_FP8 15 -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /// Constructors @@ -229,14 +229,14 @@ inline STANDALONE_HOST_DEVICE Float8_e5m2 operator/(int64_t a, Float8_e5m2 b) { } /// NOTE: we do not define comparisons directly and instead rely on the implicit -/// conversion from standalone::c10::Float8_e5m2 to float. +/// conversion from executorch::backends::aoti::slim::c10::Float8_e5m2 to float. 
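[Editorial example] The NOTE above says Float8_e5m2 defines no comparison operators and instead relies on its implicit conversion to float. A minimal sketch of what that means at call sites after the rename; the `slimc10` alias is ours, not part of the patch, and it assumes the Float8_e5m2 header from this diff is included:

namespace slimc10 = executorch::backends::aoti::slim::c10;

// No operator< is declared for Float8_e5m2; both operands implicitly
// convert to float, so the built-in float comparison is what runs here.
inline bool e5m2_less(slimc10::Float8_e5m2 a, slimc10::Float8_e5m2 b) {
  return a < b;
}

A namespace alias like this is also the practical way to keep call sites readable now that the namespace is five levels deep.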
-} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 namespace std { template <> -class numeric_limits { +class numeric_limits { public: static constexpr bool is_signed = true; static constexpr bool is_integer = false; @@ -263,37 +263,42 @@ class numeric_limits { static constexpr auto tinyness_before = numeric_limits::tinyness_before; - static constexpr standalone::c10::Float8_e5m2 min() { - return standalone::c10::Float8_e5m2( - 0x4, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 min() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x4, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 max() { - return standalone::c10::Float8_e5m2( - 0x7B, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 max() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x7B, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 lowest() { - return standalone::c10::Float8_e5m2( - 0xFB, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 lowest() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0xFB, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 epsilon() { - return standalone::c10::Float8_e5m2( - 0x34, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 + epsilon() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x34, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 round_error() { - return standalone::c10::Float8_e5m2( - 0x38, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 + round_error() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x38, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 infinity() { - return standalone::c10::Float8_e5m2( - 0x7C, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 + infinity() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x7C, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 quiet_NaN() { - return standalone::c10::Float8_e5m2( - 0x7F, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 + quiet_NaN() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x7F, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } - static constexpr standalone::c10::Float8_e5m2 denorm_min() { - return standalone::c10::Float8_e5m2( - 0x01, standalone::c10::Float8_e5m2::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2 + denorm_min() { + return executorch::backends::aoti::slim::c10::Float8_e5m2( + 0x01, executorch::backends::aoti::slim::c10::Float8_e5m2::from_bits()); } }; diff --git a/backends/aoti/slim/c10/util/Float8_e5m2.h b/backends/aoti/slim/c10/util/Float8_e5m2.h index 88d1aab0525..6e9fa9b5aed 100644 --- a/backends/aoti/slim/c10/util/Float8_e5m2.h +++ 
b/backends/aoti/slim/c10/util/Float8_e5m2.h @@ -16,7 +16,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { namespace detail { @@ -142,6 +142,6 @@ inline std::ostream& operator<<(std::ostream& out, const Float8_e5m2& value) { return out; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 #include // IWYU pragma: keep diff --git a/backends/aoti/slim/c10/util/Float8_e5m2fnuz-inl.h b/backends/aoti/slim/c10/util/Float8_e5m2fnuz-inl.h index d2ccac329af..ca46726424b 100644 --- a/backends/aoti/slim/c10/util/Float8_e5m2fnuz-inl.h +++ b/backends/aoti/slim/c10/util/Float8_e5m2fnuz-inl.h @@ -10,7 +10,7 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH() STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /// Constructors @@ -243,14 +243,15 @@ operator/(int64_t a, Float8_e5m2fnuz b) { } /// NOTE: we do not define comparisons directly and instead rely on the implicit -/// conversion from standalone::c10::Float8_e5m2fnuz to float. +/// conversion from executorch::backends::aoti::slim::c10::Float8_e5m2fnuz to +/// float. -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 namespace std { template <> -class numeric_limits { +class numeric_limits { public: static constexpr bool is_signed = true; static constexpr bool is_integer = false; @@ -277,39 +278,55 @@ class numeric_limits { static constexpr auto tinyness_before = numeric_limits::tinyness_before; - static constexpr standalone::c10::Float8_e5m2fnuz min() { - return standalone::c10::Float8_e5m2fnuz( - 0x04, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + min() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x04, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e5m2fnuz max() { - return standalone::c10::Float8_e5m2fnuz( - 0x7F, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + max() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x7F, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e5m2fnuz lowest() { - return standalone::c10::Float8_e5m2fnuz( - 0xFF, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + lowest() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0xFF, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e5m2fnuz epsilon() { - return standalone::c10::Float8_e5m2fnuz( - 0x34, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + epsilon() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x34, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e5m2fnuz round_error() { - return standalone::c10::Float8_e5m2fnuz( - 0x38, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + round_error() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x38, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static 
constexpr standalone::c10::Float8_e5m2fnuz infinity() { - return standalone::c10::Float8_e5m2fnuz( - 0x80, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + infinity() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x80, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } // TODO(future): we are mapping neg_zero to both inf and NaN, this is // surprising and we should figure out what to do about it. - static constexpr standalone::c10::Float8_e5m2fnuz quiet_NaN() { - return standalone::c10::Float8_e5m2fnuz( - 0x80, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + quiet_NaN() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x80, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } - static constexpr standalone::c10::Float8_e5m2fnuz denorm_min() { - return standalone::c10::Float8_e5m2fnuz( - 0x01, standalone::c10::Float8_e5m2fnuz::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e5m2fnuz + denorm_min() { + return executorch::backends::aoti::slim::c10::Float8_e5m2fnuz( + 0x01, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz::from_bits()); } }; diff --git a/backends/aoti/slim/c10/util/Float8_e5m2fnuz.h b/backends/aoti/slim/c10/util/Float8_e5m2fnuz.h index c16e5613202..66c2427c8ac 100644 --- a/backends/aoti/slim/c10/util/Float8_e5m2fnuz.h +++ b/backends/aoti/slim/c10/util/Float8_e5m2fnuz.h @@ -31,7 +31,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { namespace detail { @@ -133,6 +133,6 @@ inline std::ostream& operator<<( return out; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 #include // IWYU pragma: keep diff --git a/backends/aoti/slim/c10/util/Float8_e8m0fnu-inl.h b/backends/aoti/slim/c10/util/Float8_e8m0fnu-inl.h index f510ca551b8..4e35e04bc22 100644 --- a/backends/aoti/slim/c10/util/Float8_e8m0fnu-inl.h +++ b/backends/aoti/slim/c10/util/Float8_e8m0fnu-inl.h @@ -11,7 +11,7 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH() STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /// Constructors @@ -25,18 +25,20 @@ inline STANDALONE_HOST_DEVICE Float8_e8m0fnu::operator float() const { // if exponent is zero, need to special case to return 2^-127 instead of zero if (x == 0) { - return standalone::c10::detail::fp32_from_bits(0x00400000); + return executorch::backends::aoti::slim::c10::detail::fp32_from_bits( + 0x00400000); } // if exponent is NaN, need to special case to return properly encoded NaN if (isnan()) { - return standalone::c10::detail::fp32_from_bits(0x7f800001); + return executorch::backends::aoti::slim::c10::detail::fp32_from_bits( + 0x7f800001); } // leave sign at 0, set the exponent bits, leave stored mantissa at 0 uint32_t res = x << 23; - return standalone::c10::detail::fp32_from_bits(res); + return executorch::backends::aoti::slim::c10::detail::fp32_from_bits(res); } /// Special values helper @@ -46,14 +48,15 @@ inline STANDALONE_HOST_DEVICE bool Float8_e8m0fnu::isnan() const { } /// NOTE: we do not define comparisons directly and instead rely on the implicit -/// conversion from standalone::c10::Float8_e8m0fnu to float. +/// conversion from executorch::backends::aoti::slim::c10::Float8_e8m0fnu to +/// float. 
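[Editorial example] The operator float() above fully determines the e8m0fnu decode rule: the byte is an 8-bit biased exponent (bias 127), 0x00 is special-cased to 2^-127 (an fp32 subnormal, bit pattern 0x00400000) and the NaN encoding to 0x7f800001, and every other value shifts straight into the fp32 exponent field. A self-contained sketch of the same logic, using only the standard library (std::memcpy stands in for the in-tree fp32_from_bits helper):

#include <cstdint>
#include <cstring>

inline float e8m0_decode_sketch(uint8_t x) {
  uint32_t bits;
  if (x == 0) {
    bits = 0x00400000u; // 2^-127, representable only as an fp32 subnormal
  } else if (x == 0xFF) {
    bits = 0x7f800001u; // the format's NaN; e8m0fnu has no infinities
  } else {
    bits = static_cast<uint32_t>(x) << 23; // biased exponent into the fp32 slot
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f)); // portable bit cast
  return f;
}

// e8m0_decode_sketch(127) == 1.0f, since bias 127 maps 127 to 2^0.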
-} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 namespace std { template <> -class numeric_limits { +class numeric_limits { public: static constexpr bool is_specialized = true; static constexpr bool is_signed = false; @@ -79,37 +82,47 @@ class numeric_limits { static constexpr auto traps = numeric_limits::traps; static constexpr auto tinyness_before = false; - static constexpr standalone::c10::Float8_e8m0fnu min() { + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu min() { // 2^-127 - return standalone::c10::Float8_e8m0fnu( - 0b00000000, standalone::c10::Float8_e8m0fnu::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b00000000, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } - static constexpr standalone::c10::Float8_e8m0fnu lowest() { + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu + lowest() { // 2^-127 - return standalone::c10::Float8_e8m0fnu( - 0b00000000, standalone::c10::Float8_e8m0fnu::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b00000000, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } - static constexpr standalone::c10::Float8_e8m0fnu max() { + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu max() { // 254 biased, which is 127 unbiased, so 2^127 - return standalone::c10::Float8_e8m0fnu( - 0b11111110, standalone::c10::Float8_e8m0fnu::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b11111110, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } - static constexpr standalone::c10::Float8_e8m0fnu epsilon() { + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu + epsilon() { // according to https://en.cppreference.com/w/cpp/types/numeric_limits, this // is "the difference between 1.0 and the next representable value of the // given floating-point type". The next representable value is 2.0, so the // difference is 1.0 which is 2^0. 0 unbiased is 127 biased. 
- return standalone::c10::Float8_e8m0fnu( - 0b01111111, standalone::c10::Float8_e8m0fnu::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b01111111, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } - static constexpr standalone::c10::Float8_e8m0fnu round_error() { + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu + round_error() { // 0.5 in float, which is 2^-1, and -1 + 127 = 126 - return standalone::c10::Float8_e8m0fnu( - 0b01111110, standalone::c10::Float8_e8m0fnu::from_bits()); + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b01111110, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } - static constexpr standalone::c10::Float8_e8m0fnu quiet_NaN() { - return standalone::c10::Float8_e8m0fnu( - 0b11111111, standalone::c10::Float8_e8m0fnu::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Float8_e8m0fnu + quiet_NaN() { + return executorch::backends::aoti::slim::c10::Float8_e8m0fnu( + 0b11111111, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu::from_bits()); } }; diff --git a/backends/aoti/slim/c10/util/Float8_e8m0fnu.h b/backends/aoti/slim/c10/util/Float8_e8m0fnu.h index 2e2e46d627a..0f67705c510 100644 --- a/backends/aoti/slim/c10/util/Float8_e8m0fnu.h +++ b/backends/aoti/slim/c10/util/Float8_e8m0fnu.h @@ -27,7 +27,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { namespace detail { @@ -38,7 +38,8 @@ namespace detail { inline STANDALONE_HOST_DEVICE uint8_t fp8e8m0fnu_from_fp32_value(float f) { // TODO(#146647): maybe rewrite without control flow - uint32_t f_bits = standalone::c10::detail::fp32_to_bits(f); + uint32_t f_bits = + executorch::backends::aoti::slim::c10::detail::fp32_to_bits(f); // extract the exponent uint32_t exponent = (f_bits >> 23) & 0b11111111; @@ -114,6 +115,6 @@ inline std::ostream& operator<<( return out; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 #include // IWYU pragma: keep diff --git a/backends/aoti/slim/c10/util/Float8_fnuz_cvt.h b/backends/aoti/slim/c10/util/Float8_fnuz_cvt.h index 00bfa8cd8fc..49bcaad6842 100644 --- a/backends/aoti/slim/c10/util/Float8_fnuz_cvt.h +++ b/backends/aoti/slim/c10/util/Float8_fnuz_cvt.h @@ -8,7 +8,7 @@ #include #endif -namespace standalone::c10::detail { +namespace executorch::backends::aoti::slim::c10::detail { /* * Convert a 8-bit floating-point number in either f8 E4M3FNUZ or bf8 E5M2FNUZ @@ -61,4 +61,4 @@ inline STANDALONE_HOST_DEVICE float fp8_fnuz_to_fp32_value(uint8_t x) { return fp32_from_bits(retval); } -} // namespace standalone::c10::detail +} // namespace executorch::backends::aoti::slim::c10::detail diff --git a/backends/aoti/slim/c10/util/Half-inl.h b/backends/aoti/slim/c10/util/Half-inl.h index 05fa6349f81..f7b25c0ebe0 100644 --- a/backends/aoti/slim/c10/util/Half-inl.h +++ b/backends/aoti/slim/c10/util/Half-inl.h @@ -31,7 +31,7 @@ STANDALONE_CLANG_DIAGNOSTIC_PUSH() STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { #if defined(__aarch64__) && !defined(__CUDACC__) /// Constructors @@ -46,7 +46,8 @@ inline STANDALONE_HOST_DEVICE Half::Half(float value) #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) x(__half_as_short(__float2half(value))) #elif defined(__SYCL_DEVICE_ONLY__) - x(standalone::c10::bit_cast(sycl::half(value))) + 
x(executorch::backends::aoti::slim::c10::bit_cast( + sycl::half(value))) #elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ !defined(__APPLE__) x(at::vec::float2half_scalar(value)) @@ -62,7 +63,7 @@ inline STANDALONE_HOST_DEVICE Half::operator float() const { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) return __half2float(*reinterpret_cast(&x)); #elif defined(__SYCL_DEVICE_ONLY__) - return float(standalone::c10::bit_cast(x)); + return float(executorch::backends::aoti::slim::c10::bit_cast(x)); #elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ !defined(__APPLE__) return at::vec::half2float_scalar(x); @@ -127,7 +128,7 @@ inline STANDALONE_HOST_DEVICE Half operator-(const Half& a) { defined(__HIP_DEVICE_COMPILE__) return __hneg(a); #elif defined(__SYCL_DEVICE_ONLY__) - return -standalone::c10::bit_cast(a); + return -executorch::backends::aoti::slim::c10::bit_cast(a); #else return -static_cast(a); #endif @@ -283,14 +284,14 @@ inline STANDALONE_HOST_DEVICE Half operator/(int64_t a, Half b) { } /// NOTE: we do not define comparisons directly and instead rely on the implicit -/// conversion from standalone::c10::Half to float. +/// conversion from executorch::backends::aoti::slim::c10::Half to float. -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 namespace std { template <> -class numeric_limits { +class numeric_limits { public: static constexpr bool is_specialized = true; static constexpr bool is_signed = true; @@ -317,32 +318,41 @@ class numeric_limits { static constexpr auto traps = numeric_limits::traps; static constexpr auto tinyness_before = numeric_limits::tinyness_before; - static constexpr standalone::c10::Half min() { - return standalone::c10::Half(0x0400, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half min() { + return executorch::backends::aoti::slim::c10::Half( + 0x0400, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half lowest() { - return standalone::c10::Half(0xFBFF, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half lowest() { + return executorch::backends::aoti::slim::c10::Half( + 0xFBFF, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half max() { - return standalone::c10::Half(0x7BFF, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half max() { + return executorch::backends::aoti::slim::c10::Half( + 0x7BFF, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half epsilon() { - return standalone::c10::Half(0x1400, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half epsilon() { + return executorch::backends::aoti::slim::c10::Half( + 0x1400, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half round_error() { - return standalone::c10::Half(0x3800, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half round_error() { + return executorch::backends::aoti::slim::c10::Half( + 0x3800, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half infinity() { - return standalone::c10::Half(0x7C00, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half infinity() { + 
return executorch::backends::aoti::slim::c10::Half( + 0x7C00, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half quiet_NaN() { - return standalone::c10::Half(0x7E00, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half quiet_NaN() { + return executorch::backends::aoti::slim::c10::Half( + 0x7E00, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half signaling_NaN() { - return standalone::c10::Half(0x7D00, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half signaling_NaN() { + return executorch::backends::aoti::slim::c10::Half( + 0x7D00, executorch::backends::aoti::slim::c10::Half::from_bits()); } - static constexpr standalone::c10::Half denorm_min() { - return standalone::c10::Half(0x0001, standalone::c10::Half::from_bits()); + static constexpr executorch::backends::aoti::slim::c10::Half denorm_min() { + return executorch::backends::aoti::slim::c10::Half( + 0x0001, executorch::backends::aoti::slim::c10::Half::from_bits()); } }; diff --git a/backends/aoti/slim/c10/util/Half.h b/backends/aoti/slim/c10/util/Half.h index 86f8d8683e0..26597d23e53 100644 --- a/backends/aoti/slim/c10/util/Half.h +++ b/backends/aoti/slim/c10/util/Half.h @@ -61,7 +61,7 @@ #endif // __x86_64__ || _M_X64 || __i386 || _M_IX86 #endif // __GNUC__ || __clang__ -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { namespace detail { @@ -359,11 +359,11 @@ inline uint16_t fp16_ieee_from_fp32_value(float f) { #if defined(__aarch64__) && !defined(__CUDACC__) inline float16_t fp16_from_bits(uint16_t h) { - return standalone::c10::bit_cast(h); + return executorch::backends::aoti::slim::c10::bit_cast(h); } inline uint16_t fp16_to_bits(float16_t f) { - return standalone::c10::bit_cast(f); + return executorch::backends::aoti::slim::c10::bit_cast(f); } // According to https://godbolt.org/z/frExdbsWG it would translate to single @@ -419,6 +419,6 @@ inline std::ostream& operator<<(std::ostream& out, const Half& value) { return out; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 #include // IWYU pragma: keep diff --git a/backends/aoti/slim/c10/util/StringUtil.h b/backends/aoti/slim/c10/util/StringUtil.h index ff7c591e734..8a696322716 100644 --- a/backends/aoti/slim/c10/util/StringUtil.h +++ b/backends/aoti/slim/c10/util/StringUtil.h @@ -3,7 +3,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { template inline std::string Join(const std::string& delimiter, const Container& v) { std::stringstream s; @@ -13,4 +13,4 @@ inline std::string Join(const std::string& delimiter, const Container& v) { } return std::move(s).str(); } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/TypeCast.h b/backends/aoti/slim/c10/util/TypeCast.h index cfaaaebec95..e3d65a7ef16 100644 --- a/backends/aoti/slim/c10/util/TypeCast.h +++ b/backends/aoti/slim/c10/util/TypeCast.h @@ -20,7 +20,7 @@ STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion") STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { template struct needs_real { @@ -103,66 +103,76 @@ struct static_cast_with_inter_type { template <> struct static_cast_with_inter_type< - standalone::c10::complex, - 
standalone::c10::BFloat16> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::BFloat16> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::BFloat16 src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::BFloat16 src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Float8_e5m2> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Float8_e5m2> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Float8_e5m2 src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Float8_e5m2 src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Float8_e5m2fnuz> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Float8_e5m2fnuz> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Float8_e5m2fnuz src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Float8_e5m2fnuz src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Float8_e4m3fn> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Float8_e4m3fn> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Float8_e4m3fn src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Float8_e4m3fn src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Float8_e4m3fnuz> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Float8_e4m3fnuz> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Float8_e4m3fnuz src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Float8_e4m3fnuz src) { + return static_cast>( + 
executorch::backends::aoti::slim::c10::complex{src}); } }; @@ -170,40 +180,47 @@ struct static_cast_with_inter_type< // based off our apply macros? template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Float8_e8m0fnu> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Float8_e8m0fnu> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Float8_e8m0fnu src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Float8_e8m0fnu src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::Half> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::Half> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::Half src) { - return static_cast>( - standalone::c10::complex{src}); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::Half src) { + return static_cast>( + executorch::backends::aoti::slim::c10::complex{src}); } }; template <> struct static_cast_with_inter_type< - standalone::c10::complex, - standalone::c10::complex> { + executorch::backends::aoti::slim::c10::complex< + executorch::backends::aoti::slim::c10::Half>, + executorch::backends::aoti::slim::c10::complex> { STANDALONE_HOST_DEVICE - __ubsan_ignore_undefined__ static inline standalone::c10::complex< - standalone::c10::Half> - apply(standalone::c10::complex src) { - return static_cast>( - static_cast>(src)); + __ubsan_ignore_undefined__ static inline executorch::backends::aoti::slim:: + c10::complex + apply(executorch::backends::aoti::slim::c10::complex src) { + return static_cast>( + static_cast>( + src)); } }; @@ -229,7 +246,7 @@ To checked_convert(From f, const char* name) { return convert(f); } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 STANDALONE_CLANG_DIAGNOSTIC_POP() diff --git a/backends/aoti/slim/c10/util/TypeSafeSignMath.h b/backends/aoti/slim/c10/util/TypeSafeSignMath.h index 276b1cee7d0..7e23f64a39e 100644 --- a/backends/aoti/slim/c10/util/TypeSafeSignMath.h +++ b/backends/aoti/slim/c10/util/TypeSafeSignMath.h @@ -13,7 +13,7 @@ STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wstring-conversion") STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /// Returns false since we cannot have x < 0 if x is unsigned. template @@ -33,7 +33,8 @@ inline constexpr bool is_negative(const T& x, std::false_type /*is_unsigned*/) { /// NOTE: Will fail on an unsigned custom type /// For the most part it's possible to fix this if /// the custom type has a constexpr constructor. 
-/// However, notably, standalone::c10::Half does not :-( +/// However, notably, executorch::backends::aoti::slim::c10::Half does not +/// :-( template inline constexpr bool is_negative(const T& x) { return is_negative(x, std::is_unsigned()); @@ -55,7 +56,8 @@ inline constexpr int signum(const T& x, std::false_type /*is_unsigned*/) { /// NOTE: Will fail on an unsigned custom type /// For the most part it's possible to fix this if /// the custom type has a constexpr constructor. -/// However, notably, standalone::c10::Half does not :-( +/// However, notably, executorch::backends::aoti::slim::c10::Half does not +/// :-( template inline constexpr int signum(const T& x) { return signum(x, std::is_unsigned()); @@ -129,13 +131,14 @@ inline constexpr bool less_than_lowest( /// NOTE: Will fail on an unsigned custom type /// For the most part it's possible to fix this if /// the custom type has a constexpr constructor. -/// However, notably, standalone::c10::Half does not : +/// However, notably, executorch::backends::aoti::slim::c10::Half does not +/// : template inline constexpr bool less_than_lowest(const T& x) { return less_than_lowest( x, std::is_unsigned(), std::is_unsigned()); } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 STANDALONE_CLANG_DIAGNOSTIC_POP() diff --git a/backends/aoti/slim/c10/util/accumulate.h b/backends/aoti/slim/c10/util/accumulate.h index 4972dd9826a..578c6246b29 100644 --- a/backends/aoti/slim/c10/util/accumulate.h +++ b/backends/aoti/slim/c10/util/accumulate.h @@ -11,7 +11,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /// Sum of a list of integers; accumulates into the int64_t datatype template < @@ -122,4 +122,4 @@ inline int64_t numelements_between_dim(int k, int l, const C& dims) { return multiply_integers(cbegin, cend); } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/bit_cast.h b/backends/aoti/slim/c10/util/bit_cast.h index 765ec641486..5a1e1208acf 100644 --- a/backends/aoti/slim/c10/util/bit_cast.h +++ b/backends/aoti/slim/c10/util/bit_cast.h @@ -11,7 +11,7 @@ #endif // __has_include() && (__cplusplus >= 202002L || // (defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L)) -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { #if STANDALONE_HAVE_STD_BIT_CAST using std::bit_cast; @@ -41,4 +41,4 @@ bit_cast(const From& src) noexcept { #endif // STANDALONE_HAVE_STD_BIT_CAST #undef STANDALONE_HAVE_STD_BIT_CAST -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/bits.h b/backends/aoti/slim/c10/util/bits.h index 2d365463a01..d04f88dafc8 100644 --- a/backends/aoti/slim/c10/util/bits.h +++ b/backends/aoti/slim/c10/util/bits.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * bits1x8 is an uninterpreted dtype of a tensor with 1 bit (packed to byte @@ -58,4 +58,4 @@ struct alignas(2) bits16 { STANDALONE_HOST_DEVICE explicit bits16(uint16_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/complex.h b/backends/aoti/slim/c10/util/complex.h index 988e446b3e4..b48ef792ed7 100644 --- a/backends/aoti/slim/c10/util/complex.h +++ b/backends/aoti/slim/c10/util/complex.h @@ -17,19 +17,19 @@ 
STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion") STANDALONE_CLANG_DIAGNOSTIC_IGNORE("-Wfloat-conversion") #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { -// standalone::c10::complex is an implementation of complex numbers that aims -// to work on all devices supported by PyTorch +// executorch::backends::aoti::slim::c10::complex is an implementation of +// complex numbers that aims to work on all devices supported by PyTorch // // Most of the APIs duplicates std::complex // Reference: https://en.cppreference.com/w/cpp/numeric/complex // // [NOTE: Complex Operator Unification] // Operators currently use a mix of std::complex, thrust::complex, and -// standalone::c10::complex internally. The end state is that all operators -// will use standalone::c10::complex internally. Until then, there may be -// some hacks to support all variants. +// executorch::backends::aoti::slim::c10::complex internally. The end state is +// that all operators will use executorch::backends::aoti::slim::c10::complex +// internally. Until then, there may be some hacks to support all variants. // // // [Note on Constructors] @@ -89,9 +89,9 @@ namespace standalone::c10 { // // std::complex has custom literals `i`, `if` and `il` defined in namespace // `std::literals::complex_literals`. We define our own custom literals in the -// namespace `standalone::c10::complex_literals`. Our custom literals does not -// follow the same behavior as in std::complex, instead, we define _if, _id to -// construct float/double complex literals. +// namespace `executorch::backends::aoti::slim::c10::complex_literals`. Our +// custom literals does not follow the same behavior as in std::complex, +// instead, we define _if, _id to construct float/double complex literals. // // // [real() and imag()] @@ -138,9 +138,11 @@ namespace standalone::c10 { // // // -// TODO(@zasdfgbnm): standalone::c10::complex is not -// currently supported, because: -// - lots of members and functions of standalone::c10::Half are not constexpr +// TODO(@zasdfgbnm): +// executorch::backends::aoti::slim::c10::complex +// is not currently supported, because: +// - lots of members and functions of +// executorch::backends::aoti::slim::c10::Half are not constexpr // - thrust::complex only support float and double template @@ -166,7 +168,8 @@ struct alignas(sizeof(T) * 2) complex { #endif // Use SFINAE to specialize casting constructor for - // standalone::c10::complex and standalone::c10::complex + // executorch::backends::aoti::slim::c10::complex and + // executorch::backends::aoti::slim::c10::complex template STANDALONE_HOST_DEVICE explicit constexpr complex( const std::enable_if_t, complex>& other) @@ -430,69 +433,69 @@ constexpr complex operator/(const T& lhs, const complex& rhs) { return result /= rhs; } -// Define operators between integral scalars and standalone::c10::complex. -// std::complex does not support this when T is a floating-point number. This is -// useful because it saves a lot of "static_cast" when operate a complex and an -// integer. This makes the code both less verbose and potentially more -// efficient. +// Define operators between integral scalars and +// executorch::backends::aoti::slim::c10::complex. std::complex does not support +// this when T is a floating-point number. This is useful because it saves a lot +// of "static_cast" when operate a complex and an integer. This makes the code +// both less verbose and potentially more efficient. 
#define COMPLEX_INTEGER_OP_TEMPLATE_CONDITION \ typename std::enable_if_t< \ std::is_floating_point_v && std::is_integral_v, \ int> = 0 template -constexpr standalone::c10::complex operator+( - const standalone::c10::complex& a, +constexpr executorch::backends::aoti::slim::c10::complex operator+( + const executorch::backends::aoti::slim::c10::complex& a, const iT& b) { return a + static_cast(b); } template -constexpr standalone::c10::complex operator+( +constexpr executorch::backends::aoti::slim::c10::complex operator+( const iT& a, - const standalone::c10::complex& b) { + const executorch::backends::aoti::slim::c10::complex& b) { return static_cast(a) + b; } template -constexpr standalone::c10::complex operator-( - const standalone::c10::complex& a, +constexpr executorch::backends::aoti::slim::c10::complex operator-( + const executorch::backends::aoti::slim::c10::complex& a, const iT& b) { return a - static_cast(b); } template -constexpr standalone::c10::complex operator-( +constexpr executorch::backends::aoti::slim::c10::complex operator-( const iT& a, - const standalone::c10::complex& b) { + const executorch::backends::aoti::slim::c10::complex& b) { return static_cast(a) - b; } template -constexpr standalone::c10::complex operator*( - const standalone::c10::complex& a, +constexpr executorch::backends::aoti::slim::c10::complex operator*( + const executorch::backends::aoti::slim::c10::complex& a, const iT& b) { return a * static_cast(b); } template -constexpr standalone::c10::complex operator*( +constexpr executorch::backends::aoti::slim::c10::complex operator*( const iT& a, - const standalone::c10::complex& b) { + const executorch::backends::aoti::slim::c10::complex& b) { return static_cast(a) * b; } template -constexpr standalone::c10::complex operator/( - const standalone::c10::complex& a, +constexpr executorch::backends::aoti::slim::c10::complex operator/( + const executorch::backends::aoti::slim::c10::complex& a, const iT& b) { return a / static_cast(b); } template -constexpr standalone::c10::complex operator/( +constexpr executorch::backends::aoti::slim::c10::complex operator/( const iT& a, - const standalone::c10::complex& b) { + const executorch::backends::aoti::slim::c10::complex& b) { return static_cast(a) / b; } @@ -545,7 +548,7 @@ std::basic_istream& operator>>( return is; } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 // std functions // @@ -554,17 +557,18 @@ std::basic_istream& operator>>( namespace std { template -constexpr T real(const standalone::c10::complex& z) { +constexpr T real(const executorch::backends::aoti::slim::c10::complex& z) { return z.real(); } template -constexpr T imag(const standalone::c10::complex& z) { +constexpr T imag(const executorch::backends::aoti::slim::c10::complex& z) { return z.imag(); } template -STANDALONE_HOST_DEVICE T abs(const standalone::c10::complex& z) { +STANDALONE_HOST_DEVICE T +abs(const executorch::backends::aoti::slim::c10::complex& z) { #if defined(__CUDACC__) || defined(__HIPCC__) return thrust::abs(static_cast>(z)); #else @@ -579,14 +583,15 @@ STANDALONE_HOST_DEVICE T abs(const standalone::c10::complex& z) { #endif template -STANDALONE_HOST_DEVICE T arg(const standalone::c10::complex& z) { +STANDALONE_HOST_DEVICE T +arg(const executorch::backends::aoti::slim::c10::complex& z) { return ROCm_Bug(std)::atan2(std::imag(z), std::real(z)); } #undef ROCm_Bug template -constexpr T norm(const standalone::c10::complex& z) { +constexpr T norm(const executorch::backends::aoti::slim::c10::complex& z) 
{ return z.real() * z.real() + z.imag() * z.imag(); } @@ -596,11 +601,12 @@ constexpr T norm(const standalone::c10::complex& z) { // constexpr std::complex conj( DoubleOrInteger z ); // constexpr std::complex conj( long double z ); // These are not implemented -// TODO(@zasdfgbnm): implement them as standalone::c10::conj +// TODO(@zasdfgbnm): implement them as +// executorch::backends::aoti::slim::c10::conj template -constexpr standalone::c10::complex conj( - const standalone::c10::complex& z) { - return standalone::c10::complex(z.real(), -z.imag()); +constexpr executorch::backends::aoti::slim::c10::complex conj( + const executorch::backends::aoti::slim::c10::complex& z) { + return executorch::backends::aoti::slim::c10::complex(z.real(), -z.imag()); } // Thrust does not have complex --> complex version of thrust::proj, @@ -608,11 +614,12 @@ constexpr standalone::c10::complex conj( // TODO(@zasdfgbnm): implement it by ourselves // There is no standalone version of std::polar, because std::polar always -// returns std::complex. Use standalone::c10::polar instead; +// returns std::complex. Use executorch::backends::aoti::slim::c10::polar +// instead; } // namespace std -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { template STANDALONE_HOST_DEVICE complex polar(const T& r, const T& theta = T()) { @@ -639,12 +646,12 @@ struct alignas(4) complex { const Half& imag) : real_(real), imag_(imag) {} STANDALONE_HOST_DEVICE inline complex( - const standalone::c10::complex& value) + const executorch::backends::aoti::slim::c10::complex& value) : real_(value.real()), imag_(value.imag()) {} // Conversion operator - inline STANDALONE_HOST_DEVICE operator standalone::c10::complex() - const { + inline STANDALONE_HOST_DEVICE + operator executorch::backends::aoti::slim::c10::complex() const { return {real_, imag_}; } @@ -678,7 +685,7 @@ struct alignas(4) complex { } }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 STANDALONE_CLANG_DIAGNOSTIC_POP() diff --git a/backends/aoti/slim/c10/util/complex_math.h b/backends/aoti/slim/c10/util/complex_math.h index 56fc84fe90b..3ada9db6f00 100644 --- a/backends/aoti/slim/c10/util/complex_math.h +++ b/backends/aoti/slim/c10/util/complex_math.h @@ -5,52 +5,52 @@ #include -namespace standalone::c10::complex_math { +namespace executorch::backends::aoti::slim::c10::complex_math { // Exponential functions template -STANDALONE_HOST_DEVICE inline standalone::c10::complex exp( - const standalone::c10::complex& x) { +STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex +exp(const executorch::backends::aoti::slim::c10::complex& x) { #if defined(__CUDACC__) || defined(__HIPCC__) - return static_cast>( + return static_cast>( thrust::exp(static_cast>(x))); #else - return static_cast>( + return static_cast>( std::exp(static_cast>(x))); #endif } template -STANDALONE_HOST_DEVICE inline standalone::c10::complex log( - const standalone::c10::complex& x) { +STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex +log(const executorch::backends::aoti::slim::c10::complex& x) { #if defined(__CUDACC__) || defined(__HIPCC__) - return static_cast>( + return static_cast>( thrust::log(static_cast>(x))); #else - return static_cast>( + return static_cast>( std::log(static_cast>(x))); #endif } template -STANDALONE_HOST_DEVICE inline standalone::c10::complex log10( - const standalone::c10::complex& x) { +STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex 
+log10(const executorch::backends::aoti::slim::c10::complex& x) { #if defined(__CUDACC__) || defined(__HIPCC__) - return static_cast>( + return static_cast>( thrust::log10(static_cast>(x))); #else - return static_cast>( + return static_cast>( std::log10(static_cast>(x))); #endif } template -STANDALONE_HOST_DEVICE inline standalone::c10::complex log2( - const standalone::c10::complex& x) { - const standalone::c10::complex log2 = - standalone::c10::complex(::log(2.0), 0.0); - return standalone::c10::complex_math::log(x) / log2; +STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex +log2(const executorch::backends::aoti::slim::c10::complex& x) { + const executorch::backends::aoti::slim::c10::complex log2 = + executorch::backends::aoti::slim::c10::complex(::log(2.0), 0.0); + return executorch::backends::aoti::slim::c10::complex_math::log(x) / log2; } // Power functions @@ -59,34 +59,36 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex log2( (defined(__GLIBCXX__) && !defined(_GLIBCXX11_USE_C99_COMPLEX)) namespace _detail { template -standalone::c10::complex compute_csqrt( - const standalone::c10::complex& z) { +executorch::backends::aoti::slim::c10::complex compute_csqrt( + const executorch::backends::aoti::slim::c10::complex& z) { constexpr auto half = T(.5); // Trust standard library to correctly handle infs and NaNs if (std::isinf(z.real()) || std::isinf(z.imag()) || std::isnan(z.real()) || std::isnan(z.imag())) { - return static_cast>( + return static_cast>( std::sqrt(static_cast>(z))); } // Special case for square root of pure imaginary values if (z.real() == T(0)) { if (z.imag() == T(0)) { - return standalone::c10::complex(T(0), z.imag()); + return executorch::backends::aoti::slim::c10::complex(T(0), z.imag()); } auto v = std::sqrt(half * std::abs(z.imag())); - return standalone::c10::complex(v, std::copysign(v, z.imag())); + return executorch::backends::aoti::slim::c10::complex( + v, std::copysign(v, z.imag())); } // At this point, z is non-zero and finite if (z.real() >= 0.0) { auto t = std::sqrt((z.real() + std::abs(z)) * half); - return standalone::c10::complex(t, half * (z.imag() / t)); + return executorch::backends::aoti::slim::c10::complex( + t, half * (z.imag() / t)); } auto t = std::sqrt((-z.real() + std::abs(z)) * half); - return standalone::c10::complex( + return executorch::backends::aoti::slim::c10::complex( half * std::abs(z.imag() / t), std::copysign(t, z.imag())); } @@ -95,58 +97,59 @@ standalone::c10::complex compute_csqrt( // cacos(z).re = 2*atan2(sqrt(1-z).re(), sqrt(1+z).re()) // cacos(z).im = asinh((sqrt(conj(1+z))*sqrt(1-z)).im()) template -standalone::c10::complex compute_cacos( - const standalone::c10::complex& z) { +executorch::backends::aoti::slim::c10::complex compute_cacos( + const executorch::backends::aoti::slim::c10::complex& z) { auto constexpr one = T(1); // Trust standard library to correctly handle infs and NaNs if (std::isinf(z.real()) || std::isinf(z.imag()) || std::isnan(z.real()) || std::isnan(z.imag())) { - return static_cast>( + return static_cast>( std::acos(static_cast>(z))); } - auto a = - compute_csqrt(standalone::c10::complex(one - z.real(), -z.imag())); - auto b = compute_csqrt(standalone::c10::complex(one + z.real(), z.imag())); - auto c = - compute_csqrt(standalone::c10::complex(one + z.real(), -z.imag())); + auto a = compute_csqrt(executorch::backends::aoti::slim::c10::complex( + one - z.real(), -z.imag())); + auto b = compute_csqrt(executorch::backends::aoti::slim::c10::complex( + one + z.real(), 
z.imag()));
+  auto c = compute_csqrt(executorch::backends::aoti::slim::c10::complex<T>(
+      one + z.real(), -z.imag()));
   auto r = T(2) * std::atan2(a.real(), b.real());
   // Explicitly unroll (a*c).imag()
   auto i = std::asinh(a.real() * c.imag() + a.imag() * c.real());
-  return standalone::c10::complex<T>(r, i);
+  return executorch::backends::aoti::slim::c10::complex<T>(r, i);
 }

-inline standalone::c10::complex<float> sqrt(
-    const standalone::c10::complex<float>& in) {
+inline executorch::backends::aoti::slim::c10::complex<float> sqrt(
+    const executorch::backends::aoti::slim::c10::complex<float>& in) {
   return compute_csqrt(in);
 }

-inline standalone::c10::complex<double> sqrt(
-    const standalone::c10::complex<double>& in) {
+inline executorch::backends::aoti::slim::c10::complex<double> sqrt(
+    const executorch::backends::aoti::slim::c10::complex<double>& in) {
   return compute_csqrt(in);
 }

-inline standalone::c10::complex<float> acos(
-    const standalone::c10::complex<float>& in) {
+inline executorch::backends::aoti::slim::c10::complex<float> acos(
+    const executorch::backends::aoti::slim::c10::complex<float>& in) {
   return compute_cacos(in);
 }

-inline standalone::c10::complex<double> acos(
-    const standalone::c10::complex<double>& in) {
+inline executorch::backends::aoti::slim::c10::complex<double> acos(
+    const executorch::backends::aoti::slim::c10::complex<double>& in) {
   return compute_cacos(in);
 }

 } // namespace _detail
 #endif

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> sqrt(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+sqrt(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::sqrt(static_cast<thrust::complex<T>>(x)));
 #elif !( \
     defined(_LIBCPP_VERSION) || \
     (defined(__GLIBCXX__) && !defined(_GLIBCXX11_USE_C99_COMPLEX)))
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::sqrt(static_cast<std::complex<T>>(x)));
 #else
   return _detail::sqrt(x);
@@ -154,79 +157,84 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> sqrt(
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> pow(
-    const standalone::c10::complex<T>& x,
-    const standalone::c10::complex<T>& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+pow(const executorch::backends::aoti::slim::c10::complex<T>& x,
+    const executorch::backends::aoti::slim::c10::complex<T>& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(thrust::pow(
-      static_cast<thrust::complex<T>>(x), static_cast<thrust::complex<T>>(y)));
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
+      thrust::pow(
+          static_cast<thrust::complex<T>>(x),
+          static_cast<thrust::complex<T>>(y)));
 #else
-  return static_cast<standalone::c10::complex<T>>(std::pow(
-      static_cast<std::complex<T>>(x), static_cast<std::complex<T>>(y)));
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
+      std::pow(
+          static_cast<std::complex<T>>(x), static_cast<std::complex<T>>(y)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> pow(
-    const standalone::c10::complex<T>& x,
-    const T& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+pow(const executorch::backends::aoti::slim::c10::complex<T>& x, const T& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::pow(static_cast<thrust::complex<T>>(x), y));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::pow(static_cast<std::complex<T>>(x), y));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> pow(
-    const T& x,
-    const standalone::c10::complex<T>& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+pow(const T& x, const executorch::backends::aoti::slim::c10::complex<T>& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::pow(x, static_cast<thrust::complex<T>>(y)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::pow(x, static_cast<std::complex<T>>(y)));
 #endif
 }

 template <typename T, typename U>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<decltype(T() * U())> pow(
-    const standalone::c10::complex<T>& x,
-    const standalone::c10::complex<U>& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<
+    decltype(T() * U())>
+pow(const executorch::backends::aoti::slim::c10::complex<T>& x,
+    const executorch::backends::aoti::slim::c10::complex<U>& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(thrust::pow(
-      static_cast<thrust::complex<T>>(x), static_cast<thrust::complex<U>>(y)));
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
+      thrust::pow(
+          static_cast<thrust::complex<T>>(x),
+          static_cast<thrust::complex<U>>(y)));
 #else
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(std::pow(
-      static_cast<std::complex<T>>(x), static_cast<std::complex<U>>(y)));
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
+      std::pow(
+          static_cast<std::complex<T>>(x), static_cast<std::complex<U>>(y)));
 #endif
 }

 template <typename T, typename U>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<decltype(T() * U())> pow(
-    const standalone::c10::complex<T>& x,
-    const U& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<
+    decltype(T() * U())>
+pow(const executorch::backends::aoti::slim::c10::complex<T>& x, const U& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
       thrust::pow(static_cast<thrust::complex<T>>(x), y));
 #else
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
       std::pow(static_cast<std::complex<T>>(x), y));
 #endif
 }

 template <typename T, typename U>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<decltype(T() * U())> pow(
-    const T& x,
-    const standalone::c10::complex<U>& y) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<
+    decltype(T() * U())>
+pow(const T& x, const executorch::backends::aoti::slim::c10::complex<U>& y) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
       thrust::pow(x, static_cast<thrust::complex<U>>(y)));
 #else
-  return static_cast<standalone::c10::complex<decltype(T() * U())>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<decltype(T() * U())>>(
       std::pow(x, static_cast<std::complex<U>>(y)));
 #endif
 }
@@ -234,61 +242,61 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<decltype(T() * U())> pow(

 // Trigonometric functions

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> sin(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+sin(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::sin(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::sin(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> cos(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+cos(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::cos(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::cos(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> tan(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+tan(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::tan(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::tan(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> asin(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+asin(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::asin(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::asin(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> acos(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+acos(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::acos(static_cast<thrust::complex<T>>(x)));
 #elif !defined(_LIBCPP_VERSION)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::acos(static_cast<std::complex<T>>(x)));
 #else
   return _detail::acos(x);
@@ -296,13 +304,13 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> acos(
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> atan(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+atan(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::atan(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::atan(static_cast<std::complex<T>>(x)));
 #endif
 }
@@ -310,80 +318,80 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> atan(

 // Hyperbolic functions

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> sinh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+sinh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::sinh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::sinh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> cosh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+cosh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::cosh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::cosh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> tanh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+tanh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::tanh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::tanh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> asinh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+asinh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::asinh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::asinh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> acosh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+acosh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::acosh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::acosh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> atanh(
-    const standalone::c10::complex<T>& x) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+atanh(const executorch::backends::aoti::slim::c10::complex<T>& x) {
 #if defined(__CUDACC__) || defined(__HIPCC__)
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       thrust::atanh(static_cast<thrust::complex<T>>(x)));
 #else
-  return static_cast<standalone::c10::complex<T>>(
+  return static_cast<executorch::backends::aoti::slim::c10::complex<T>>(
       std::atanh(static_cast<std::complex<T>>(x)));
 #endif
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> log1p(
-    const standalone::c10::complex<T>& z) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+log1p(const executorch::backends::aoti::slim::c10::complex<T>& z) {
 #if defined(__APPLE__) || defined(__MACOSX) || defined(__CUDACC__) || \
     defined(__HIPCC__)
   // For Mac, the new implementation yielded a high relative error. Falling back
@@ -420,7 +428,7 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> log1p(
 #else
   // CPU path
   // Based on https://github.com/numpy/numpy/pull/22611#issuecomment-1667945354
-  standalone::c10::complex<T> u = z + T(1);
+  executorch::backends::aoti::slim::c10::complex<T> u = z + T(1);
   if (u == T(1)) {
     return z;
   } else {
@@ -434,8 +442,8 @@
 }

 template <typename T>
-STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> expm1(
-    const standalone::c10::complex<T>& z) {
+STANDALONE_HOST_DEVICE inline executorch::backends::aoti::slim::c10::complex<T>
+expm1(const executorch::backends::aoti::slim::c10::complex<T>& z) {
   // expm1(z) = exp(z) - 1
   // Define z = x + i * y
   // f = e ^ (x + i * y) - 1
@@ -451,50 +459,50 @@ STANDALONE_HOST_DEVICE inline standalone::c10::complex<T> expm1(
   return {er, ei};
 }

-} // namespace standalone::c10::complex_math
-
-using standalone::c10::complex_math::acos;
-using standalone::c10::complex_math::acosh;
-using standalone::c10::complex_math::asin;
-using standalone::c10::complex_math::asinh;
-using standalone::c10::complex_math::atan;
-using standalone::c10::complex_math::atanh;
-using standalone::c10::complex_math::cos;
-using standalone::c10::complex_math::cosh;
-using standalone::c10::complex_math::exp;
-using standalone::c10::complex_math::expm1;
-using standalone::c10::complex_math::log;
-using standalone::c10::complex_math::log10;
-using standalone::c10::complex_math::log1p;
-using standalone::c10::complex_math::log2;
-using standalone::c10::complex_math::pow;
-using standalone::c10::complex_math::sin;
-using standalone::c10::complex_math::sinh;
-using standalone::c10::complex_math::sqrt;
-using standalone::c10::complex_math::tan;
-using standalone::c10::complex_math::tanh;
+} // namespace executorch::backends::aoti::slim::c10::complex_math
+
+using executorch::backends::aoti::slim::c10::complex_math::acos;
+using executorch::backends::aoti::slim::c10::complex_math::acosh;
+using executorch::backends::aoti::slim::c10::complex_math::asin;
+using executorch::backends::aoti::slim::c10::complex_math::asinh;
+using executorch::backends::aoti::slim::c10::complex_math::atan;
+using executorch::backends::aoti::slim::c10::complex_math::atanh;
+using executorch::backends::aoti::slim::c10::complex_math::cos;
+using executorch::backends::aoti::slim::c10::complex_math::cosh;
+using executorch::backends::aoti::slim::c10::complex_math::exp;
+using executorch::backends::aoti::slim::c10::complex_math::expm1;
+using executorch::backends::aoti::slim::c10::complex_math::log;
+using executorch::backends::aoti::slim::c10::complex_math::log10;
+using executorch::backends::aoti::slim::c10::complex_math::log1p;
+using executorch::backends::aoti::slim::c10::complex_math::log2;
+using executorch::backends::aoti::slim::c10::complex_math::pow;
+using executorch::backends::aoti::slim::c10::complex_math::sin;
+using executorch::backends::aoti::slim::c10::complex_math::sinh;
+using executorch::backends::aoti::slim::c10::complex_math::sqrt;
+using executorch::backends::aoti::slim::c10::complex_math::tan;
+using executorch::backends::aoti::slim::c10::complex_math::tanh;

 namespace std {

-using standalone::c10::complex_math::acos;
-using standalone::c10::complex_math::acosh;
-using standalone::c10::complex_math::asin;
-using standalone::c10::complex_math::asinh;
-using standalone::c10::complex_math::atan;
-using standalone::c10::complex_math::atanh;
-using standalone::c10::complex_math::cos;
-using standalone::c10::complex_math::cosh;
-using standalone::c10::complex_math::exp;
-using standalone::c10::complex_math::expm1;
-using standalone::c10::complex_math::log;
-using standalone::c10::complex_math::log10;
-using standalone::c10::complex_math::log1p;
-using standalone::c10::complex_math::log2;
-using standalone::c10::complex_math::pow;
-using standalone::c10::complex_math::sin;
-using standalone::c10::complex_math::sinh;
-using standalone::c10::complex_math::sqrt;
-using standalone::c10::complex_math::tan;
-using standalone::c10::complex_math::tanh;
+using executorch::backends::aoti::slim::c10::complex_math::acos;
+using executorch::backends::aoti::slim::c10::complex_math::acosh;
+using executorch::backends::aoti::slim::c10::complex_math::asin;
+using executorch::backends::aoti::slim::c10::complex_math::asinh;
+using executorch::backends::aoti::slim::c10::complex_math::atan;
+using executorch::backends::aoti::slim::c10::complex_math::atanh;
+using executorch::backends::aoti::slim::c10::complex_math::cos;
+using executorch::backends::aoti::slim::c10::complex_math::cosh;
+using executorch::backends::aoti::slim::c10::complex_math::exp;
+using executorch::backends::aoti::slim::c10::complex_math::expm1;
+using executorch::backends::aoti::slim::c10::complex_math::log;
+using executorch::backends::aoti::slim::c10::complex_math::log10;
+using executorch::backends::aoti::slim::c10::complex_math::log1p;
+using executorch::backends::aoti::slim::c10::complex_math::log2;
+using executorch::backends::aoti::slim::c10::complex_math::pow;
+using executorch::backends::aoti::slim::c10::complex_math::sin;
+using executorch::backends::aoti::slim::c10::complex_math::sinh;
+using executorch::backends::aoti::slim::c10::complex_math::sqrt;
+using executorch::backends::aoti::slim::c10::complex_math::tan;
+using executorch::backends::aoti::slim::c10::complex_math::tanh;

 } // namespace std
diff --git a/backends/aoti/slim/c10/util/complex_utils.h b/backends/aoti/slim/c10/util/complex_utils.h
index 5b29406a186..af6d8203c65 100644
--- a/backends/aoti/slim/c10/util/complex_utils.h
+++ b/backends/aoti/slim/c10/util/complex_utils.h
@@ -5,7 +5,7 @@
 #include

-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {

 template <typename T>
 struct is_complex : public std::false_type {};
@@ -14,7 +14,8 @@ template <typename T>
 struct is_complex<std::complex<T>> : public std::true_type {};

 template <typename T>
-struct is_complex<standalone::c10::complex<T>> : public std::true_type {};
+struct is_complex<executorch::backends::aoti::slim::c10::complex<T>>
+    : public std::true_type {};

 // Extract double from std::complex<double>; is identity otherwise
 // TODO: Write in more idiomatic C++17
@@ -27,19 +28,20 @@ struct scalar_value_type<std::complex<T>> {
   using type = T;
 };
 template <typename T>
-struct scalar_value_type<standalone::c10::complex<T>> {
+struct scalar_value_type<executorch::backends::aoti::slim::c10::complex<T>> {
   using type = T;
 };

-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10

 namespace std {

 template <typename T>
-class numeric_limits<standalone::c10::complex<T>> : public numeric_limits<T> {};
+class numeric_limits<executorch::backends::aoti::slim::c10::complex<T>>
+    : public numeric_limits<T> {};

 template <typename T>
-bool isnan(const standalone::c10::complex<T>& v) {
+bool isnan(const executorch::backends::aoti::slim::c10::complex<T>& v) {
   return std::isnan(v.real()) || std::isnan(v.imag());
 }
diff --git a/backends/aoti/slim/c10/util/copysign.h b/backends/aoti/slim/c10/util/copysign.h
index 1012934049c..ff0b0fcc847 100644
--- a/backends/aoti/slim/c10/util/copysign.h
+++ b/backends/aoti/slim/c10/util/copysign.h
@@ -3,7 +3,7 @@
 #include
 #include

-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {

 // Note: Explicit implementation of copysign for Half and BFloat16
 // is needed to workaround g++-7/8 crash on aarch64, but also makes
@@ -23,4 +23,4 @@ inline BFloat16 copysign(BFloat16 a, BFloat16 b) {
   return BFloat16((a.x & 0x7fff) | (b.x & 0x8000), BFloat16::from_bits());
 }

-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/floating_point_utils.h b/backends/aoti/slim/c10/util/floating_point_utils.h
index 259cb93b0a5..dbe208b05b9 100644
--- a/backends/aoti/slim/c10/util/floating_point_utils.h
+++ b/backends/aoti/slim/c10/util/floating_point_utils.h
@@ -4,7 +4,7 @@
 #include
 #include

-namespace standalone::c10::detail {
+namespace executorch::backends::aoti::slim::c10::detail {

 STANDALONE_HOST_DEVICE inline float fp32_from_bits(uint32_t w) {
 #if defined(__OPENCL_VERSION__)
@@ -14,7 +14,7 @@ STANDALONE_HOST_DEVICE inline float fp32_from_bits(uint32_t w) {
 #elif defined(__INTEL_COMPILER)
   return _castu32_f32(w);
 #else
-  return standalone::c10::bit_cast<float>(w);
+  return executorch::backends::aoti::slim::c10::bit_cast<float>(w);
 #endif
 }

@@ -26,8 +26,8 @@ STANDALONE_HOST_DEVICE inline uint32_t fp32_to_bits(float f) {
 #elif defined(__INTEL_COMPILER)
   return _castf32_u32(f);
 #else
-  return standalone::c10::bit_cast<uint32_t>(f);
+  return executorch::backends::aoti::slim::c10::bit_cast<uint32_t>(f);
 #endif
 }

-} // namespace standalone::c10::detail
+} // namespace executorch::backends::aoti::slim::c10::detail
diff --git a/backends/aoti/slim/c10/util/generic_math.h b/backends/aoti/slim/c10/util/generic_math.h
index 00bb4265d9d..6cc9ec72bec 100644
--- a/backends/aoti/slim/c10/util/generic_math.h
+++ b/backends/aoti/slim/c10/util/generic_math.h
@@ -6,20 +6,23 @@
 #if defined(__CUDA_ARCH__)
 #include
-#define STANDALONE_COMPAT_COPYSIGN standalone::c10::cuda::compat::copysign
+#define STANDALONE_COMPAT_COPYSIGN \
+  executorch::backends::aoti::slim::c10::cuda::compat::copysign
 // TODO: rocm is not supported yet
 // #elif defined(__HIPCC__)
 // #include
-// #define STANDALONE_COMPAT_COPYSIGN standalone::c10::hip::compat::copysign
+// #define STANDALONE_COMPAT_COPYSIGN
+// executorch::backends::aoti::slim::c10::hip::compat::copysign
 #else
 #include
-#define STANDALONE_COMPAT_COPYSIGN standalone::c10::copysign
+#define STANDALONE_COMPAT_COPYSIGN \
+  executorch::backends::aoti::slim::c10::copysign
 #endif

 // The functions in this file should be header-only as it is used under
 // ABI-compatibility mode.

-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {

 // NOTE: [Floor Division in Python]
 // Python's __floordiv__ operator is more complicated than just floor(a / b).
@@ -61,7 +64,7 @@ inline STANDALONE_HOST_DEVICE scalar_t div_floor_floating(
 template <typename scalar_t>
 inline STANDALONE_HOST_DEVICE scalar_t div_floor_integer(scalar_t a, scalar_t b) {
-  if (standalone::c10::signs_differ(a, b)) {
+  if (executorch::backends::aoti::slim::c10::signs_differ(a, b)) {
     // Subtracts one from the results of truncation division if the
     // divisor and dividend have different sign(bit)s and the remainder of
     // the division is nonzero
@@ -102,4 +105,4 @@ inline STANDALONE_HOST_DEVICE scalar_t div_mod(scalar_t a, scalar_t b) {
   return mod;
 }

-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/irange.h b/backends/aoti/slim/c10/util/irange.h
index 0d10f373a04..75c8b48d1ca 100644
--- a/backends/aoti/slim/c10/util/irange.h
+++ b/backends/aoti/slim/c10/util/irange.h
@@ -9,7 +9,7 @@
 #include
 #include

-namespace standalone::c10 {
+namespace executorch::backends::aoti::slim::c10 {

 namespace detail {

@@ -48,9 +48,9 @@ struct integer_iterator {
   constexpr bool operator==(const integer_iterator& other) const {
     if constexpr (one_sided) {
       // Range-for loops' end test is `begin != end`, not `begin <
-      // end`. To handle `standalone::c10::irange(n)` where n < 0 (which
-      // should be empty), we just make `begin != end` fail whenever `end` is
-      // negative.
+      // end`. To handle `executorch::backends::aoti::slim::c10::irange(n)`
+      // where n < 0 (which should be empty), we just make `begin != end` fail
+      // whenever `end` is negative.
       return is_negative(other.value) || value == other.value;
     } else {
       return value == other.value;
@@ -120,4 +120,4 @@ constexpr integer_range<Integer, true> irange(Integer end) {
   return {Integer(), end};
 }

-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/util/llvmMathExtras.h b/backends/aoti/slim/c10/util/llvmMathExtras.h
index 0b4f92c44c6..a42423d009d 100644
--- a/backends/aoti/slim/c10/util/llvmMathExtras.h
+++ b/backends/aoti/slim/c10/util/llvmMathExtras.h
@@ -56,7 +56,7 @@ unsigned char _BitScanReverse64(unsigned long* _Index, unsigned __int64 _Mask);
 }
 #endif

-namespace standalone::c10::llvm {
+namespace executorch::backends::aoti::slim::c10::llvm {
 /// The behavior an operation has on an input of 0.
 enum ZeroBehavior {
   /// The returned value is undefined.
@@ -620,7 +620,7 @@ inline double BitsToDouble(uint64_t Bits) {
 /// This function takes a 32-bit integer and returns the bit equivalent float.
 inline float BitsToFloat(uint32_t Bits) {
   // TODO: Use std::bit_cast once C++20 becomes available.
-  return standalone::c10::bit_cast<float>(Bits);
+  return executorch::backends::aoti::slim::c10::bit_cast<float>(Bits);
 }

 /// This function takes a double and returns the bit equivalent 64-bit integer.
@@ -896,4 +896,4 @@ SaturatingMultiplyAdd(T X, T Y, T A, bool* ResultOverflowed = nullptr) {
 /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
extern const float huge_valf; -} // namespace standalone::c10::llvm +} // namespace executorch::backends::aoti::slim::c10::llvm diff --git a/backends/aoti/slim/c10/util/overflows.h b/backends/aoti/slim/c10/util/overflows.h index 5f636cd1a75..df2502d7910 100644 --- a/backends/aoti/slim/c10/util/overflows.h +++ b/backends/aoti/slim/c10/util/overflows.h @@ -8,7 +8,7 @@ #include #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { // In some versions of MSVC, there will be a compiler error when building. // C4146: unary minus operator applied to unsigned type, result still unsigned // C4804: unsafe use of type 'bool' in operation @@ -50,11 +50,12 @@ overflows(From f, bool strict_unsigned = false) { // `a + 255 * b`. if (!strict_unsigned) { return greater_than_max(f) || - (standalone::c10::is_negative(f) && + (executorch::backends::aoti::slim::c10::is_negative(f) && -static_cast(f) > static_cast(limit::max())); } } - return standalone::c10::less_than_lowest(f) || greater_than_max(f); + return executorch::backends::aoti::slim::c10::less_than_lowest(f) || + greater_than_max(f); } template @@ -97,4 +98,4 @@ std::enable_if_t::value, bool> overflows( typename scalar_value_type::type, typename From::value_type>(f.imag(), strict_unsigned); } -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/qint32.h b/backends/aoti/slim/c10/util/qint32.h index 7951bfd240a..2d3f72e9a10 100644 --- a/backends/aoti/slim/c10/util/qint32.h +++ b/backends/aoti/slim/c10/util/qint32.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * qint32 is for signed 32 bit quantized Tensors @@ -15,4 +15,4 @@ struct alignas(4) qint32 { STANDALONE_HOST_DEVICE explicit qint32(int32_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/qint8.h b/backends/aoti/slim/c10/util/qint8.h index 53c1fdf465a..f08ce5bfc3f 100644 --- a/backends/aoti/slim/c10/util/qint8.h +++ b/backends/aoti/slim/c10/util/qint8.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * This is the data type for quantized Tensors. 
Right now we only have @@ -17,4 +17,4 @@ struct alignas(1) qint8 { STANDALONE_HOST_DEVICE explicit qint8(int8_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/quint2x4.h b/backends/aoti/slim/c10/util/quint2x4.h index 009802be7f2..e80848cd9eb 100644 --- a/backends/aoti/slim/c10/util/quint2x4.h +++ b/backends/aoti/slim/c10/util/quint2x4.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * quint2x4 is for un-signed 2 bit quantized Tensors that are packed to byte @@ -16,4 +16,4 @@ struct alignas(1) quint2x4 { STANDALONE_HOST_DEVICE explicit quint2x4(uint8_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/quint4x2.h b/backends/aoti/slim/c10/util/quint4x2.h index b6812ab8fde..1c2f8350596 100644 --- a/backends/aoti/slim/c10/util/quint4x2.h +++ b/backends/aoti/slim/c10/util/quint4x2.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * quint4x2 is for un-signed 4 bit quantized Tensors that are packed to byte @@ -16,4 +16,4 @@ struct alignas(1) quint4x2 { STANDALONE_HOST_DEVICE explicit quint4x2(uint8_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/quint8.h b/backends/aoti/slim/c10/util/quint8.h index 4019765ca4a..e8649bc4fa8 100644 --- a/backends/aoti/slim/c10/util/quint8.h +++ b/backends/aoti/slim/c10/util/quint8.h @@ -3,7 +3,7 @@ #include -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { /** * quint8 is for unsigned 8 bit quantized Tensors @@ -15,4 +15,4 @@ struct alignas(1) quint8 { STANDALONE_HOST_DEVICE explicit quint8(uint8_t val) : val_(val) {} }; -} // namespace standalone::c10 +} // namespace executorch::backends::aoti::slim::c10 diff --git a/backends/aoti/slim/c10/util/safe_numerics.h b/backends/aoti/slim/c10/util/safe_numerics.h index 26a05c636aa..df0aa6e7c5c 100644 --- a/backends/aoti/slim/c10/util/safe_numerics.h +++ b/backends/aoti/slim/c10/util/safe_numerics.h @@ -12,7 +12,7 @@ #define STANDALONE_HAS_BUILTIN_OVERFLOW() (1) #endif -namespace standalone::c10 { +namespace executorch::backends::aoti::slim::c10 { STANDALONE_ALWAYS_INLINE bool add_overflows(uint64_t a, uint64_t b, uint64_t* out) { @@ -40,8 +40,8 @@ mul_overflows(uint64_t a, uint64_t b, uint64_t* out) { *out = a * b; // This test isnt exact, but avoids doing integer division return ( - (standalone::c10::llvm::countLeadingZeros(a) + - standalone::c10::llvm::countLeadingZeros(b)) < 64); + (executorch::backends::aoti::slim::c10::llvm::countLeadingZeros(a) + + executorch::backends::aoti::slim::c10::llvm::countLeadingZeros(b)) < 64); #endif } @@ -65,7 +65,8 @@ bool safe_multiplies_u64(It first, It last, uint64_t* out) { uint64_t prod = 1; bool overflow = false; for (; first != last; ++first) { - overflow |= standalone::c10::mul_overflows(prod, *first, &prod); + overflow |= executorch::backends::aoti::slim::c10::mul_overflows( + prod, *first, &prod); } *out = prod; return overflow; @@ -78,7 +79,7 @@ bool safe_multiplies_u64(It first, It last, uint64_t* out) { prod *= x; // log2(0) isn't valid, so need to track it specially is_zero |= (x == 0); - prod_log2 += standalone::c10::llvm::Log2_64_Ceil(x); + prod_log2 += executorch::backends::aoti::slim::c10::llvm::Log2_64_Ceil(x); } *out 
   *out = prod;
   // This test isnt exact, but avoids doing integer division
@@ -91,4 +92,4 @@ bool safe_multiplies_u64(const Container& c, uint64_t* out) {
   return safe_multiplies_u64(c.begin(), c.end(), out);
 }

-} // namespace standalone::c10
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/SlimTensor.h
index 69ac4fec65f..9021e2db922 100644
--- a/backends/aoti/slim/core/SlimTensor.h
+++ b/backends/aoti/slim/core/SlimTensor.h
@@ -18,15 +18,15 @@
 #include
 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {

 class SlimTensor {
  public:
   SlimTensor(
       Storage&& storage,
-      standalone::c10::IntArrayRef sizes,
-      standalone::c10::IntArrayRef strides,
-      standalone::c10::ScalarType dtype,
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      executorch::backends::aoti::slim::c10::ScalarType dtype,
       int64_t storage_offset = 0)
       : storage_(std::move(storage)),
         storage_offset_(storage_offset),
@@ -39,7 +39,7 @@ class SlimTensor {
       : storage_(Storage()),
         storage_offset_(0),
         numel_(0),
-        dtype_(standalone::c10::ScalarType::Float),
+        dtype_(executorch::backends::aoti::slim::c10::ScalarType::Float),
         is_contiguous_(true) {
     sizes_and_strides_.set_sizes({0});
     sizes_and_strides_.set_strides({1});
@@ -67,42 +67,42 @@ class SlimTensor {
   }

   size_t itemsize() const {
-    return standalone::c10::elementSize(dtype_);
+    return executorch::backends::aoti::slim::c10::elementSize(dtype_);
   }

-  standalone::c10::IntArrayRef sizes() const {
+  executorch::backends::aoti::slim::c10::IntArrayRef sizes() const {
     return sizes_and_strides_.sizes_arrayref();
   }

   int64_t size(int64_t dim) const {
-    int64_t wrapped_dim =
-        standalone::c10::maybe_wrap_dim(dim, static_cast<int64_t>(this->dim()));
+    int64_t wrapped_dim = executorch::backends::aoti::slim::c10::maybe_wrap_dim(
+        dim, static_cast<int64_t>(this->dim()));
     return sizes_and_strides_.size_at(static_cast<size_t>(wrapped_dim));
   }

-  standalone::c10::IntArrayRef strides() const {
+  executorch::backends::aoti::slim::c10::IntArrayRef strides() const {
     return sizes_and_strides_.strides_arrayref();
   }

   int64_t stride(int64_t dim) const {
-    int64_t wrapped_dim =
-        standalone::c10::maybe_wrap_dim(dim, static_cast<int64_t>(this->dim()));
+    int64_t wrapped_dim = executorch::backends::aoti::slim::c10::maybe_wrap_dim(
+        dim, static_cast<int64_t>(this->dim()));
     return sizes_and_strides_.stride_at(static_cast<size_t>(wrapped_dim));
   }

-  standalone::c10::ScalarType dtype() const {
+  executorch::backends::aoti::slim::c10::ScalarType dtype() const {
     return dtype_;
   }

-  const standalone::c10::Device& device() const {
+  const executorch::backends::aoti::slim::c10::Device& device() const {
     return storage_->device();
   }

-  standalone::c10::DeviceType device_type() const {
+  executorch::backends::aoti::slim::c10::DeviceType device_type() const {
     return storage_->device().type();
   }

-  standalone::c10::DeviceIndex device_index() const {
+  executorch::backends::aoti::slim::c10::DeviceIndex device_index() const {
     return storage_->device().index();
   }
@@ -149,8 +149,8 @@ class SlimTensor {
   }

   void set_sizes_and_strides(
-      standalone::c10::IntArrayRef sizes,
-      standalone::c10::IntArrayRef strides,
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef strides,
       std::optional<int64_t> storage_offset = std::nullopt) {
     const int64_t new_dim = static_cast<int64_t>(sizes.size());
     STANDALONE_CHECK(
@@ -175,7 +175,7 @@ class SlimTensor {
         if (dim == new_dim - 1) {
           new_strides[dim] = 1;
         } else {
-          overflowed |= standalone::c10::mul_overflows(
+          overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
               new_strides[dim + 1],
               std::max<int64_t>(new_sizes[dim + 1], 1),
               &new_strides[dim]);
@@ -195,20 +195,24 @@ class SlimTensor {
     refresh_contiguous();
   }

-  void set_sizes_contiguous(standalone::c10::IntArrayRef new_size) {
+  void set_sizes_contiguous(
+      executorch::backends::aoti::slim::c10::IntArrayRef new_size) {
     sizes_and_strides_.set_sizes(new_size);
     refresh_numel();
-    empty_tensor_restride(standalone::c10::MemoryFormat::Contiguous);
+    empty_tensor_restride(
+        executorch::backends::aoti::slim::c10::MemoryFormat::Contiguous);
   }

-  void empty_tensor_restride(standalone::c10::MemoryFormat memory_format);
+  void empty_tensor_restride(
+      executorch::backends::aoti::slim::c10::MemoryFormat memory_format);

   SlimTensor resize_(
-      standalone::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
       std::optional<int64_t> optional_memory_format);

   // Conversion operations
-  SlimTensor to(const standalone::c10::Device& device) const {
+  SlimTensor to(
+      const executorch::backends::aoti::slim::c10::Device& device) const {
     if (device == storage_->device()) {
       return *this;
     }
@@ -230,7 +234,7 @@ class SlimTensor {
     return to(DEFAULT_CUDA_DEVICE);
   }

-  SlimTensor to(standalone::c10::ScalarType dtype) const {
+  SlimTensor to(executorch::backends::aoti::slim::c10::ScalarType dtype) const {
     STANDALONE_CHECK(false, "TBD: to(dtype)");
   }
@@ -252,7 +256,8 @@ class SlimTensor {
     // Case 2: At least one tensor is non-contiguous, perform element-wise copy
     // that respects both source and destination strides.
-    const size_t elem_size = standalone::c10::elementSize(dtype_);
+    const size_t elem_size =
+        executorch::backends::aoti::slim::c10::elementSize(dtype_);
     char* dst_data = static_cast<char*>(this->data_ptr());
     const char* src_data = static_cast<const char*>(other.data_ptr());
@@ -372,7 +377,8 @@ class SlimTensor {
       }
     } else {
       // Handle non-contiguous tensors by respecting strides
-      const size_t elem_size = standalone::c10::elementSize(this->dtype_);
+      const size_t elem_size =
+          executorch::backends::aoti::slim::c10::elementSize(this->dtype_);
       char* base_data = static_cast<char*>(this->data_ptr());

       std::vector<int64_t> counter(this->dim(), 0);
@@ -403,41 +409,43 @@ class SlimTensor {
     };

     switch (this->dtype()) {
-      case standalone::c10::ScalarType::Double:
+      case executorch::backends::aoti::slim::c10::ScalarType::Double:
         fill_value(value.to<double>());
         break;
-      case standalone::c10::ScalarType::Float:
+      case executorch::backends::aoti::slim::c10::ScalarType::Float:
         fill_value(value.to<float>());
         break;
-      case standalone::c10::ScalarType::Half:
-        fill_value(value.to<standalone::c10::Half>());
+      case executorch::backends::aoti::slim::c10::ScalarType::Half:
+        fill_value(value.to<executorch::backends::aoti::slim::c10::Half>());
         break;
-      case standalone::c10::ScalarType::BFloat16:
-        fill_value(value.to<standalone::c10::BFloat16>());
+      case executorch::backends::aoti::slim::c10::ScalarType::BFloat16:
+        fill_value(value.to<executorch::backends::aoti::slim::c10::BFloat16>());
         break;
-      case standalone::c10::ScalarType::Long:
+      case executorch::backends::aoti::slim::c10::ScalarType::Long:
         fill_value(value.to<int64_t>());
         break;
-      case standalone::c10::ScalarType::Int:
+      case executorch::backends::aoti::slim::c10::ScalarType::Int:
         fill_value(value.to<int32_t>());
         break;
-      case standalone::c10::ScalarType::Short:
+      case executorch::backends::aoti::slim::c10::ScalarType::Short:
         fill_value(value.to<int16_t>());
         break;
-      case standalone::c10::ScalarType::Char:
+      case executorch::backends::aoti::slim::c10::ScalarType::Char:
         fill_value(value.to<int8_t>());
         break;
-      case standalone::c10::ScalarType::Byte:
+      case executorch::backends::aoti::slim::c10::ScalarType::Byte:
         fill_value(value.to<uint8_t>());
         break;
-      case standalone::c10::ScalarType::Bool:
+      case executorch::backends::aoti::slim::c10::ScalarType::Bool:
         fill_value(value.to<bool>());
         break;
-      case standalone::c10::ScalarType::ComplexFloat:
-        fill_value(value.to<standalone::c10::complex<float>>());
+      case executorch::backends::aoti::slim::c10::ScalarType::ComplexFloat:
+        fill_value(
+            value.to<executorch::backends::aoti::slim::c10::complex<float>>());
         break;
-      case standalone::c10::ScalarType::ComplexDouble:
-        fill_value(value.to<standalone::c10::complex<double>>());
+      case executorch::backends::aoti::slim::c10::ScalarType::ComplexDouble:
+        fill_value(
+            value.to<executorch::backends::aoti::slim::c10::complex<double>>());
         break;
       default:
         STANDALONE_CHECK(false, "fill_: Unsupported dtype");
@@ -452,34 +460,38 @@ class SlimTensor {

   SlimTensor clone_contiguous() const {
     std::vector<int64_t> contig_strides =
-        standalone::slim::compute_contiguous_strides(this->sizes());
+        executorch::backends::aoti::slim::compute_contiguous_strides(
+            this->sizes());
     return _clone_impl(
         this->sizes(), contig_strides, this->dtype(), this->device());
   }

   // View operations
   SlimTensor as_strided(
-      standalone::c10::IntArrayRef sizes,
-      standalone::c10::IntArrayRef strides,
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef strides,
       int64_t storage_offset) const;

   SlimTensor as_strided_(
-      standalone::c10::IntArrayRef sizes,
-      standalone::c10::IntArrayRef strides,
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef strides,
       int64_t storage_offset);

-  SlimTensor permute(standalone::c10::IntArrayRef dims) const;
+  SlimTensor permute(
+      executorch::backends::aoti::slim::c10::IntArrayRef dims) const;

   // Transpose operations
   SlimTensor transpose() const;
   SlimTensor transpose(int64_t dim0, int64_t dim1) const;
   SlimTensor t() const;

-  SlimTensor reshape(standalone::c10::IntArrayRef proposed_shape) const;
+  SlimTensor reshape(
+      executorch::backends::aoti::slim::c10::IntArrayRef proposed_shape) const;

   SlimTensor narrow(int64_t dim, int64_t start, int64_t length) const;

   // Generic element access returning SlimTensor
-  SlimTensor operator[](standalone::c10::IntArrayRef indices) const {
+  SlimTensor operator[](
+      executorch::backends::aoti::slim::c10::IntArrayRef indices) const {
     STANDALONE_CHECK(
         indices.size() <= this->dim(),
         "Number of indices (",
@@ -494,7 +506,7 @@ class SlimTensor {
     for (size_t i = 0; i < indices.size(); ++i) {
       int64_t idx = indices[i];
       int64_t size = this->size(i);
-      idx = standalone::c10::maybe_wrap_dim(idx, size);
+      idx = executorch::backends::aoti::slim::c10::maybe_wrap_dim(idx, size);
       linear_index += idx * this->stride(i);
     }
     // Create 0-dimensional tensor pointing to the indexed element
@@ -511,7 +523,7 @@ class SlimTensor {
     for (size_t i = 0; i < indices.size(); ++i) {
       int64_t idx = indices[i];
       int64_t size = this->size(i);
-      idx = standalone::c10::maybe_wrap_dim(idx, size);
+      idx = executorch::backends::aoti::slim::c10::maybe_wrap_dim(idx, size);
       offset_adjustment += idx * this->stride(i);
     }
@@ -533,41 +545,43 @@ class SlimTensor {

   // Convenience overload for single index
   SlimTensor operator[](int64_t index) const {
-    return (*this)[standalone::c10::IntArrayRef{index}];
+    return (*this)[executorch::backends::aoti::slim::c10::IntArrayRef{index}];
   }

   // Convenience overloads for common multi-dimensional cases
   SlimTensor operator[](std::initializer_list<int64_t> indices) const {
-    return (*this)[standalone::c10::IntArrayRef(indices)];
+    return (*this)[executorch::backends::aoti::slim::c10::IntArrayRef(indices)];
   }

   // Extract scalar value from 0-dimensional tensor
-  standalone::c10::Scalar item() const {
+  executorch::backends::aoti::slim::c10::Scalar item() const {
     switch (this->dtype()) {
-      case standalone::c10::ScalarType::Double:
+      case executorch::backends::aoti::slim::c10::ScalarType::Double:
        return this->item<double>();
-      case standalone::c10::ScalarType::Float:
+      case executorch::backends::aoti::slim::c10::ScalarType::Float:
        return this->item<float>();
-      case standalone::c10::ScalarType::Half:
-        return this->item<standalone::c10::Half>();
-      case standalone::c10::ScalarType::BFloat16:
-        return this->item<standalone::c10::BFloat16>();
-      case standalone::c10::ScalarType::Long:
+      case executorch::backends::aoti::slim::c10::ScalarType::Half:
+        return this->item<executorch::backends::aoti::slim::c10::Half>();
+      case executorch::backends::aoti::slim::c10::ScalarType::BFloat16:
+        return this->item<executorch::backends::aoti::slim::c10::BFloat16>();
+      case executorch::backends::aoti::slim::c10::ScalarType::Long:
        return this->item<int64_t>();
-      case standalone::c10::ScalarType::Int:
+      case executorch::backends::aoti::slim::c10::ScalarType::Int:
        return this->item<int32_t>();
-      case standalone::c10::ScalarType::Short:
+      case executorch::backends::aoti::slim::c10::ScalarType::Short:
        return this->item<int16_t>();
-      case standalone::c10::ScalarType::Char:
+      case executorch::backends::aoti::slim::c10::ScalarType::Char:
        return this->item<int8_t>();
-      case standalone::c10::ScalarType::Byte:
+      case executorch::backends::aoti::slim::c10::ScalarType::Byte:
        return this->item<uint8_t>();
-      case standalone::c10::ScalarType::Bool:
+      case executorch::backends::aoti::slim::c10::ScalarType::Bool:
        return this->item<bool>();
-      case standalone::c10::ScalarType::ComplexFloat:
-        return this->item<standalone::c10::complex<float>>();
-      case standalone::c10::ScalarType::ComplexDouble:
-        return this->item<standalone::c10::complex<double>>();
+      case executorch::backends::aoti::slim::c10::ScalarType::ComplexFloat:
+        return this
+            ->item<executorch::backends::aoti::slim::c10::complex<float>>();
+      case executorch::backends::aoti::slim::c10::ScalarType::ComplexDouble:
+        return this
+            ->item<executorch::backends::aoti::slim::c10::complex<double>>();
       default:
         STANDALONE_CHECK(false, "item(): Unsupported dtype");
     }
@@ -589,10 +603,10 @@ class SlimTensor {

  private:
   SlimTensor _clone_impl(
-      standalone::c10::IntArrayRef sizes,
-      standalone::c10::IntArrayRef strides,
-      standalone::c10::ScalarType dtype,
-      const standalone::c10::Device& device) const {
+      executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+      executorch::backends::aoti::slim::c10::IntArrayRef strides,
+      executorch::backends::aoti::slim::c10::ScalarType dtype,
+      const executorch::backends::aoti::slim::c10::Device& device) const {
     Storage storage = new_storage(sizes, strides, dtype, device);
     SlimTensor result =
         SlimTensor(std::move(storage), sizes, strides, dtype, 0);
@@ -605,7 +619,7 @@ class SlimTensor {
   }

   bool compute_is_contiguous() const {
-    return standalone::c10::_compute_contiguous(
+    return executorch::backends::aoti::slim::c10::_compute_contiguous(
         sizes_and_strides_.sizes_arrayref(),
         sizes_and_strides_.strides_arrayref(),
         numel_);
@@ -619,19 +633,27 @@ class SlimTensor {
   Storage storage_; // device_type_ and device_index_ are stored in storage_
   int64_t storage_offset_{0};
-  standalone::c10::SizesAndStrides sizes_and_strides_;
+  executorch::backends::aoti::slim::c10::SizesAndStrides sizes_and_strides_;
   // If sizes and strides are empty, the numel is 1!! However, most of the
   // time, we will immediately set sizes to {0} and reset numel to 0.
   // (Can't do that in the default initializers, because there's no way to
   // spell "allocate a one-element array" for strides_).
size_t numel_{1}; - standalone::c10::ScalarType dtype_; + executorch::backends::aoti::slim::c10::ScalarType dtype_; bool is_contiguous_{true}; // NOLINTNEXTLINE(clang-diagnostic-unused-private-field) std::array reserved_{0}; // padding to align to 8 bytes }; -} // namespace standalone::slim +} // namespace executorch::backends::aoti::slim + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::backends::aoti::slim::SlimTensor; +} // namespace executor +} // namespace torch #include #include diff --git a/backends/aoti/slim/core/SlimTensorResize-incl.h b/backends/aoti/slim/core/SlimTensorResize-incl.h index 64c976aa5d8..bfc7b149d5e 100644 --- a/backends/aoti/slim/core/SlimTensorResize-incl.h +++ b/backends/aoti/slim/core/SlimTensorResize-incl.h @@ -6,9 +6,9 @@ #include #include -namespace standalone::slim { +namespace executorch::backends::aoti::slim { inline void SlimTensor::empty_tensor_restride( - standalone::c10::MemoryFormat memory_format) { + executorch::backends::aoti::slim::c10::MemoryFormat memory_format) { #ifdef DEBUG STANDALONE_INTERNAL_ASSERT( compute_numel() == numel_, @@ -16,7 +16,7 @@ inline void SlimTensor::empty_tensor_restride( "called before setting correct numel"); #endif switch (memory_format) { - case standalone::c10::MemoryFormat::Contiguous: { + case executorch::backends::aoti::slim::c10::MemoryFormat::Contiguous: { // dim_ is a virtual call, don't repeat it const auto dim_ = dim(); sizes_and_strides_.resize(dim_); @@ -25,7 +25,7 @@ inline void SlimTensor::empty_tensor_restride( const auto last_idx = dim_ - 1; sizes_and_strides_.stride_at_unchecked(last_idx) = 1; for (int64_t i = static_cast(last_idx) - 1; i >= 0; --i) { - overflowed |= standalone::c10::mul_overflows( + overflowed |= executorch::backends::aoti::slim::c10::mul_overflows( sizes_and_strides_.stride_at_unchecked(i + 1), std::max(sizes_and_strides_.size_at_unchecked(i + 1), 1), std::addressof(sizes_and_strides_.stride_at_unchecked(i))); @@ -34,24 +34,24 @@ inline void SlimTensor::empty_tensor_restride( } break; } - case standalone::c10::MemoryFormat::ChannelsLast: { + case executorch::backends::aoti::slim::c10::MemoryFormat::ChannelsLast: { STANDALONE_CHECK( dim() == 4, "required rank 4 tensor to use channels_last format"); set_sizes_and_strides(sizes(), get_channels_last_strides_2d(sizes())); break; } - case standalone::c10::MemoryFormat::ChannelsLast3d: { + case executorch::backends::aoti::slim::c10::MemoryFormat::ChannelsLast3d: { STANDALONE_CHECK( dim() == 5, "required rank 5 tensor to use channels_last_3d format"); set_sizes_and_strides(sizes(), get_channels_last_strides_3d(sizes())); break; } - case standalone::c10::MemoryFormat::Preserve: + case executorch::backends::aoti::slim::c10::MemoryFormat::Preserve: STANDALONE_CHECK(false, "unsupported memory format ", memory_format); // Cleaning warning messages, no need to break as STANDALONE_CHECK(false) // terminates flow. 
// break; - case standalone::c10::MemoryFormat::NumOptions: + case executorch::backends::aoti::slim::c10::MemoryFormat::NumOptions: STANDALONE_INTERNAL_ASSERT( false, "invalid memory format ", memory_format); } @@ -125,8 +125,8 @@ inline void _maybe_resize_storage(SlimTensor* self, int64_t new_size_bytes) { inline SlimTensor* _resize_impl_( SlimTensor* self, - standalone::c10::IntArrayRef sizes, - std::optional strides, + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + std::optional strides, bool resize_storage) { if (self->sizes() == sizes && (!strides || self->strides() == strides.value())) { @@ -154,16 +154,17 @@ inline SlimTensor* _resize_impl_( } inline SlimTensor SlimTensor::resize_( - standalone::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::IntArrayRef sizes, std::optional optional_memory_format) { _resize_impl_(this, sizes, /*strides=*/std::nullopt, true); if (optional_memory_format.has_value()) { - standalone::c10::MemoryFormat memory_format = - static_cast( + executorch::backends::aoti::slim::c10::MemoryFormat memory_format = + static_cast( optional_memory_format.value()); STANDALONE_CHECK( - memory_format != standalone::c10::MemoryFormat::Preserve, + memory_format != + executorch::backends::aoti::slim::c10::MemoryFormat::Preserve, "Unsupported memory format", memory_format); this->empty_tensor_restride(memory_format); @@ -171,4 +172,4 @@ inline SlimTensor SlimTensor::resize_( return *this; } -} // namespace standalone::slim +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/core/SlimTensorView-incl.h b/backends/aoti/slim/core/SlimTensorView-incl.h index 0df4c4705f1..c247047900c 100644 --- a/backends/aoti/slim/core/SlimTensorView-incl.h +++ b/backends/aoti/slim/core/SlimTensorView-incl.h @@ -6,10 +6,10 @@ #include #include -namespace standalone::slim { +namespace executorch::backends::aoti::slim { inline SlimTensor SlimTensor::as_strided( - standalone::c10::IntArrayRef sizes, - standalone::c10::IntArrayRef strides, + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::IntArrayRef strides, int64_t storage_offset) const { SlimTensor result = *this; result.as_strided_(sizes, strides, storage_offset); @@ -17,8 +17,8 @@ inline SlimTensor SlimTensor::as_strided( } inline SlimTensor SlimTensor::as_strided_( - standalone::c10::IntArrayRef sizes, - standalone::c10::IntArrayRef strides, + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::IntArrayRef strides, int64_t storage_offset) { STANDALONE_CHECK( sizes.size() == strides.size(), @@ -44,20 +44,22 @@ inline SlimTensor SlimTensor::as_strided_( return *this; } -inline SlimTensor SlimTensor::permute(standalone::c10::IntArrayRef dims) const { +inline SlimTensor SlimTensor::permute( + executorch::backends::aoti::slim::c10::IntArrayRef dims) const { const size_t ndim = this->dim(); STANDALONE_CHECK( ndim == static_cast(dims.size()), "permute: dims length must be equal to tensor.dim()") - standalone::c10::ArrayRef old_sizes = this->sizes(); - standalone::c10::ArrayRef old_strides = this->strides(); + executorch::backends::aoti::slim::c10::ArrayRef old_sizes = this->sizes(); + executorch::backends::aoti::slim::c10::ArrayRef old_strides = this->strides(); std::vector new_sizes = old_sizes.vec(); std::vector new_strides = old_strides.vec(); std::vector seen_dims(ndim, false); for (size_t i = 0; i < ndim; i++) { - int64_t d = standalone::c10::maybe_wrap_dim(dims[i], ndim); + int64_t d = + 
executorch::backends::aoti::slim::c10::maybe_wrap_dim(dims[i], ndim); STANDALONE_CHECK(!seen_dims[d], "permute: duplicate dims are not allowed"); seen_dims[d] = true; new_sizes[i] = old_sizes[d]; @@ -82,8 +84,8 @@ inline SlimTensor SlimTensor::transpose(int64_t dim0, int64_t dim1) const { } // Wrap dimensions and swap them - dim0 = standalone::c10::maybe_wrap_dim(dim0, ndim); - dim1 = standalone::c10::maybe_wrap_dim(dim1, ndim); + dim0 = executorch::backends::aoti::slim::c10::maybe_wrap_dim(dim0, ndim); + dim1 = executorch::backends::aoti::slim::c10::maybe_wrap_dim(dim1, ndim); std::swap(dims[dim0], dims[dim1]); return permute(dims); @@ -94,7 +96,7 @@ inline SlimTensor SlimTensor::t() const { } inline SlimTensor SlimTensor::reshape( - standalone::c10::IntArrayRef proposed_shape) const { + executorch::backends::aoti::slim::c10::IntArrayRef proposed_shape) const { std::vector final_shape_vec = infer_size(proposed_shape, this->numel()); @@ -124,8 +126,9 @@ inline SlimTensor SlimTensor::narrow(int64_t dim, int64_t start, int64_t length) const { STANDALONE_CHECK( this->dim() > 0, "narrow() cannot be applied to a 0-dim tensor."); - dim = standalone::c10::maybe_wrap_dim(dim, static_cast(this->dim())); - start = standalone::c10::maybe_wrap_dim( + dim = executorch::backends::aoti::slim::c10::maybe_wrap_dim( + dim, static_cast(this->dim())); + start = executorch::backends::aoti::slim::c10::maybe_wrap_dim( start, static_cast(this->size(dim))); STANDALONE_CHECK(length >= 0, "narrow(): length must be non-negative."); @@ -149,4 +152,4 @@ inline SlimTensor SlimTensor::narrow(int64_t dim, int64_t start, int64_t length) return result; } -} // namespace standalone::slim +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/core/Storage.h b/backends/aoti/slim/core/Storage.h index 4230a0d2b0a..135b44bca23 100644 --- a/backends/aoti/slim/core/Storage.h +++ b/backends/aoti/slim/core/Storage.h @@ -16,29 +16,35 @@ #include #include -namespace standalone::slim { +namespace executorch::backends::aoti::slim { using DeleterFn = void (*)(void*); namespace detail { inline void noop(void*) {} } // namespace detail -const standalone::c10::Device CPU_DEVICE = - standalone::c10::Device(standalone::c10::DeviceType::CPU, 0); +const executorch::backends::aoti::slim::c10::Device CPU_DEVICE = + executorch::backends::aoti::slim::c10::Device( + executorch::backends::aoti::slim::c10::DeviceType::CPU, + 0); -const standalone::c10::Device DEFAULT_CUDA_DEVICE = - standalone::c10::Device(standalone::c10::DeviceType::CUDA, 0); +const executorch::backends::aoti::slim::c10::Device DEFAULT_CUDA_DEVICE = + executorch::backends::aoti::slim::c10::Device( + executorch::backends::aoti::slim::c10::DeviceType::CUDA, + 0); -// standalone::c10::Device traits template for device-specific operations -template +// executorch::backends::aoti::slim::c10::Device traits template for +// device-specific operations +template struct DeviceTraits; // CPU specialization template <> -struct DeviceTraits { +struct DeviceTraits { static void* allocate( size_t nbytes, - const standalone::c10::Device& device = CPU_DEVICE) { + const executorch::backends::aoti::slim::c10::Device& device = + CPU_DEVICE) { // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) return malloc(nbytes); } @@ -52,8 +58,8 @@ struct DeviceTraits { void* dst, const void* src, size_t nbytes, - const standalone::c10::Device& dst_device, - const standalone::c10::Device& src_device) { + const executorch::backends::aoti::slim::c10::Device& dst_device, + const 
executorch::backends::aoti::slim::c10::Device& src_device) { std::memcpy(dst, src, nbytes); } }; @@ -61,9 +67,11 @@ struct DeviceTraits { // CUDA specialization #ifdef USE_CUDA template <> -struct DeviceTraits { - static void* allocate(size_t nbytes, const standalone::c10::Device& device) { - standalone::slim::cuda::CUDAGuard guard(device); +struct DeviceTraits { + static void* allocate( + size_t nbytes, + const executorch::backends::aoti::slim::c10::Device& device) { + executorch::backends::aoti::slim::cuda::CUDAGuard guard(device); void* data = nullptr; STANDALONE_CUDA_CHECK(cudaMalloc(&data, nbytes)); return data; @@ -77,11 +85,11 @@ struct DeviceTraits { void* dst, const void* src, size_t nbytes, - const standalone::c10::Device& dst_device, - const standalone::c10::Device& src_device) { + const executorch::backends::aoti::slim::c10::Device& dst_device, + const executorch::backends::aoti::slim::c10::Device& src_device) { // Determine the direction cudaMemcpyKind direction = cudaMemcpyDeviceToDevice; - standalone::c10::Device cuda_device = + executorch::backends::aoti::slim::c10::Device cuda_device = dst_device; // Default to destination device if (src_device.is_cpu()) { @@ -98,14 +106,16 @@ struct DeviceTraits { dst_device.index()); } // Set up CUDA context for the appropriate device - standalone::slim::cuda::CUDAGuard guard(cuda_device); + executorch::backends::aoti::slim::cuda::CUDAGuard guard(cuda_device); STANDALONE_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, direction)); } }; #else template <> -struct DeviceTraits { - static void* allocate(size_t nbytes, const standalone::c10::Device& device) { +struct DeviceTraits { + static void* allocate( + size_t nbytes, + const executorch::backends::aoti::slim::c10::Device& device) { STANDALONE_CHECK(false, "Build with USE_CUDA=1 to enable CUDA support"); } @@ -117,8 +127,8 @@ struct DeviceTraits { void* dst, const void* src, size_t nbytes, - const standalone::c10::Device& dst_device, - const standalone::c10::Device& src_device) { + const executorch::backends::aoti::slim::c10::Device& dst_device, + const executorch::backends::aoti::slim::c10::Device& src_device) { STANDALONE_CHECK(false, "Build with USE_CUDA=1 to enable CUDA support"); } }; @@ -129,24 +139,29 @@ struct DeviceTraits { // non-owning. class MaybeOwningStorage { public: - MaybeOwningStorage(const standalone::c10::Device& device, size_t nbytes) + MaybeOwningStorage( + const executorch::backends::aoti::slim::c10::Device& device, + size_t nbytes) : device_(device), capacity_(nbytes), is_owning_(true) { // Allocating memory here so owning_ has to be true. 
if (device.is_cpu()) { - data_ = DeviceTraits::allocate( - nbytes, device); - deleter_ = DeviceTraits::free; + data_ = + DeviceTraits:: + allocate(nbytes, device); + deleter_ = DeviceTraits< + executorch::backends::aoti::slim::c10::DeviceType::CPU>::free; } else if (device.is_cuda()) { - data_ = DeviceTraits::allocate( - nbytes, device); - deleter_ = DeviceTraits::free; + data_ = DeviceTraits::allocate(nbytes, device); + deleter_ = DeviceTraits< + executorch::backends::aoti::slim::c10::DeviceType::CUDA>::free; } else { STANDALONE_CHECK(false, "Unsupported device type"); } } MaybeOwningStorage( - const standalone::c10::Device& device, + const executorch::backends::aoti::slim::c10::Device& device, void* data, size_t nbytes) : device_(device), data_(data), capacity_(nbytes), is_owning_(false) { @@ -201,7 +216,7 @@ class MaybeOwningStorage { void* dst_data_ptr, void* src_data_ptr, size_t nbytes, - const standalone::c10::Device& src_device) { + const executorch::backends::aoti::slim::c10::Device& src_device) { STANDALONE_CHECK( dst_data_ptr, "Storage clone failed: dst_data_ptr can not be nullptr") STANDALONE_CHECK( @@ -221,7 +236,8 @@ class MaybeOwningStorage { } } - MaybeOwningStorage clone(const standalone::c10::Device& device) const { + MaybeOwningStorage clone( + const executorch::backends::aoti::slim::c10::Device& device) const { STANDALONE_CHECK( data_, "Storage clone failed: source data can not be nullptr") // Create a new owning storage with the specified device and same capacity @@ -230,12 +246,12 @@ class MaybeOwningStorage { // Copy the data from the current storage to the new storage if (device_.is_cpu() && device.is_cpu()) { // CPU to CPU copy - DeviceTraits::memcpy( - cloned_storage.data_, data_, capacity_, device, device_); + DeviceTraits:: + memcpy(cloned_storage.data_, data_, capacity_, device, device_); } else { // At least one of the devices is CUDA - DeviceTraits::memcpy( - cloned_storage.data_, data_, capacity_, device, device_); + DeviceTraits:: + memcpy(cloned_storage.data_, data_, capacity_, device, device_); } return cloned_storage; @@ -249,7 +265,7 @@ class MaybeOwningStorage { return data_; } - const standalone::c10::Device& device() const { + const executorch::backends::aoti::slim::c10::Device& device() const { return device_; } @@ -286,7 +302,7 @@ class MaybeOwningStorage { } private: - standalone::c10::Device device_ = CPU_DEVICE; + executorch::backends::aoti::slim::c10::Device device_ = CPU_DEVICE; void* data_ = nullptr; size_t capacity_ = 0; DeleterFn deleter_ = detail::noop; @@ -296,12 +312,15 @@ class MaybeOwningStorage { using Storage = SharedPtr; inline Storage new_storage( - standalone::c10::IntArrayRef sizes, - standalone::c10::IntArrayRef strides, - standalone::c10::ScalarType dtype, - const standalone::c10::Device& device = CPU_DEVICE) { + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::IntArrayRef strides, + executorch::backends::aoti::slim::c10::ScalarType dtype, + const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) { size_t nbytes = compute_storage_nbytes( - sizes, strides, standalone::c10::elementSize(dtype), 0); + sizes, + strides, + executorch::backends::aoti::slim::c10::elementSize(dtype), + 0); return Storage(new MaybeOwningStorage(device, nbytes)); } -} // namespace standalone::slim +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/cuda/Guard.h b/backends/aoti/slim/cuda/Guard.h index c9b2441b148..2fcafce92f9 100644 --- 
a/backends/aoti/slim/cuda/Guard.h +++ b/backends/aoti/slim/cuda/Guard.h @@ -14,19 +14,20 @@ #include #include -namespace standalone::slim::cuda { +namespace executorch::backends::aoti::slim::cuda { // Thread-local stream management namespace detail { -inline thread_local std:: - unordered_map - current_streams_; +inline thread_local std::unordered_map< + executorch::backends::aoti::slim::c10::DeviceIndex, + cudaStream_t> + current_streams_; } /// Set the current CUDA stream for the specified device inline void setCurrentCUDAStream( cudaStream_t stream, - standalone::c10::DeviceIndex device_index = -1) { + executorch::backends::aoti::slim::c10::DeviceIndex device_index = -1) { if (device_index == -1) { // Get current device if not specified int current_device; @@ -39,7 +40,7 @@ inline void setCurrentCUDAStream( /// Get the current CUDA stream for the specified device inline cudaStream_t getCurrentCUDAStream( - standalone::c10::DeviceIndex device_index = -1) { + executorch::backends::aoti::slim::c10::DeviceIndex device_index = -1) { if (device_index == -1) { // Get current device if not specified int current_device; @@ -64,13 +65,14 @@ struct CUDAGuard { explicit CUDAGuard() = delete; /// Set the current CUDA device to the passed device index. - explicit CUDAGuard(standalone::c10::DeviceIndex device_index) { + explicit CUDAGuard( + executorch::backends::aoti::slim::c10::DeviceIndex device_index) { set_index(device_index); } /// Sets the current CUDA device to the passed device. Errors if the passed /// device is not a CUDA device. - explicit CUDAGuard(standalone::c10::Device device) { + explicit CUDAGuard(executorch::backends::aoti::slim::c10::Device device) { STANDALONE_CHECK( device.is_cuda(), "Expected a CUDA device for CUDAGuard, but got ", @@ -94,7 +96,8 @@ struct CUDAGuard { } /// Sets the CUDA device to the given device index. - void set_index(standalone::c10::DeviceIndex device_index) { + void set_index( + executorch::backends::aoti::slim::c10::DeviceIndex device_index) { int orig_index = -1; STANDALONE_CUDA_CHECK(cudaGetDevice(&orig_index)); @@ -107,8 +110,8 @@ struct CUDAGuard { private: /// The guard for the current device. - standalone::c10::DeviceIndex original_device_index_; - standalone::c10::DeviceIndex current_device_index_; + executorch::backends::aoti::slim::c10::DeviceIndex original_device_index_; + executorch::backends::aoti::slim::c10::DeviceIndex current_device_index_; }; struct CUDAStreamGuard { @@ -118,7 +121,7 @@ struct CUDAStreamGuard { /// Set the current CUDA stream to the passed stream on the specified device. explicit CUDAStreamGuard( cudaStream_t stream, - standalone::c10::DeviceIndex device_index) + executorch::backends::aoti::slim::c10::DeviceIndex device_index) : device_guard_(device_index) { set_stream(stream, device_index); } @@ -140,7 +143,7 @@ struct CUDAStreamGuard { /// Sets the CUDA stream to the given stream on the specified device. 
void set_stream( cudaStream_t stream, - standalone::c10::DeviceIndex device_index) { + executorch::backends::aoti::slim::c10::DeviceIndex device_index) { // Store the original stream for this device original_stream_ = getCurrentCUDAStream(device_index); current_stream_ = stream; @@ -156,7 +159,7 @@ struct CUDAStreamGuard { } /// Get the device index being guarded - standalone::c10::DeviceIndex device_index() const { + executorch::backends::aoti::slim::c10::DeviceIndex device_index() const { return device_index_; } @@ -168,7 +171,7 @@ struct CUDAStreamGuard { /// The current stream being guarded cudaStream_t current_stream_ = nullptr; /// The device index for this stream guard - standalone::c10::DeviceIndex device_index_; + executorch::backends::aoti::slim::c10::DeviceIndex device_index_; }; -} // namespace standalone::slim::cuda +} // namespace executorch::backends::aoti::slim::cuda diff --git a/backends/aoti/slim/factory/Empty.h b/backends/aoti/slim/factory/Empty.h index bbd4996b84c..20dd89fe1e6 100644 --- a/backends/aoti/slim/factory/Empty.h +++ b/backends/aoti/slim/factory/Empty.h @@ -7,23 +7,23 @@ #include #include -namespace standalone::slim { +namespace executorch::backends::aoti::slim { // The returned SlimTensor owns the underlying storage inline SlimTensor empty_strided( - standalone::c10::IntArrayRef sizes, - standalone::c10::IntArrayRef strides, - standalone::c10::ScalarType dtype, - const standalone::c10::Device& device = CPU_DEVICE) { + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::IntArrayRef strides, + executorch::backends::aoti::slim::c10::ScalarType dtype, + const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) { Storage storage = new_storage(sizes, strides, dtype, device); return SlimTensor(std::move(storage), sizes, strides, dtype, 0); } inline SlimTensor empty( - standalone::c10::IntArrayRef sizes, - standalone::c10::ScalarType dtype, - const standalone::c10::Device& device = CPU_DEVICE) { + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::ScalarType dtype, + const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) { std::vector contig_strides = - standalone::slim::compute_contiguous_strides(sizes); + executorch::backends::aoti::slim::compute_contiguous_strides(sizes); Storage storage = new_storage(sizes, contig_strides, dtype, device); return SlimTensor(std::move(storage), sizes, contig_strides, dtype, 0); } @@ -32,4 +32,4 @@ inline SlimTensor empty_like(const SlimTensor& other) { return empty_strided( other.sizes(), other.strides(), other.dtype(), other.device()); } -} // namespace standalone::slim +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/factory/Factory.h b/backends/aoti/slim/factory/Factory.h index 5e172bc9f6a..f0d26041ad3 100644 --- a/backends/aoti/slim/factory/Factory.h +++ b/backends/aoti/slim/factory/Factory.h @@ -2,13 +2,13 @@ #include -namespace standalone::slim { +namespace executorch::backends::aoti::slim { inline SlimTensor zeros( - standalone::c10::IntArrayRef sizes, - standalone::c10::ScalarType dtype, - const standalone::c10::Device& device = CPU_DEVICE) { + executorch::backends::aoti::slim::c10::IntArrayRef sizes, + executorch::backends::aoti::slim::c10::ScalarType dtype, + const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) { SlimTensor tensor = empty(sizes, dtype, device); - tensor.fill_(standalone::c10::Scalar(0)); + 
diff --git a/backends/aoti/slim/factory/Factory.h b/backends/aoti/slim/factory/Factory.h
index 5e172bc9f6a..f0d26041ad3 100644
--- a/backends/aoti/slim/factory/Factory.h
+++ b/backends/aoti/slim/factory/Factory.h
@@ -2,13 +2,13 @@

 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 inline SlimTensor zeros(
-    standalone::c10::IntArrayRef sizes,
-    standalone::c10::ScalarType dtype,
-    const standalone::c10::Device& device = CPU_DEVICE) {
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   SlimTensor tensor = empty(sizes, dtype, device);
-  tensor.fill_(standalone::c10::Scalar(0));
+  tensor.fill_(executorch::backends::aoti::slim::c10::Scalar(0));
   return tensor;
 }

@@ -17,11 +17,11 @@ inline SlimTensor zeros_like(const SlimTensor& other) {
 }

 inline SlimTensor ones(
-    standalone::c10::IntArrayRef sizes,
-    standalone::c10::ScalarType dtype,
-    const standalone::c10::Device& device = CPU_DEVICE) {
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   SlimTensor tensor = empty(sizes, dtype, device);
-  tensor.fill_(standalone::c10::Scalar(1));
+  tensor.fill_(executorch::backends::aoti::slim::c10::Scalar(1));
   return tensor;
 }

@@ -29,4 +29,4 @@ inline SlimTensor ones_like(const SlimTensor& other) {
   return ones(other.sizes(), other.dtype(), other.device());
 }
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/factory/FromBlob.h b/backends/aoti/slim/factory/FromBlob.h
index d1877f7f31d..c7a558f72ed 100644
--- a/backends/aoti/slim/factory/FromBlob.h
+++ b/backends/aoti/slim/factory/FromBlob.h
@@ -2,15 +2,15 @@

 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 // The returned SlimTensor does not own the underlying storage
 inline SlimTensor from_blob(
     void* data,
-    standalone::c10::IntArrayRef sizes,
-    standalone::c10::IntArrayRef strides,
-    standalone::c10::ScalarType dtype,
-    const standalone::c10::Device& device = CPU_DEVICE,
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::IntArrayRef strides,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
     int64_t storage_offset = 0) {
   STANDALONE_CHECK(data != nullptr, "data pointer can not be nullptr");
@@ -24,13 +24,13 @@ inline SlimTensor from_blob(

 inline SlimTensor from_blob(
     void* data,
-    standalone::c10::IntArrayRef sizes,
-    standalone::c10::ScalarType dtype,
-    const standalone::c10::Device& device = CPU_DEVICE,
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::ScalarType dtype,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE,
     int64_t storage_offset = 0) {
   std::vector<int64_t> contig_strides =
-      standalone::slim::compute_contiguous_strides(sizes);
+      executorch::backends::aoti::slim::compute_contiguous_strides(sizes);
   return from_blob(data, sizes, contig_strides, dtype, device, storage_offset);
 }
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/factory/FromScalar.h b/backends/aoti/slim/factory/FromScalar.h
index 223f734d940..df01121a6f7 100644
--- a/backends/aoti/slim/factory/FromScalar.h
+++ b/backends/aoti/slim/factory/FromScalar.h
@@ -2,14 +2,14 @@

 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 inline SlimTensor scalar_to_tensor(
-    const standalone::c10::Scalar& s,
-    const standalone::c10::Device& device = CPU_DEVICE) {
+    const executorch::backends::aoti::slim::c10::Scalar& s,
+    const executorch::backends::aoti::slim::c10::Device& device = CPU_DEVICE) {
   SlimTensor result = empty_strided({}, {}, s.type(), device);
   result.fill_(s);
   return result;
 }
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
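Likewise for the non-owning and scalar factories, a hedged sketch under the same assumed alias:

#include <vector>

namespace slim = executorch::backends::aoti::slim;

void factory_demo() {
  std::vector<float> buf = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};

  // Wraps caller-owned memory without copying; `buf` must outlive `t`.
  slim::SlimTensor t =
      slim::from_blob(buf.data(), {2, 3}, slim::c10::ScalarType::Float);

  // 0-dimensional tensor holding a single value; dtype follows the Scalar.
  slim::SlimTensor s = slim::scalar_to_tensor(slim::c10::Scalar(7.0));
}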
diff --git a/backends/aoti/slim/factory/Pad.h b/backends/aoti/slim/factory/Pad.h
index 4d7fef731bd..44a83696a14 100644
--- a/backends/aoti/slim/factory/Pad.h
+++ b/backends/aoti/slim/factory/Pad.h
@@ -2,15 +2,15 @@

 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 inline SlimTensor constant_pad_nd(
     const SlimTensor& self,
-    standalone::c10::IntArrayRef pad,
-    const standalone::c10::Scalar& value) {
+    executorch::backends::aoti::slim::c10::IntArrayRef pad,
+    const executorch::backends::aoti::slim::c10::Scalar& value) {
   STANDALONE_CHECK(pad.size() % 2 == 0, "Length of pad must be even");

-  standalone::c10::IntArrayRef input_sizes = self.sizes();
+  executorch::backends::aoti::slim::c10::IntArrayRef input_sizes = self.sizes();
   int64_t l_inp = self.dim();
   int64_t l_pad = static_cast<int64_t>(pad.size()) / 2;
   int64_t l_diff = l_inp - l_pad;
@@ -50,7 +50,8 @@ inline SlimTensor constant_pad_nd(
     new_shape.emplace_back(input_sizes[i]);
   }

-  for (const auto i : standalone::c10::irange((size_t)l_pad)) {
+  for (const auto i :
+       executorch::backends::aoti::slim::c10::irange((size_t)l_pad)) {
     auto pad_idx = pad.size() - ((i + 1) * 2);
     auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1];
     STANDALONE_CHECK(
@@ -73,7 +74,8 @@ inline SlimTensor constant_pad_nd(

   // create a view into the center of the output tensor
   SlimTensor c_output = output;
-  for (const auto i : standalone::c10::irange(l_diff, l_inp)) {
+  for (const auto i :
+       executorch::backends::aoti::slim::c10::irange(l_diff, l_inp)) {
     auto pad_idx = 2 * (l_inp - i - 1);
     if (pad[pad_idx] > 0) {
       c_output =
@@ -90,7 +92,7 @@ inline SlimTensor constant_pad_nd(

 inline SlimTensor pad(
     const SlimTensor& self,
-    standalone::c10::IntArrayRef pad,
+    executorch::backends::aoti::slim::c10::IntArrayRef pad,
     std::string_view mode,
     std::optional<double> value) {
   if (mode == "constant") {
@@ -103,4 +105,4 @@ inline SlimTensor pad(
       ". Only constant mode is available.");
 }
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
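The padding semantics mirror ATen's constant_pad_nd: `pad` holds before/after pairs for the trailing dimensions, last dimension first. A worked sketch under the same assumed alias:

namespace slim = executorch::backends::aoti::slim;

void pad_demo() {
  slim::SlimTensor x = slim::ones({2, 3}, slim::c10::ScalarType::Float);

  // Pairs read {last-dim before, last-dim after, first-dim before,
  // first-dim after}, so the result shape is
  // {2 + 0 + 1, 3 + 1 + 2} = {3, 6}, with 0 filled at the border.
  slim::SlimTensor y =
      slim::constant_pad_nd(x, {1, 2, 0, 1}, slim::c10::Scalar(0));

  // The string-mode wrapper only accepts "constant" in this slim build.
  slim::SlimTensor z = slim::pad(x, {1, 1}, "constant", 0.0);
}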
diff --git a/backends/aoti/slim/tests/test_slim_tensor_basic.cpp b/backends/aoti/slim/tests/test_slim_tensor_basic.cpp
index 37b6ccb240d..e37f252c740 100644
--- a/backends/aoti/slim/tests/test_slim_tensor_basic.cpp
+++ b/backends/aoti/slim/tests/test_slim_tensor_basic.cpp
@@ -14,31 +14,39 @@
 #include
 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 namespace {

 TEST(SlimTensorBasicTest, EmptyTensorCreation) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   EXPECT_EQ(tensor.dim(), 3);
   EXPECT_EQ(tensor.size(0), 2);
   EXPECT_EQ(tensor.size(1), 3);
   EXPECT_EQ(tensor.size(2), 4);
   EXPECT_EQ(tensor.numel(), 24);
-  EXPECT_EQ(tensor.dtype(), standalone::c10::ScalarType::Float);
+  EXPECT_EQ(
+      tensor.dtype(), executorch::backends::aoti::slim::c10::ScalarType::Float);
   EXPECT_TRUE(tensor.is_contiguous());
 }

 TEST(SlimTensorBasicTest, EmptyTensorContiguousStrides) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   EXPECT_EQ(tensor.stride(0), 12);
   EXPECT_EQ(tensor.stride(1), 4);
   EXPECT_EQ(tensor.stride(2), 1);
 }

 TEST(SlimTensorBasicTest, ZerosTensorCreation) {
-  auto tensor = zeros({3, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = zeros(
+      {3, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   EXPECT_EQ(tensor.numel(), 9);
   float* data = static_cast<float*>(tensor.data_ptr());
   for (int i = 0; i < 9; ++i) {
@@ -47,7 +55,10 @@ TEST(SlimTensorBasicTest, ZerosTensorCreation) {
 }

 TEST(SlimTensorBasicTest, OnesTensorCreation) {
-  auto tensor = ones({2, 2}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = ones(
+      {2, 2},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   EXPECT_EQ(tensor.numel(), 4);
   float* data = static_cast<float*>(tensor.data_ptr());
   for (int i = 0; i < 4; ++i) {
@@ -56,7 +67,10 @@ TEST(SlimTensorBasicTest, OnesTensorCreation) {
 }

 TEST(SlimTensorBasicTest, FillTensor) {
-  auto tensor = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   tensor.fill_(5.0f);
   float* data = static_cast<float*>(tensor.data_ptr());
   for (int i = 0; i < 6; ++i) {
@@ -67,7 +81,10 @@ TEST(SlimTensorBasicTest, FillTensor) {
 TEST(SlimTensorBasicTest, FromBlobNonOwning) {
   std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   auto tensor = from_blob(
-      data.data(), {2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+      data.data(),
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   EXPECT_EQ(tensor.dim(), 2);
   EXPECT_EQ(tensor.size(0), 2);
   EXPECT_EQ(tensor.size(1), 3);
@@ -76,7 +93,10 @@ TEST(SlimTensorBasicTest, FromBlobNonOwning) {
 }

 TEST(SlimTensorBasicTest, Clone) {
-  auto tensor = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   tensor.fill_(3.14f);

   auto cloned = tensor.clone();
@@ -91,10 +111,16 @@ TEST(SlimTensorBasicTest, Clone) {
 }

 TEST(SlimTensorBasicTest, CopyFrom) {
-  auto src = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto src = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   src.fill_(2.5f);

-  auto dst = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto dst = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   dst.copy_(src);

   float* dst_data = static_cast<float*>(dst.data_ptr());
@@ -104,7 +130,10 @@ TEST(SlimTensorBasicTest, CopyFrom) {
 }

 TEST(SlimTensorBasicTest, Reshape) {
-  auto tensor = empty({2, 6}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 6},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   tensor.fill_(1.0f);

   auto reshaped = tensor.reshape({3, 4});
@@ -115,15 +144,20 @@ TEST(SlimTensorBasicTest, Reshape) {
 }

 TEST(SlimTensorBasicTest, Transpose) {
-  auto tensor = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   auto transposed = tensor.transpose(0, 1);
   EXPECT_EQ(transposed.size(0), 3);
   EXPECT_EQ(transposed.size(1), 2);
 }

 TEST(SlimTensorBasicTest, Permute) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   auto permuted = tensor.permute({2, 0, 1});
   EXPECT_EQ(permuted.size(0), 4);
   EXPECT_EQ(permuted.size(1), 2);
@@ -131,7 +165,10 @@ TEST(SlimTensorBasicTest, Permute) {
 }

 TEST(SlimTensorBasicTest, Narrow) {
-  auto tensor = empty({10}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {10},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   for (int i = 0; i < 10; ++i) {
     static_cast<float*>(tensor.data_ptr())[i] = static_cast<float>(i);
   }
@@ -147,8 +184,10 @@ TEST(SlimTensorBasicTest, Narrow) {
 }

 TEST(SlimTensorBasicTest, EmptyLike) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   auto empty_like_tensor = empty_like(tensor);
   EXPECT_EQ(empty_like_tensor.sizes(), tensor.sizes());
   EXPECT_EQ(empty_like_tensor.dtype(), tensor.dtype());
@@ -156,7 +195,10 @@ TEST(SlimTensorBasicTest, EmptyLike) {
 }

 TEST(SlimTensorBasicTest, ZerosLike) {
-  auto tensor = empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   auto zeros_tensor = zeros_like(tensor);

   EXPECT_EQ(zeros_tensor.sizes(), tensor.sizes());
@@ -167,4 +209,4 @@ TEST(SlimTensorBasicTest, ZerosLike) {
 }

 } // namespace
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
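Downstream code that still spells `standalone::slim` or `standalone::c10` stops compiling after this rename. One possible transition aid (an assumption about how callers might stage the migration, not part of this diff) is a temporary alias shim:

// Compatibility shim; delete once all call sites use the new namespace.
namespace standalone {
namespace slim = ::executorch::backends::aoti::slim;
namespace c10 = ::executorch::backends::aoti::slim::c10;
} // namespace standalone

// Old spellings such as standalone::slim::zeros(...) and
// standalone::c10::ScalarType::Float then resolve through the aliases.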
diff --git a/backends/aoti/slim/tests/test_slim_tensor_cuda.cpp b/backends/aoti/slim/tests/test_slim_tensor_cuda.cpp
index 571d4f99893..2bca695fa15 100644
--- a/backends/aoti/slim/tests/test_slim_tensor_cuda.cpp
+++ b/backends/aoti/slim/tests/test_slim_tensor_cuda.cpp
@@ -15,7 +15,7 @@
 #include
 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 namespace {

 class SlimTensorCUDATest : public ::testing::Test {
@@ -30,22 +30,30 @@ class SlimTensorCUDATest : public ::testing::Test {
 };

 TEST_F(SlimTensorCUDATest, EmptyCUDATensorCreation) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   EXPECT_EQ(tensor.dim(), 3);
   EXPECT_EQ(tensor.size(0), 2);
   EXPECT_EQ(tensor.size(1), 3);
   EXPECT_EQ(tensor.size(2), 4);
   EXPECT_EQ(tensor.numel(), 24);
-  EXPECT_EQ(tensor.device().type(), standalone::c10::DeviceType::CUDA);
+  EXPECT_EQ(
+      tensor.device().type(),
+      executorch::backends::aoti::slim::c10::DeviceType::CUDA);
   EXPECT_TRUE(tensor.is_contiguous());
 }

 TEST_F(SlimTensorCUDATest, ZerosCUDATensor) {
-  auto tensor =
-      zeros({3, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = zeros(
+      {3, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   EXPECT_EQ(tensor.numel(), 9);
-  EXPECT_EQ(tensor.device().type(), standalone::c10::DeviceType::CUDA);
+  EXPECT_EQ(
+      tensor.device().type(),
+      executorch::backends::aoti::slim::c10::DeviceType::CUDA);

   std::vector<float> host_data(9);
   cudaMemcpy(
@@ -60,8 +68,10 @@ TEST_F(SlimTensorCUDATest, ZerosCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, OnesCUDATensor) {
-  auto tensor =
-      ones({2, 2}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = ones(
+      {2, 2},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   EXPECT_EQ(tensor.numel(), 4);

   std::vector<float> host_data(4);
@@ -77,8 +87,10 @@ TEST_F(SlimTensorCUDATest, OnesCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, FillCUDATensor) {
-  auto tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   tensor.fill_(5.0f);

   std::vector<float> host_data(6);
@@ -94,8 +106,10 @@ TEST_F(SlimTensorCUDATest, FillCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, CloneCUDATensor) {
-  auto tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   tensor.fill_(3.14f);

   auto cloned = tensor.clone();
@@ -116,12 +130,16 @@ TEST_F(SlimTensorCUDATest, CloneCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, CopyCUDAToCUDA) {
-  auto src =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto src = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   src.fill_(2.5f);

-  auto dst =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto dst = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   dst.copy_(src);

   std::vector<float> host_data(6);
@@ -137,12 +155,16 @@ TEST_F(SlimTensorCUDATest, CopyCUDAToCUDA) {
 }

 TEST_F(SlimTensorCUDATest, CopyCPUToCUDA) {
-  auto cpu_tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto cpu_tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   cpu_tensor.fill_(1.5f);

-  auto cuda_tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto cuda_tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   cuda_tensor.copy_(cpu_tensor);

   std::vector<float> host_data(6);
@@ -158,12 +180,16 @@ TEST_F(SlimTensorCUDATest, CopyCPUToCUDA) {
 }

 TEST_F(SlimTensorCUDATest, CopyCUDAToCPU) {
-  auto cuda_tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto cuda_tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   cuda_tensor.fill_(4.5f);

-  auto cpu_tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, CPU_DEVICE);
+  auto cpu_tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      CPU_DEVICE);
   cpu_tensor.copy_(cuda_tensor);

   float* data = static_cast<float*>(cpu_tensor.data_ptr());
@@ -174,14 +200,20 @@ TEST_F(SlimTensorCUDATest, CopyCUDAToCPU) {

 TEST_F(SlimTensorCUDATest, CUDAGuard) {
   cuda::CUDAGuard guard(0);
-  auto tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
-  EXPECT_EQ(tensor.device().type(), standalone::c10::DeviceType::CUDA);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
+  EXPECT_EQ(
+      tensor.device().type(),
+      executorch::backends::aoti::slim::c10::DeviceType::CUDA);
 }

 TEST_F(SlimTensorCUDATest, ReshapeCUDATensor) {
-  auto tensor =
-      empty({2, 6}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 6},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   auto reshaped = tensor.reshape({3, 4});
   EXPECT_EQ(reshaped.dim(), 2);
   EXPECT_EQ(reshaped.size(0), 3);
@@ -190,8 +222,10 @@ TEST_F(SlimTensorCUDATest, ReshapeCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, TransposeCUDATensor) {
-  auto tensor =
-      empty({2, 3}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 3},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   auto transposed = tensor.transpose(0, 1);
   EXPECT_EQ(transposed.size(0), 3);
   EXPECT_EQ(transposed.size(1), 2);
@@ -199,8 +233,10 @@ TEST_F(SlimTensorCUDATest, TransposeCUDATensor) {
 }

 TEST_F(SlimTensorCUDATest, PermuteCUDATensor) {
-  auto tensor =
-      empty({2, 3, 4}, standalone::c10::ScalarType::Float, DEFAULT_CUDA_DEVICE);
+  auto tensor = empty(
+      {2, 3, 4},
+      executorch::backends::aoti::slim::c10::ScalarType::Float,
+      DEFAULT_CUDA_DEVICE);
   auto permuted = tensor.permute({2, 0, 1});
   EXPECT_EQ(permuted.size(0), 4);
   EXPECT_EQ(permuted.size(1), 2);
@@ -209,4 +245,4 @@ TEST_F(SlimTensorCUDATest, PermuteCUDATensor) {
 }

 } // namespace
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/util/SharedPtr.h b/backends/aoti/slim/util/SharedPtr.h
index 9ad565d9ab9..33a4def5845 100644
--- a/backends/aoti/slim/util/SharedPtr.h
+++ b/backends/aoti/slim/util/SharedPtr.h
@@ -7,7 +7,7 @@

 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {

 /**
  * NonAtomicSharedPtr - A lightweight, non-thread-safe shared pointer
@@ -210,7 +210,7 @@ std::shared_ptr<T> make_shared(Args&&... args) {
 #else

 template <typename T>
-using SharedPtr = ::standalone::slim::NonAtomicSharedPtr<T>;
+using SharedPtr = ::executorch::backends::aoti::slim::NonAtomicSharedPtr<T>;

 // make_shared for NonAtomicSharedPtr
 template <typename T, typename... Args>
@@ -219,4 +219,4 @@ NonAtomicSharedPtr<T> make_shared(Args&&... args) {
 }

 #endif // USE_MULTI_THREAD

-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
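A small sketch of the alias above: when USE_MULTI_THREAD is not defined, SharedPtr<T> is the non-atomic implementation, trading thread safety for cheaper reference counting (illustrative; `Block` is a made-up payload type):

namespace slim = executorch::backends::aoti::slim;

struct Block {
  int id;
  explicit Block(int i) : id(i) {}
};

void refcount_demo() {
  // The control block uses a plain integer count, so copies avoid
  // atomic operations but must stay on one thread.
  slim::SharedPtr<Block> p = slim::make_shared<Block>(42);
  slim::SharedPtr<Block> q = p; // count becomes 2
} // count drops back to 0 here and Block is destroyed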
diff --git a/backends/aoti/slim/util/SizeUtil.h b/backends/aoti/slim/util/SizeUtil.h
index d22416cd176..4eab9fc2329 100644
--- a/backends/aoti/slim/util/SizeUtil.h
+++ b/backends/aoti/slim/util/SizeUtil.h
@@ -11,7 +11,7 @@
 #include
 #include

-namespace standalone::slim {
+namespace executorch::backends::aoti::slim {
 #ifndef STANDALONE_MOBILE
 inline constexpr uint64_t storage_max() {
   // int64_t and size_t are used somewhat inconsistently throughout ATen.
@@ -28,9 +28,11 @@
 * tensor. Catches integer overflow that may occur when a tensor
 * using a sparse layout has multiple dimensions with large sizes.
 */
-inline int64_t safe_compute_numel(standalone::c10::IntArrayRef sizes) {
+inline int64_t safe_compute_numel(
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes) {
   uint64_t n = 1;
-  bool overflowed = standalone::c10::safe_multiplies_u64(sizes, &n);
+  bool overflowed =
+      executorch::backends::aoti::slim::c10::safe_multiplies_u64(sizes, &n);
   overflowed |= (n > storage_max());
   STANDALONE_CHECK(!overflowed, "numel: integer multiplication overflow");
   return static_cast<int64_t>(n);
@@ -59,26 +61,30 @@ inline std::vector<int64_t> safe_compute_contiguous_strides(
 }
 #endif // STANDALONE_MOBILE

-inline int64_t compute_numel(standalone::c10::IntArrayRef sizes) {
+inline int64_t compute_numel(
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes) {
 #ifndef STANDALONE_MOBILE
   // Use overflow checks if supported by the compiler
   return safe_compute_numel(sizes);
 #else
-  return standalone::c10::multiply_integers(sizes);
+  return executorch::backends::aoti::slim::c10::multiply_integers(sizes);
 #endif
 }

 // named computeStorageNbytesContiguous in c10
 inline size_t compute_storage_nbytes_contiguous(
-    standalone::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
     size_t itemsize_bytes,
     size_t storage_offset) {
   // Ignore overflow checks on mobile
 #ifndef STANDALONE_MOBILE
   uint64_t size = 1;
-  bool overflowed = standalone::c10::safe_multiplies_u64(sizes, &size);
-  overflowed |= standalone::c10::add_overflows(size, storage_offset, &size);
-  overflowed |= standalone::c10::mul_overflows(size, itemsize_bytes, &size);
+  bool overflowed =
+      executorch::backends::aoti::slim::c10::safe_multiplies_u64(sizes, &size);
+  overflowed |= executorch::backends::aoti::slim::c10::add_overflows(
+      size, storage_offset, &size);
+  overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
+      size, itemsize_bytes, &size);
   overflowed |= size > storage_max();
   STANDALONE_CHECK(
       !overflowed, "Storage size calculation overflowed with sizes=", sizes);
@@ -91,8 +97,8 @@ inline size_t compute_storage_nbytes_contiguous(

 // named computeStorageNbytes in c10
 inline size_t compute_storage_nbytes(
-    standalone::c10::IntArrayRef sizes,
-    standalone::c10::IntArrayRef strides,
+    executorch::backends::aoti::slim::c10::IntArrayRef sizes,
+    executorch::backends::aoti::slim::c10::IntArrayRef strides,
     size_t itemsize_bytes,
     size_t storage_offset) {
   STANDALONE_CHECK(
@@ -109,17 +115,20 @@ inline size_t compute_storage_nbytes(
   // of the last element according to stride
   uint64_t size = storage_offset + 1;
   bool overflowed = false;
-  for (const auto i : standalone::c10::irange(sizes.size())) {
+  for (const auto i :
+       executorch::backends::aoti::slim::c10::irange(sizes.size())) {
     if (sizes[i] == 0) {
       return 0;
     }

     uint64_t strided_size = 0;
-    overflowed |=
-        standalone::c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
-    overflowed |= standalone::c10::add_overflows(size, strided_size, &size);
+    overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
+        strides[i], sizes[i] - 1, &strided_size);
+    overflowed |= executorch::backends::aoti::slim::c10::add_overflows(
+        size, strided_size, &size);
   }
-  overflowed |= standalone::c10::mul_overflows(size, itemsize_bytes, &size);
+  overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
+      size, itemsize_bytes, &size);
   overflowed |= size > storage_max();
   STANDALONE_CHECK(
       !overflowed,
@@ -132,7 +141,8 @@ inline size_t compute_storage_nbytes(
   // size of the underlying storage is 1 bigger than the offset
   // of the last element according to stride
   uint64_t size = 1;
-  for (const auto i : standalone::c10::irange(sizes.size())) {
+  for (const auto i :
+       executorch::backends::aoti::slim::c10::irange(sizes.size())) {
     if (sizes[i] == 0) {
       return 0;
     }
@@ -165,7 +175,7 @@ inline std::vector<int64_t> compute_contiguous_strides(c10::IntArrayRef sizes) {
 // calculates the final concrete shape by also filling in at most one '-1'
 // dimension.
 inline std::vector<int64_t> infer_size(
-    standalone::c10::IntArrayRef shape,
+    executorch::backends::aoti::slim::c10::IntArrayRef shape,
     int64_t numel) {
   int64_t new_size = 1;
   std::optional<int64_t> infer_dim;
@@ -182,8 +192,8 @@ inline std::vector<int64_t> infer_size(
       result_shape.push_back(-1); // placeholder
     } else {
       STANDALONE_CHECK(shape[dim] >= 0, "invalid shape dimension ", shape[dim]);
-      overflowed |=
-          standalone::c10::mul_overflows(new_size, shape[dim], &new_size);
+      overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
+          new_size, shape[dim], &new_size);
       result_shape.push_back(shape[dim]);
     }
   }
@@ -207,9 +217,9 @@ inline std::vector<int64_t> infer_size(
 // If so, it returns the new strides
 // If not, it returns an empty optional
 inline std::optional<std::vector<int64_t>> compute_stride(
-    standalone::c10::IntArrayRef old_sizes,
-    standalone::c10::IntArrayRef old_strides,
-    standalone::c10::IntArrayRef new_sizes) {
+    executorch::backends::aoti::slim::c10::IntArrayRef old_sizes,
+    executorch::backends::aoti::slim::c10::IntArrayRef old_strides,
+    executorch::backends::aoti::slim::c10::IntArrayRef new_sizes) {
   if (old_sizes.empty()) {
     return std::vector<int64_t>(new_sizes.size(), 1);
   }
@@ -248,7 +258,7 @@ inline std::optional<std::vector<int64_t>> compute_stride(
       tensor_d--) {
     // TODO: ask if this could lead to overflow by any chance?
     // even if so, overflow is not handled in the aten implementation
-    overflowed |= standalone::c10::mul_overflows(
+    overflowed |= executorch::backends::aoti::slim::c10::mul_overflows(
         tensor_numel, old_sizes[tensor_d], &tensor_numel);
     bool is_chunk_end = (tensor_d == 0) ||
@@ -280,4 +290,4 @@ inline std::optional<std::vector<int64_t>> compute_stride(
   return new_strides;
 }
-} // namespace standalone::slim
+} // namespace executorch::backends::aoti::slim
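To make the view machinery above concrete: `infer_size` resolves at most one -1 against a known element count, and `compute_stride` reports whether a reshape can remain a zero-copy view. A sketch under the same assumed alias, with values checked against the chunk logic above:

#include <cassert>
#include <cstdint>
#include <vector>

namespace slim = executorch::backends::aoti::slim;

void view_demo() {
  // {-1, 4} with 24 elements resolves to {6, 4}.
  std::vector<int64_t> shape = slim::infer_size({-1, 4}, /*numel=*/24);
  assert(shape[0] == 6 && shape[1] == 4);

  // A contiguous {2, 12} layout can be viewed as {6, 4}; new strides {4, 1}.
  auto ok = slim::compute_stride({2, 12}, /*old_strides=*/{12, 1}, {6, 4});
  assert(ok.has_value());

  // A transposed {2, 12} layout (strides {1, 2}) cannot, so the optional
  // comes back empty and the caller must fall back to a copy.
  auto none = slim::compute_stride({2, 12}, {1, 2}, {6, 4});
  assert(!none.has_value());
}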