Skip to content

Commit 0f5a252

Browse files
authored
Add device type validation in aoti_torch_create_tensor_from_blob_v2 (#16344)
Validate that the data pointer location matches the requested device_type. This prevents silent data corruption when tensors are created with a mismatched data pointer and device type. Changes: add a cudaPointerGetAttributes check to verify the data pointer location; error out if CPU data is provided but a CUDA device is requested; error out if CUDA data is provided but a CPU device is requested; add unit tests for the device type mismatch scenarios.
1 parent dafa082 commit 0f5a252

File tree

7 files changed

+385
-3
lines changed

7 files changed

+385
-3
lines changed

.github/workflows/cuda.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,33 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90+
test-cuda-shims:
91+
name: test-cuda-shims
92+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
93+
permissions:
94+
id-token: write
95+
contents: read
96+
with:
97+
timeout: 90
98+
runner: linux.g5.4xlarge.nvidia.gpu
99+
gpu-arch-type: cuda
100+
gpu-arch-version: 12.6
101+
use-custom-docker-registry: false
102+
submodules: recursive
103+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104+
script: |
105+
set -eux
106+
# Install requirements
107+
bash ./install_requirements.sh
108+
109+
# Build ExecuTorch with CUDA support
110+
cmake --workflow --preset llm-release-cuda
111+
112+
# Build and run CUDA shim tests
113+
pushd backends/cuda/runtime/shims/tests
114+
cmake --workflow --preset default
115+
popd
116+
90117
export-model-cuda-artifact:
91118
name: export-model-cuda-artifact
92119
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

backends/cuda/runtime/shims/memory.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
119119
int32_t layout,
120120
const uint8_t* opaque_metadata,
121121
int64_t opaque_metadata_size) {
122-
// TODO(gasoonjia): verify given data is on the target device
123-
(void)device_type;
124122
(void)opaque_metadata;
125123
(void)layout;
126124
(void)opaque_metadata_size;
@@ -154,6 +152,34 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
154152
// Storage offset must be 0 since from_blob cannot handle different offsets
155153
ET_CHECK_OK_OR_RETURN_ERROR(validate_storage_offset(storage_offset));
156154

155+
// Verify that data pointer location matches the requested device_type
156+
cudaPointerAttributes data_attributes{};
157+
ET_CUDA_CHECK_OR_RETURN_ERROR(
158+
cudaPointerGetAttributes(&data_attributes, data));
159+
160+
bool data_is_on_device = data_attributes.type == cudaMemoryTypeDevice;
161+
bool data_is_on_host = data_attributes.type == cudaMemoryTypeHost ||
162+
data_attributes.type == cudaMemoryTypeUnregistered;
163+
bool requested_device =
164+
device_type == static_cast<int32_t>(SupportedDevices::CUDA);
165+
bool requested_cpu =
166+
device_type == static_cast<int32_t>(SupportedDevices::CPU);
167+
168+
// Error if data location doesn't match requested device type
169+
ET_CHECK_OR_RETURN_ERROR(
170+
!(data_is_on_device && requested_cpu),
171+
InvalidArgument,
172+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CUDA "
173+
"but device_type is CPU. Data must be on CPU for CPU tensors.",
174+
data);
175+
176+
ET_CHECK_OR_RETURN_ERROR(
177+
!(data_is_on_host && requested_device),
178+
InvalidArgument,
179+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CPU "
180+
"but device_type is CUDA. Data must be on GPU for CUDA tensors.",
181+
data);
182+
157183
// Convert sizes to the format expected by ExecutorTorch using SizesType
158184
std::vector<executorch::aten::SizesType> sizes =
159185
convert_sizes_to_vector(ndim, sizes_ptr);
@@ -305,7 +331,10 @@ void clear_all_tensors() {
305331
// tensors set should now be empty, but ensure it's cleared
306332
tensors.clear();
307333

308-
ET_LOG(Info, "Cleared all tensors");
334+
// Clear memory tracking map (includes leftover NOT_OWN entries)
335+
memory_to_n_tensor.clear();
336+
337+
ET_LOG(Info, "Cleared all tensors and memory tracking");
309338
}
310339

311340
AOTITorchError aoti_torch_delete_tensor_object(Tensor* tensor) {

backends/cuda/runtime/shims/memory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ AOTITorchError aoti_torch_new_tensor_handle(
167167

168168
// Function to clear all tensors from internal storage
169169
AOTI_SHIM_EXPORT void clear_all_tensors();
170+
171+
// Function to clear memory tracking map (for test cleanup)
172+
AOTI_SHIM_EXPORT void clear_memory_tracking();
170173
} // extern "C"
171174

172175
} // namespace executorch::backends::cuda
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
cmake_minimum_required(VERSION 3.19)
8+
project(aoti_cuda_shim_tests LANGUAGES CXX CUDA)
9+
10+
set(CMAKE_CXX_STANDARD 17)
11+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
12+
13+
# Find required packages
14+
find_package(CUDAToolkit REQUIRED)
15+
16+
# Fetch GoogleTest
17+
include(FetchContent)
18+
FetchContent_Declare(
19+
googletest
20+
GIT_REPOSITORY https://github.com/google/googletest.git
21+
GIT_TAG v1.14.0
22+
)
23+
# For Windows: Prevent overriding the parent project's compiler/linker settings
24+
set(gtest_force_shared_crt
25+
ON
26+
CACHE BOOL "" FORCE
27+
)
28+
FetchContent_MakeAvailable(googletest)
29+
30+
# Get EXECUTORCH_ROOT
31+
if(NOT EXECUTORCH_ROOT)
32+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
33+
endif()
34+
35+
# Find installed ExecuTorch
36+
find_package(executorch CONFIG REQUIRED HINTS ${CMAKE_INSTALL_PREFIX})
37+
38+
# List of test files
39+
set(CUDA_SHIM_TESTS
40+
test_aoti_torch_create_tensor_from_blob_v2 test_aoti_torch_empty_strided
41+
test_aoti_torch_delete_tensor_object test_aoti_torch__reinterpret_tensor
42+
test_aoti_torch_copy_ test_aoti_torch_new_tensor_handle
43+
)
44+
45+
enable_testing()
46+
47+
foreach(test_name ${CUDA_SHIM_TESTS})
48+
add_executable(${test_name} ${test_name}.cpp)
49+
50+
target_include_directories(
51+
${test_name} PRIVATE ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}
52+
${CUDAToolkit_INCLUDE_DIRS}
53+
)
54+
55+
target_link_libraries(
56+
${test_name}
57+
PRIVATE GTest::gtest
58+
GTest::gtest_main
59+
aoti_cuda_shims
60+
aoti_cuda_backend
61+
cuda_tensor_maker
62+
cuda_platform
63+
executorch_core
64+
extension_tensor
65+
CUDA::cudart
66+
)
67+
68+
add_test(NAME ${test_name} COMMAND ${test_name})
69+
endforeach()
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{
2+
"version": 6,
3+
"configurePresets": [
4+
{
5+
"name": "default",
6+
"displayName": "CUDA Shim Tests",
7+
"binaryDir": "${sourceDir}/../../../../../cmake-out/backends/cuda/runtime/shims/tests",
8+
"cacheVariables": {
9+
"CMAKE_BUILD_TYPE": "Release",
10+
"CMAKE_PREFIX_PATH": "${sourceDir}/../../../../../cmake-out"
11+
},
12+
"condition": {
13+
"type": "inList",
14+
"string": "${hostSystemName}",
15+
"list": ["Linux", "Windows"]
16+
}
17+
},
18+
{
19+
"name": "debug",
20+
"displayName": "CUDA Shim Tests (Debug)",
21+
"inherits": ["default"],
22+
"cacheVariables": {
23+
"CMAKE_BUILD_TYPE": "Debug"
24+
}
25+
}
26+
],
27+
"buildPresets": [
28+
{
29+
"name": "default",
30+
"displayName": "Build CUDA Shim Tests",
31+
"configurePreset": "default"
32+
},
33+
{
34+
"name": "debug",
35+
"displayName": "Build CUDA Shim Tests (Debug)",
36+
"configurePreset": "debug"
37+
}
38+
],
39+
"workflowPresets": [
40+
{
41+
"name": "default",
42+
"displayName": "Configure, build, and test CUDA Shim Tests",
43+
"steps": [
44+
{
45+
"type": "configure",
46+
"name": "default"
47+
},
48+
{
49+
"type": "build",
50+
"name": "default"
51+
},
52+
{
53+
"type": "test",
54+
"name": "default"
55+
}
56+
]
57+
},
58+
{
59+
"name": "debug",
60+
"displayName": "Configure, build, and test CUDA Shim Tests (Debug)",
61+
"steps": [
62+
{
63+
"type": "configure",
64+
"name": "debug"
65+
},
66+
{
67+
"type": "build",
68+
"name": "debug"
69+
},
70+
{
71+
"type": "test",
72+
"name": "debug"
73+
}
74+
]
75+
}
76+
],
77+
"testPresets": [
78+
{
79+
"name": "default",
80+
"displayName": "Run all CUDA Shim Tests",
81+
"configurePreset": "default",
82+
"output": {
83+
"outputOnFailure": true
84+
}
85+
},
86+
{
87+
"name": "debug",
88+
"displayName": "Run all CUDA Shim Tests (Debug)",
89+
"configurePreset": "debug",
90+
"output": {
91+
"outputOnFailure": true
92+
}
93+
}
94+
]
95+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# CUDA AOTI Shim Tests
2+
3+
Unit tests for the CUDA AOTI (Ahead-Of-Time Inductor) shim functions used by the ExecuTorch CUDA backend.
4+
5+
## Prerequisites
6+
7+
1. **CUDA Toolkit**: Ensure CUDA is installed and available
8+
2. **ExecuTorch with CUDA**: Build and install ExecuTorch with CUDA support first
9+
10+
## Building ExecuTorch with CUDA
11+
12+
From the ExecuTorch root directory:
13+
14+
```bash
15+
# Release build
16+
cmake --workflow --preset llm-release-cuda
17+
18+
# Or debug build (recommended for debugging test failures)
19+
cmake --workflow --preset llm-debug-cuda
20+
```
21+
22+
## Building and Running the Tests
23+
24+
### Option 1: Using CMake Presets (Recommended)
25+
26+
From this directory (`backends/cuda/runtime/shims/tests/`):
27+
28+
```bash
29+
# Release build
30+
cmake --workflow --preset default
31+
32+
# Debug build
33+
cmake --workflow --preset debug
34+
```
35+
36+
### Option 2: Manual CMake Commands
37+
38+
From the ExecuTorch root directory:
39+
40+
```bash
41+
# Configure
42+
cmake -B cmake-out/backends/cuda/runtime/shims/tests \
43+
-S backends/cuda/runtime/shims/tests \
44+
-DCMAKE_PREFIX_PATH=$(pwd)/cmake-out \
45+
-DCMAKE_BUILD_TYPE=Debug
46+
47+
# Build
48+
cmake --build cmake-out/backends/cuda/runtime/shims/tests -j$(nproc)
49+
```
50+
51+
### Run Specific Test Cases
52+
53+
Use Google Test filters to run specific test cases:
54+
55+
```bash
56+
# From the build directory
57+
cd cmake-out/backends/cuda/runtime/shims/tests
58+
# Run only device mismatch tests
59+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="*DeviceMismatch*"
60+
61+
# Run a single test
62+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="AOTITorchCreateTensorFromBlobV2Test.BasicFunctionalityCUDA"
63+
64+
# List all available tests
65+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_list_tests
66+
```
67+
68+
## Troubleshooting
69+
70+
### CUDA Not Available
71+
72+
If tests are skipped with "CUDA not available", ensure:
73+
- CUDA drivers are installed
74+
- A CUDA-capable GPU is present
75+
- `nvidia-smi` shows the GPU
76+
77+
### Link Errors
78+
79+
If you get link errors, ensure ExecuTorch was built with CUDA support:
80+
```bash
81+
cmake --workflow --preset llm-release-cuda
82+
```
83+
84+
### Test Failures
85+
86+
For debugging test failures, build with debug mode:
87+
```bash
88+
cmake --workflow --preset debug
89+
```
90+
91+
Then run with verbose output:
92+
```bash
93+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_break_on_failure
94+
```

0 commit comments

Comments
 (0)