From 1995f7b14f9c3c6f80713e9785dec12d49e7b00b Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 10:52:41 -0800 Subject: [PATCH 01/15] Set up windows-cuda stuff script --- scripts/install_windows_cuda_deps.py | 330 +++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100755 scripts/install_windows_cuda_deps.py diff --git a/scripts/install_windows_cuda_deps.py b/scripts/install_windows_cuda_deps.py new file mode 100755 index 00000000000..30611dcaaed --- /dev/null +++ b/scripts/install_windows_cuda_deps.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Script to install Windows CUDA dependencies for cross-compilation. +Supports Fedora/RHEL and WSL environments. + +Detects CUDA version from the installed PyTorch to ensure compatibility. +""" + +import argparse +import os +import platform +import shutil +import subprocess +import sys +from pathlib import Path + +# Mapping of CUDA versions to their corresponding driver versions for Windows installers +# Source: https://developer.nvidia.com/cuda-toolkit-archive +CUDA_DRIVER_VERSION_MAP = { + # CUDA 12.9.x + "12.9.1": "576.40", + "12.9.0": "576.02", + # CUDA 12.8.x + "12.8.1": "572.17", + "12.8.0": "571.96", + # CUDA 12.6.x + "12.6.3": "561.17", + "12.6.2": "560.94", + "12.6.1": "560.94", + "12.6.0": "560.76", +} + + +class Colors: + RED = "\033[0;31m" + GREEN = "\033[0;32m" + YELLOW = "\033[1;33m" + NC = "\033[0m" # No Color + + +def log_info(msg: str) -> None: + print(f"{Colors.GREEN}[INFO]{Colors.NC} {msg}") + + +def log_warn(msg: str) -> None: + print(f"{Colors.YELLOW}[WARN]{Colors.NC} {msg}") + + +def log_error(msg: str) -> None: + print(f"{Colors.RED}[ERROR]{Colors.NC} {msg}") + + +def get_pytorch_cuda_version() -> tuple[str, str] | None: + """ + Get the CUDA 
version from the installed PyTorch. + + Returns: + A tuple of (cuda_version, driver_version) if found, None otherwise. + """ + try: + import torch + except ImportError: + log_error("PyTorch is not installed. Cannot detect CUDA version.") + return None + + cuda_version = torch.version.cuda + if cuda_version is None: + log_error("PyTorch is not built with CUDA support.") + return None + + log_info(f"Detected PyTorch CUDA version: {cuda_version}") + + # torch.version.cuda returns something like "12.4" (major.minor only) + # We need to find a matching full version in our map + matching_versions = [ + v for v in CUDA_DRIVER_VERSION_MAP.keys() if v.startswith(cuda_version) + ] + + if not matching_versions: + log_error( + f"CUDA version {cuda_version} is not in the known version map. " + f"Known versions: {', '.join(sorted(CUDA_DRIVER_VERSION_MAP.keys()))}" + ) + return None + + # Use the latest patch version available + full_cuda_version = sorted(matching_versions, reverse=True)[0] + driver_version = CUDA_DRIVER_VERSION_MAP[full_cuda_version] + + log_info(f"Using CUDA {full_cuda_version} with driver {driver_version}") + return full_cuda_version, driver_version + + +def run_command( + cmd: list[str], check: bool = True, capture_output: bool = False +) -> subprocess.CompletedProcess: + """Run a command and optionally check for errors.""" + log_info(f"Running: {' '.join(cmd)}") + return subprocess.run(cmd, check=check, capture_output=capture_output, text=True) + + +def detect_environment() -> str: + """Detect the current environment (wsl, fedora, or unknown).""" + # Check if running on Linux + if platform.system() != "Linux": + return "unknown" + + # Check for WSL + try: + with open("/proc/version", "r") as f: + if "microsoft" in f.read().lower(): + return "wsl" + except FileNotFoundError: + pass + + # Check for RHEL/Fedora + if Path("/etc/redhat-release").exists() or shutil.which("dnf"): + return "fedora" + + return "unknown" + + +def install_mingw_fedora() -> None: + 
"""Install mingw64 on Fedora/RHEL.""" + log_info("Installing mingw64 for Fedora (dnf)...") + run_command(["sudo", "dnf", "install", "-y", "mingw64-gcc-c++"]) + + log_info("Verifying installation...") + run_command(["x86_64-w64-mingw32-gcc", "--version"]) + + +def install_mingw_wsl() -> None: + """Install mingw64 on WSL.""" + log_info("Installing mingw64 for WSL...") + run_command(["sudo", "apt", "update"]) + run_command(["sudo", "apt", "install", "-y", "g++-mingw-w64-x86-64-win32"]) + + log_info("Verifying installation...") + run_command(["x86_64-w64-mingw32-g++", "--version"]) + + +def install_7zip(env_type: str) -> None: + """Install 7zip if not already available.""" + if shutil.which("7z"): + log_info("7zip already installed") + return + + log_info("Installing 7zip...") + if env_type == "fedora": + run_command(["sudo", "dnf", "install", "-y", "p7zip", "p7zip-plugins"]) + else: + run_command(["sudo", "apt", "install", "-y", "p7zip-full"]) + + +def find_windows_cuda_install(cuda_version: str) -> Path | None: + """ + Check if CUDA is installed on Windows (accessible via WSL mount). + + Args: + cuda_version: The full CUDA version (e.g., "12.6.0") + + Returns: + Path to the CUDA installation if found, None otherwise. + """ + cuda_major_minor = ".".join(cuda_version.split(".")[:2]) + windows_cuda_path = Path( + f"/mnt/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v{cuda_major_minor}" + ) + + if windows_cuda_path.exists(): + log_info(f"Found Windows CUDA installation at: {windows_cuda_path}") + return windows_cuda_path + + log_info(f"No Windows CUDA installation found at: {windows_cuda_path}") + return None + + +def set_windows_cuda_home(cuda_home_path: Path) -> None: + """ + Set WINDOWS_CUDA_HOME environment variable in the user's shell config. + + Adds the export to ~/.bashrc and ~/.zshrc if they exist. + Also sets it in the current environment. 
+ """ + export_line = f'export WINDOWS_CUDA_HOME="{cuda_home_path}"' + + # Set in current environment + os.environ["WINDOWS_CUDA_HOME"] = str(cuda_home_path) + log_info(f"Set WINDOWS_CUDA_HOME={cuda_home_path}") + + # Add to shell config files + shell_configs = [ + Path.home() / ".bashrc", + Path.home() / ".zshrc", + ] + + for config_file in shell_configs: + if not config_file.exists(): + continue + + # Check if already set + content = config_file.read_text() + if "WINDOWS_CUDA_HOME" in content: + log_info(f"WINDOWS_CUDA_HOME already in {config_file}, updating...") + # Remove old line(s) and add new one + lines = [ + line for line in content.splitlines() if "WINDOWS_CUDA_HOME" not in line + ] + lines.append(export_line) + config_file.write_text("\n".join(lines) + "\n") + else: + log_info(f"Adding WINDOWS_CUDA_HOME to {config_file}") + with open(config_file, "a") as f: + f.write(f"\n# Windows CUDA path for cross-compilation\n") + f.write(f"{export_line}\n") + + +def download_and_extract_cuda( + cuda_version: str, cuda_driver_version: str, install_dir: Path, env_type: str +) -> None: + """Download and extract CUDA toolkit for Windows.""" + log_info("Setting up CUDA toolkit for Windows cross-compilation...") + + install_dir.mkdir(parents=True, exist_ok=True) + + cuda_installer = f"cuda_{cuda_version}_{cuda_driver_version}_windows.exe" + cuda_installer_path = install_dir / cuda_installer + cuda_url = ( + f"https://developer.download.nvidia.com/compute/cuda/{cuda_version}/" + f"local_installers/{cuda_installer}" + ) + + # Download CUDA installer if not present + if not cuda_installer_path.exists(): + log_info(f"Downloading CUDA {cuda_version} Windows installer...") + run_command(["wget", cuda_url, "-O", str(cuda_installer_path)]) + else: + log_info("CUDA installer already downloaded, skipping download...") + + # Install 7zip if needed + install_7zip(env_type) + + # Extract CUDA toolkit + extracted_dir = install_dir / "extracted" + if not extracted_dir.exists(): + 
log_info("Extracting CUDA toolkit...") + run_command(["7z", "x", str(cuda_installer_path), f"-o{extracted_dir}", "-y"]) + else: + log_info("CUDA already extracted, skipping extraction...") + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Install Windows CUDA dependencies for cross-compilation. " + "CUDA version is automatically detected from PyTorch installation." + ) + parser.add_argument( + "--install-dir", + type=Path, + default=Path(os.environ.get("INSTALL_DIR", Path.home() / "cuda-windows")), + help="Installation directory (default: $HOME/cuda-windows)", + ) + + args = parser.parse_args() + + env_type = detect_environment() + log_info(f"Detected environment: {env_type}") + + if env_type == "unknown": + log_error("Unknown environment. This script supports Fedora/RHEL and WSL.") + return 1 + + # Install mingw + try: + if env_type == "fedora": + install_mingw_fedora() + elif env_type == "wsl": + install_mingw_wsl() + except subprocess.CalledProcessError as e: + log_error(f"Failed to install mingw: {e}") + return 1 + + # Get CUDA version from PyTorch + cuda_info = get_pytorch_cuda_version() + if cuda_info is None: + return 1 + + cuda_version, cuda_driver_version = cuda_info + + # For WSL, check if CUDA is already installed on Windows + if env_type == "wsl": + windows_cuda_path = find_windows_cuda_install(cuda_version) + if windows_cuda_path is not None: + log_info("Using existing Windows CUDA installation.") + set_windows_cuda_home(windows_cuda_path) + log_info("") + log_info("Installation complete!") + return 0 + + log_info("Will download CUDA toolkit instead...") + + # Download and extract CUDA + try: + download_and_extract_cuda( + cuda_version, + cuda_driver_version, + args.install_dir, + env_type, + ) + + cuda_home_path = args.install_dir / "extracted" / "cuda_cudart" / "cudart" + set_windows_cuda_home(cuda_home_path) + except subprocess.CalledProcessError as e: + log_error(f"Failed to download/extract CUDA: {e}") + return 1 + + 
log_info("") + log_info("Installation complete!") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 13ab5f85756a4353d7b136eb5546e6eb20281dbd Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 10:52:57 -0800 Subject: [PATCH 02/15] Ci spec --- .ci/scripts/export_model_artifact.sh | 17 ++++-- .github/workflows/cuda-windows.yml | 79 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/cuda-windows.yml diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh index 3c173b0ea2a..188f375202f 100755 --- a/.ci/scripts/export_model_artifact.sh +++ b/.ci/scripts/export_model_artifact.sh @@ -58,11 +58,13 @@ OUTPUT_DIR="${4:-.}" case "$DEVICE" in cuda) ;; + cuda-windows) + ;; metal) ;; *) echo "Error: Unsupported device '$DEVICE'" - echo "Supported devices: cuda, metal" + echo "Supported devices: cuda, cuda-windows, metal" exit 1 ;; esac @@ -147,7 +149,7 @@ if [ -n "$MAX_SEQ_LEN" ]; then fi DEVICE_ARG="" -if [ "$DEVICE" = "cuda" ]; then +if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then DEVICE_ARG="--device cuda" fi @@ -169,8 +171,15 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then --output_file $PREPROCESSOR_OUTPUT fi +# Determine blob file name - cuda and cuda-windows both use aoti_cuda_blob.ptd +if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then + BLOB_FILE="aoti_cuda_blob.ptd" +else + BLOB_FILE="aoti_${DEVICE}_blob.ptd" +fi + test -f model.pte -test -f aoti_${DEVICE}_blob.ptd +test -f $BLOB_FILE if [ -n "$PREPROCESSOR_OUTPUT" ]; then test -f $PREPROCESSOR_OUTPUT fi @@ -179,7 +188,7 @@ echo "::endgroup::" echo "::group::Store $MODEL_NAME Artifacts" mkdir -p "${OUTPUT_DIR}" mv model.pte "${OUTPUT_DIR}/" -mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/" +mv $BLOB_FILE "${OUTPUT_DIR}/" if [ -n "$PREPROCESSOR_OUTPUT" ]; then mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/" fi diff --git a/.github/workflows/cuda-windows.yml 
b/.github/workflows/cuda-windows.yml new file mode 100644 index 00000000000..98e950aec05 --- /dev/null +++ b/.github/workflows/cuda-windows.yml @@ -0,0 +1,79 @@ +# Test ExecuTorch CUDA Windows Cross-Compilation Export +# This workflow tests model export targeting CUDA Windows using optimum-executorch. +# It runs on a Linux machine with CUDA and uses the install_windows_cuda_deps.py +# script to install the Windows CUDA cross-compilation dependencies. + +name: Test CUDA Windows Export + +on: + pull_request: + push: + branches: + - main + - release/* + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} + cancel-in-progress: false + +jobs: + export-model-cuda-windows-artifact: + name: export-model-cuda-windows-artifact + # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) + if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + secrets: inherit + strategy: + fail-fast: false + matrix: + model: + - repo: "mistralai" + name: "Voxtral-Mini-3B-2507" + - repo: "openai" + name: "whisper-small" + - repo: "openai" + name: "whisper-large-v3-turbo" + - repo: "google" + name: "gemma-3-4b-it" + quant: + - "non-quantized" + - "quantized-int4-tile-packed" + - "quantized-int4-weight-only" + exclude: + # TODO: enable int4-weight-only on gemma3. 
+ - model: + repo: "google" + name: "gemma-3-4b-it" + quant: "quantized-int4-weight-only" + with: + timeout: 90 + secrets-env: EXECUTORCH_HF_TOKEN + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: 12.6 + use-custom-docker-registry: false + submodules: recursive + upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + + echo "::group::Setup ExecuTorch" + ./install_executorch.sh + echo "::endgroup::" + + echo "::group::Install Windows CUDA Dependencies" + python scripts/install_windows_cuda_deps.py + echo "::endgroup::" + + echo "::group::Setup Huggingface" + pip install -U "huggingface_hub[cli]<1.0" accelerate + huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN + OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) + pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} + echo "::endgroup::" + + source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" From 567e12e0ff16247a197de401ec6c50d17a48b139 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 14:10:31 -0800 Subject: [PATCH 03/15] try 2 for ci --- .ci/docker/build.sh | 9 ++ .../install_cuda_windows_cross_compile.sh | 144 ++++++++++++++++++ .ci/docker/common/install_pytorch_cuda.sh | 30 ++++ .ci/docker/ubuntu/Dockerfile | 12 ++ .github/workflows/cuda-windows.yml | 20 ++- .github/workflows/docker-builds.yml | 3 +- 6 files changed, 209 insertions(+), 9 deletions(-) create mode 100644 .ci/docker/common/install_cuda_windows_cross_compile.sh create mode 100644 .ci/docker/common/install_pytorch_cuda.sh diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 5b46e62067f..97347d5e5fe 100755 --- a/.ci/docker/build.sh +++ 
b/.ci/docker/build.sh @@ -67,6 +67,13 @@ case "${IMAGE_NAME}" in # From https://developer.android.com/ndk/downloads ANDROID_NDK_VERSION=r28c ;; + executorch-ubuntu-22.04-cuda-windows) + LINTRUNNER="" + GCC_VERSION=11 + CUDA_WINDOWS_CROSS_COMPILE=yes + CUDA_VERSION=12.8 + SKIP_PYTORCH=yes + ;; *) echo "Invalid image name ${IMAGE_NAME}" exit 1 @@ -101,6 +108,8 @@ docker build \ --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \ --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \ --build-arg "SKIP_PYTORCH=${SKIP_PYTORCH:-}" \ + --build-arg "CUDA_WINDOWS_CROSS_COMPILE=${CUDA_WINDOWS_CROSS_COMPILE:-}" \ + --build-arg "CUDA_VERSION=${CUDA_VERSION:-}" \ -f "${OS}"/Dockerfile \ "$@" \ . diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh new file mode 100644 index 00000000000..21d4fa76a72 --- /dev/null +++ b/.ci/docker/common/install_cuda_windows_cross_compile.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Install mingw-w64 cross-compiler and Windows CUDA toolkit for cross-compilation + +set -ex + +INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}" + +# Mapping of CUDA versions to their corresponding driver versions for Windows installers +# Source: https://developer.nvidia.com/cuda-toolkit-archive +declare -A CUDA_DRIVER_MAP=( + ["12.6"]="12.6.3:561.17" + ["12.8"]="12.8.1:572.17" + ["12.9"]="12.9.1:576.40" + ["13.0"]="13.0.1:578.22" +) + +install_mingw() { + echo "Installing mingw-w64 cross-compiler..." 
+ + apt-get update + apt-get install -y --no-install-recommends \ + g++-mingw-w64-x86-64 \ + mingw-w64-tools \ + p7zip-full \ + wget + + # Verify installation + x86_64-w64-mingw32-g++ --version + + # Cleanup + apt-get clean + rm -rf /var/lib/apt/lists/* + + echo "mingw-w64 installation complete" +} + +get_torch_cuda_version() { + # Query PyTorch for its CUDA version + python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "" +} + +install_windows_cuda() { + # Get CUDA version from torch + TORCH_CUDA_VERSION=$(get_torch_cuda_version) + + if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then + echo "ERROR: Could not detect CUDA version from PyTorch." + echo "Make sure PyTorch with CUDA support is installed before running this script." + exit 1 + fi + + echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}" + + # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8") + CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2) + + # Look up the full version and driver version + if [ -z "${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" ]; then + echo "ERROR: CUDA version ${CUDA_MAJOR_MINOR} is not in the known version map." + echo "Known versions: ${!CUDA_DRIVER_MAP[*]}" + exit 1 + fi + + CUDA_INFO="${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" + CUDA_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f1) + CUDA_DRIVER_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f2) + + echo "Using CUDA ${CUDA_VERSION} with driver ${CUDA_DRIVER_VERSION}" + + echo "Installing Windows CUDA toolkit ${CUDA_VERSION}..." + + mkdir -p "${INSTALL_DIR}" + cd "${INSTALL_DIR}" + + CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe" + CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${CUDA_INSTALLER}" + + # Check if already downloaded and extracted + if [ -d "${INSTALL_DIR}/extracted/cuda_cudart" ]; then + echo "Windows CUDA toolkit already installed, skipping download..." 
+ return 0 + fi + + echo "Downloading CUDA installer from ${CUDA_URL}..." + wget -q "${CUDA_URL}" -O "${CUDA_INSTALLER}" + + echo "Extracting CUDA toolkit..." + 7z x "${CUDA_INSTALLER}" -o"extracted" -y + + # Clean up installer to save space + rm -f "${CUDA_INSTALLER}" + + echo "Windows CUDA toolkit installation complete" + echo "WINDOWS_CUDA_HOME=${INSTALL_DIR}/extracted/cuda_cudart/cudart" +} + +# Parse command line arguments +INSTALL_MINGW=false +INSTALL_CUDA=false + +while [[ $# -gt 0 ]]; do + case $1 in + --mingw) + INSTALL_MINGW=true + shift + ;; + --cuda) + INSTALL_CUDA=true + shift + ;; + --all) + INSTALL_MINGW=true + INSTALL_CUDA=true + shift + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--mingw] [--cuda] [--all]" + exit 1 + ;; + esac +done + +# Default to installing everything if no options specified +if [ "${INSTALL_MINGW}" = false ] && [ "${INSTALL_CUDA}" = false ]; then + INSTALL_MINGW=true + INSTALL_CUDA=true +fi + +if [ "${INSTALL_MINGW}" = true ]; then + install_mingw +fi + +if [ "${INSTALL_CUDA}" = true ]; then + install_windows_cuda +fi + +echo "Installation complete" diff --git a/.ci/docker/common/install_pytorch_cuda.sh b/.ci/docker/common/install_pytorch_cuda.sh new file mode 100644 index 00000000000..b75f8d564e2 --- /dev/null +++ b/.ci/docker/common/install_pytorch_cuda.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# Install PyTorch with CUDA support from prebuilt wheels +# This is used for the cuda-windows Docker image to get a specific CUDA version + +set -ex + +# shellcheck source=/dev/null +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" + +# Default CUDA version if not specified +CUDA_VERSION="${CUDA_VERSION:-12.8}" + +# Convert CUDA version to PyTorch wheel suffix (e.g., 12.8 -> cu128) +CUDA_SUFFIX="cu$(echo ${CUDA_VERSION} | tr -d '.')" + +echo "Installing PyTorch with CUDA ${CUDA_VERSION} (${CUDA_SUFFIX})..." + +# Install PyTorch from nightly with specific CUDA version +pip_install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/nightly/${CUDA_SUFFIX}" + +# Verify installation +python3 -c "import torch; print(f'PyTorch {torch.__version__} installed with CUDA {torch.version.cuda}')" + +echo "PyTorch CUDA installation complete" diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index b7478df5489..118873d9155 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -98,5 +98,17 @@ ARG QNN_SDK ARG MEDIATEK_SDK +ARG CUDA_WINDOWS_CROSS_COMPILE +ARG CUDA_VERSION +COPY ./common/install_pytorch_cuda.sh install_pytorch_cuda.sh +COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh +COPY ./common/utils.sh utils.sh +RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \ + CUDA_VERSION=${CUDA_VERSION} bash ./install_pytorch_cuda.sh && \ + bash ./install_cuda_windows_cross_compile.sh; \ + fi +RUN rm -f install_pytorch_cuda.sh install_cuda_windows_cross_compile.sh utils.sh +ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart + USER ci-user CMD ["bash"] diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index 98e950aec05..3318697abd7 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -1,7 +1,7 @@ # Test ExecuTorch CUDA Windows Cross-Compilation Export # This workflow tests model 
export targeting CUDA Windows using optimum-executorch. -# It runs on a Linux machine with CUDA and uses the install_windows_cuda_deps.py -# script to install the Windows CUDA cross-compilation dependencies. +# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows +# Docker image which has mingw pre-installed for Windows cross-compilation. name: Test CUDA Windows Export @@ -53,20 +53,24 @@ jobs: secrets-env: EXECUTORCH_HF_TOKEN runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: 12.6 - use-custom-docker-registry: false + gpu-arch-version: 12.8 + docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows submodules: recursive upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - echo "::group::Setup ExecuTorch" - ./install_executorch.sh + echo "::group::Verify pre-installed dependencies" + x86_64-w64-mingw32-g++ --version + python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" + echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" + ls -la "${WINDOWS_CUDA_HOME}" echo "::endgroup::" - echo "::group::Install Windows CUDA Dependencies" - python scripts/install_windows_cuda_deps.py + echo "::group::Setup ExecuTorch" + # Use --use-pt-pinned-commit to skip reinstalling PyTorch (already in Docker with CUDA support) + ./install_executorch.sh --use-pt-pinned-commit echo "::endgroup::" echo "::group::Setup Huggingface" diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index e3b72a6bcd6..7243c23dc03 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -41,7 +41,8 @@ jobs: executorch-ubuntu-22.04-zephyr-sdk, executorch-ubuntu-22.04-qnn-sdk, executorch-ubuntu-22.04-mediatek-sdk, - executorch-ubuntu-22.04-clang12-android + 
executorch-ubuntu-22.04-clang12-android, + executorch-ubuntu-22.04-cuda-windows ] include: - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64 From 0db8b5c8afc824587c36c51cdd0d59817c55675f Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 14:12:49 -0800 Subject: [PATCH 04/15] build cuda windows docker on cuda machine --- .github/workflows/docker-builds.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 7243c23dc03..0fa4d3685f7 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -41,12 +41,13 @@ jobs: executorch-ubuntu-22.04-zephyr-sdk, executorch-ubuntu-22.04-qnn-sdk, executorch-ubuntu-22.04-mediatek-sdk, - executorch-ubuntu-22.04-clang12-android, - executorch-ubuntu-22.04-cuda-windows + executorch-ubuntu-22.04-clang12-android ] include: - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64 runner: linux.arm64.2xlarge + - docker-image-name: executorch-ubuntu-22.04-cuda-windows + runner: linux.g5.4xlarge.nvidia.gpu runs-on: [self-hosted, "${{ matrix.runner }}"] env: From cc2b1ae72c62e554fb66176fe007378294f1f393 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 15:54:15 -0800 Subject: [PATCH 05/15] if you can believe it claude hallucinated the drivers --- .ci/docker/common/install_cuda_windows_cross_compile.sh | 5 ++--- scripts/install_windows_cuda_deps.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh index 21d4fa76a72..a7e102cd137 100644 --- a/.ci/docker/common/install_cuda_windows_cross_compile.sh +++ b/.ci/docker/common/install_cuda_windows_cross_compile.sh @@ -15,9 +15,8 @@ INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}" # Source: https://developer.nvidia.com/cuda-toolkit-archive declare -A CUDA_DRIVER_MAP=( 
["12.6"]="12.6.3:561.17" - ["12.8"]="12.8.1:572.17" - ["12.9"]="12.9.1:576.40" - ["13.0"]="13.0.1:578.22" + ["12.8"]="12.8.1:572.61" + ["12.9"]="12.9.1:576.57" ) install_mingw() { diff --git a/scripts/install_windows_cuda_deps.py b/scripts/install_windows_cuda_deps.py index 30611dcaaed..632305c26fc 100755 --- a/scripts/install_windows_cuda_deps.py +++ b/scripts/install_windows_cuda_deps.py @@ -24,10 +24,10 @@ # Source: https://developer.nvidia.com/cuda-toolkit-archive CUDA_DRIVER_VERSION_MAP = { # CUDA 12.9.x - "12.9.1": "576.40", - "12.9.0": "576.02", + "12.9.1": "576.57", + "12.9.0": "576.33", # CUDA 12.8.x - "12.8.1": "572.17", + "12.8.1": "572.61", "12.8.0": "571.96", # CUDA 12.6.x "12.6.3": "561.17", From b5d1b348b034ee0b548f12a1b7feeaaf056e3c30 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 15 Dec 2025 20:17:49 -0800 Subject: [PATCH 06/15] try conda --- .ci/docker/common/install_cuda_windows_cross_compile.sh | 4 ++-- .ci/docker/common/install_pytorch_cuda.sh | 2 +- .github/workflows/cuda-windows.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh index a7e102cd137..c350d822480 100644 --- a/.ci/docker/common/install_cuda_windows_cross_compile.sh +++ b/.ci/docker/common/install_cuda_windows_cross_compile.sh @@ -40,8 +40,8 @@ install_mingw() { } get_torch_cuda_version() { - # Query PyTorch for its CUDA version - python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "" + # Query PyTorch for its CUDA version using conda environment + conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "" } install_windows_cuda() { diff --git a/.ci/docker/common/install_pytorch_cuda.sh b/.ci/docker/common/install_pytorch_cuda.sh index b75f8d564e2..f5b0396354d 100644 --- a/.ci/docker/common/install_pytorch_cuda.sh +++ b/.ci/docker/common/install_pytorch_cuda.sh @@ 
-25,6 +25,6 @@ echo "Installing PyTorch with CUDA ${CUDA_VERSION} (${CUDA_SUFFIX})..." pip_install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/nightly/${CUDA_SUFFIX}" # Verify installation -python3 -c "import torch; print(f'PyTorch {torch.__version__} installed with CUDA {torch.version.cuda}')" +conda_run python3 -c "import torch; print(f'PyTorch {torch.__version__} installed with CUDA {torch.version.cuda}')" echo "PyTorch CUDA installation complete" diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index 3318697abd7..618c6559fb6 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -63,7 +63,7 @@ jobs: echo "::group::Verify pre-installed dependencies" x86_64-w64-mingw32-g++ --version - python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" + conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" ls -la "${WINDOWS_CUDA_HOME}" echo "::endgroup::" From 10909a015737a7f69f3aa95a762e5e234713d15d Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Tue, 16 Dec 2025 09:47:59 -0800 Subject: [PATCH 07/15] perms issue --- .ci/docker/common/install_cuda_windows_cross_compile.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh index c350d822480..e9310f7bf4c 100644 --- a/.ci/docker/common/install_cuda_windows_cross_compile.sh +++ b/.ci/docker/common/install_cuda_windows_cross_compile.sh @@ -92,6 +92,9 @@ install_windows_cuda() { echo "Extracting CUDA toolkit..." 
7z x "${CUDA_INSTALLER}" -o"extracted" -y + # Fix permissions so ci-user can access the files + chmod -R a+rX "${INSTALL_DIR}" + # Clean up installer to save space rm -f "${CUDA_INSTALLER}" From 4dde99f7c976f6c4bfb864de0a862bbe82af2c51 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Tue, 16 Dec 2025 12:04:48 -0800 Subject: [PATCH 08/15] try bumping pin --- torch_pin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_pin.py b/torch_pin.py index e934463cb70..4f86c779974 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ TORCH_VERSION = "2.10.0" -NIGHTLY_VERSION = "dev20251120" +NIGHTLY_VERSION = "dev20251216" From 550647db5f03ef6e8312764a4f441ca10ad56d85 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Tue, 16 Dec 2025 14:39:12 -0800 Subject: [PATCH 09/15] pin bump 2 --- torch_pin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_pin.py b/torch_pin.py index 4f86c779974..ab3f9c1c027 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ -TORCH_VERSION = "2.10.0" +TORCH_VERSION = "2.11.0" NIGHTLY_VERSION = "dev20251216" From 4e9a1f4a7a2b0e84404db234c8223c250fa0c78a Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Tue, 16 Dec 2025 17:47:43 -0800 Subject: [PATCH 10/15] pin 3 --- torch_pin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_pin.py b/torch_pin.py index ab3f9c1c027..17bde97f11b 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ TORCH_VERSION = "2.11.0" -NIGHTLY_VERSION = "dev20251216" +NIGHTLY_VERSION = "dev20251214" From 43bb970d916cb5f0b133019a699c95565340028e Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 17 Dec 2025 10:27:02 -0800 Subject: [PATCH 11/15] add nvcc to docker --- .ci/docker/common/install_cuda.sh | 57 +++++++++++++++++++++++++++++++ .ci/docker/ubuntu/Dockerfile | 9 ++++- 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 .ci/docker/common/install_cuda.sh diff --git a/.ci/docker/common/install_cuda.sh 
b/.ci/docker/common/install_cuda.sh new file mode 100644 index 00000000000..8464fba0747 --- /dev/null +++ b/.ci/docker/common/install_cuda.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Install Linux CUDA toolkit +# This installs nvcc and other CUDA development tools needed for compiling CUDA code + +set -ex + +# CUDA version must be specified (e.g., 12.8) +CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}" + +# Convert version format (e.g., 12.8 -> 12-8 for package names) +CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | tr '.' '-') + +# Add NVIDIA package repository +apt-get update +apt-get install -y --no-install-recommends \ + gnupg2 \ + ca-certificates \ + wget + +# Download and install the CUDA keyring +wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb" -O /tmp/cuda-keyring.deb +dpkg -i /tmp/cuda-keyring.deb +rm /tmp/cuda-keyring.deb + +apt-get update + +# Install CUDA toolkit (nvcc and development libraries) +# We install a minimal set of packages needed for compilation: +# - cuda-nvcc: The CUDA compiler +# - cuda-cudart-dev: CUDA runtime development files +# - cuda-nvrtc-dev: CUDA runtime compilation library +# - libcublas-dev: cuBLAS development files +# - libcusparse-dev: cuSPARSE development files +# - libcufft-dev: cuFFT development files +apt-get install -y --no-install-recommends \ + "cuda-nvcc-${CUDA_VERSION_DASH}" \ + "cuda-cudart-dev-${CUDA_VERSION_DASH}" \ + "cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \ + "libcublas-dev-${CUDA_VERSION_DASH}" \ + "libcusparse-dev-${CUDA_VERSION_DASH}" \ + "libcufft-dev-${CUDA_VERSION_DASH}" + +# Clean up +apt-get clean +rm -rf /var/lib/apt/lists/* + +# Verify installation +"/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" --version + +echo "CUDA ${CUDA_VERSION}
toolkit installation complete" +echo "CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}" diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index 118873d9155..24e34e6189b 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -100,14 +100,21 @@ ARG MEDIATEK_SDK ARG CUDA_WINDOWS_CROSS_COMPILE ARG CUDA_VERSION +COPY ./common/install_cuda.sh install_cuda.sh COPY ./common/install_pytorch_cuda.sh install_pytorch_cuda.sh COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh COPY ./common/utils.sh utils.sh RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \ + CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \ CUDA_VERSION=${CUDA_VERSION} bash ./install_pytorch_cuda.sh && \ bash ./install_cuda_windows_cross_compile.sh; \ fi -RUN rm -f install_pytorch_cuda.sh install_cuda_windows_cross_compile.sh utils.sh +RUN rm -f install_cuda.sh install_pytorch_cuda.sh install_cuda_windows_cross_compile.sh utils.sh +# Set up CUDA environment for Linux compilation (nvcc, etc.) +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=${CUDA_HOME}/bin:${PATH} +ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +# Windows CUDA for cross-compilation ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart USER ci-user From dc7087b33942cbb7f4e86d7ce31d08454379531d Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 17 Dec 2025 12:10:32 -0800 Subject: [PATCH 12/15] try not using docker again --- .github/workflows/cuda-windows.yml | 38 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index 618c6559fb6..b540ebabbf7 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -1,7 +1,7 @@ # Test ExecuTorch CUDA Windows Cross-Compilation Export # This workflow tests model export targeting CUDA Windows using optimum-executorch. 
-# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows -# Docker image which has mingw pre-installed for Windows cross-compilation. +# It runs on a Linux machine with CUDA and installs mingw + Windows CUDA SDK at runtime +# for Windows cross-compilation. name: Test CUDA Windows Export @@ -54,23 +54,47 @@ jobs: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: 12.8 - docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows + use-custom-docker-registry: false submodules: recursive upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - echo "::group::Verify pre-installed dependencies" + echo "::group::Install Windows cross-compilation dependencies" + # Install mingw-w64 cross-compiler + sudo apt-get update + sudo apt-get install -y --no-install-recommends g++-mingw-w64-x86-64 mingw-w64-tools p7zip-full x86_64-w64-mingw32-g++ --version - conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" + + # Download and extract Windows CUDA toolkit + # We need this for cross-compiling CUDA code for Windows + # Note: CUDA 12.8 installer is versioned as 12.8.1 with driver 572.61 + CUDA_INSTALLER_VERSION="12.8.1" + CUDA_DRIVER_VERSION="572.61" + WINDOWS_CUDA_INSTALL_DIR="/tmp/cuda-windows" + mkdir -p "${WINDOWS_CUDA_INSTALL_DIR}" + + CUDA_INSTALLER="cuda_${CUDA_INSTALLER_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe" + CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_INSTALLER_VERSION}/local_installers/${CUDA_INSTALLER}" + + echo "Downloading Windows CUDA toolkit from ${CUDA_URL}..." + wget -q "${CUDA_URL}" -O "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" + + echo "Extracting Windows CUDA toolkit..." 
+ 7z x "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" -o"${WINDOWS_CUDA_INSTALL_DIR}/extracted" -y + + # Clean up installer + rm -f "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" + + # Set environment variable for Windows CUDA + export WINDOWS_CUDA_HOME="${WINDOWS_CUDA_INSTALL_DIR}/extracted/cuda_cudart/cudart" echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" ls -la "${WINDOWS_CUDA_HOME}" echo "::endgroup::" echo "::group::Setup ExecuTorch" - # Use --use-pt-pinned-commit to skip reinstalling PyTorch (already in Docker with CUDA support) - ./install_executorch.sh --use-pt-pinned-commit + ./install_executorch.sh echo "::endgroup::" echo "::group::Setup Huggingface" From ba3b9a04363f8aff766a002e8879609d52afca1d Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 17 Dec 2025 14:04:32 -0800 Subject: [PATCH 13/15] back to docker --- .ci/docker/ubuntu/Dockerfile | 3 ++- .github/workflows/cuda-windows.yml | 38 ++++++------------------------ 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index 24e34e6189b..fab9df65795 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -113,7 +113,8 @@ RUN rm -f install_cuda.sh install_pytorch_cuda.sh install_cuda_windows_cross_com # Set up CUDA environment for Linux compilation (nvcc, etc.) 
ENV CUDA_HOME=/usr/local/cuda ENV PATH=${CUDA_HOME}/bin:${PATH} -ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +# Ensure system libstdc++ is found before conda's (GLIBCXX_3.4.30 compatibility); no trailing ':' when LD_LIBRARY_PATH is unset (an empty entry means the current directory) +ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} # Windows CUDA for cross-compilation ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index b540ebabbf7..17081bad80a 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -1,7 +1,7 @@ # Test ExecuTorch CUDA Windows Cross-Compilation Export # This workflow tests model export targeting CUDA Windows using optimum-executorch. -# It runs on a Linux machine with CUDA and installs mingw + Windows CUDA SDK at runtime -# for Windows cross-compilation. +# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows +# Docker image which has mingw and Windows CUDA SDK pre-installed for cross-compilation.
name: Test CUDA Windows Export @@ -54,47 +54,23 @@ jobs: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: 12.8 - use-custom-docker-registry: false + docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows submodules: recursive upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - echo "::group::Install Windows cross-compilation dependencies" - # Install mingw-w64 cross-compiler - sudo apt-get update - sudo apt-get install -y --no-install-recommends g++-mingw-w64-x86-64 mingw-w64-tools p7zip-full + echo "::group::Verify pre-installed dependencies" x86_64-w64-mingw32-g++ --version - - # Download and extract Windows CUDA toolkit - # We need this for cross-compiling CUDA code for Windows - # Note: CUDA 12.8 installer is versioned as 12.8.1 with driver 572.61 - CUDA_INSTALLER_VERSION="12.8.1" - CUDA_DRIVER_VERSION="572.61" - WINDOWS_CUDA_INSTALL_DIR="/tmp/cuda-windows" - mkdir -p "${WINDOWS_CUDA_INSTALL_DIR}" - - CUDA_INSTALLER="cuda_${CUDA_INSTALLER_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe" - CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_INSTALLER_VERSION}/local_installers/${CUDA_INSTALLER}" - - echo "Downloading Windows CUDA toolkit from ${CUDA_URL}..." - wget -q "${CUDA_URL}" -O "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" - - echo "Extracting Windows CUDA toolkit..." 
- 7z x "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" -o"${WINDOWS_CUDA_INSTALL_DIR}/extracted" -y - - # Clean up installer - rm -f "${WINDOWS_CUDA_INSTALL_DIR}/${CUDA_INSTALLER}" - - # Set environment variable for Windows CUDA - export WINDOWS_CUDA_HOME="${WINDOWS_CUDA_INSTALL_DIR}/extracted/cuda_cudart/cudart" + nvcc --version + python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" ls -la "${WINDOWS_CUDA_HOME}" echo "::endgroup::" echo "::group::Setup ExecuTorch" - ./install_executorch.sh + ./install_executorch.sh --use-pt-pinned-commit echo "::endgroup::" echo "::group::Setup Huggingface" From 19d961d50ac017242ab46b9d683c4081a946933c Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 17 Dec 2025 14:09:29 -0800 Subject: [PATCH 14/15] remove hallucinated hack --- .ci/docker/common/install_conda.sh | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/.ci/docker/common/install_conda.sh b/.ci/docker/common/install_conda.sh index 8c1c7da63d4..7e36af55cac 100755 --- a/.ci/docker/common/install_conda.sh +++ b/.ci/docker/common/install_conda.sh @@ -54,25 +54,6 @@ install_pip_dependencies() { popd } -fix_conda_ubuntu_libstdcxx() { - cat /etc/issue - # WARNING: This is a HACK from PyTorch core to be able to build PyTorch on 22.04. - # Specifically, ubuntu-20+ all comes lib libstdc++ newer than 3.30+, but anaconda - # is stuck with 3.29. So, remove libstdc++6.so.3.29 as installed by - # https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0 - # - # PyTorch sev: https://github.com/pytorch/pytorch/issues/105248 - # Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh - if grep -e "2[02].04." /etc/issue >/dev/null; then - rm /opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so* - fi -} - install_miniconda install_python install_pip_dependencies -# Hack breaks the job on aarch64 but is still necessary everywhere -# else. 
-if [ "$(uname -m)" != "aarch64" ]; then - fix_conda_ubuntu_libstdcxx -fi From daab8f0c76f1922976e194fdeafac5b92419191c Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 17 Dec 2025 22:31:43 -0800 Subject: [PATCH 15/15] python3 -> python --- .ci/docker/common/install_pytorch_cuda.sh | 12 ++++++++++-- .github/workflows/cuda-windows.yml | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.ci/docker/common/install_pytorch_cuda.sh b/.ci/docker/common/install_pytorch_cuda.sh index f5b0396354d..ab836fde063 100644 --- a/.ci/docker/common/install_pytorch_cuda.sh +++ b/.ci/docker/common/install_pytorch_cuda.sh @@ -16,15 +16,23 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" # Default CUDA version if not specified CUDA_VERSION="${CUDA_VERSION:-12.8}" +# Ensure PYTHON_VERSION is set (should be set by Dockerfile ENV) +if [ -z "${PYTHON_VERSION}" ]; then + echo "ERROR: PYTHON_VERSION environment variable is not set" + exit 1 +fi + +echo "Using Python version: ${PYTHON_VERSION}" + # Convert CUDA version to PyTorch wheel suffix (e.g., 12.8 -> cu128) CUDA_SUFFIX="cu$(echo ${CUDA_VERSION} | tr -d '.')" echo "Installing PyTorch with CUDA ${CUDA_VERSION} (${CUDA_SUFFIX})..." 
-# Install PyTorch from nightly with specific CUDA version +# Install PyTorch from nightly with specific CUDA version into the conda environment pip_install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/nightly/${CUDA_SUFFIX}" # Verify installation -conda_run python3 -c "import torch; print(f'PyTorch {torch.__version__} installed with CUDA {torch.version.cuda}')" +conda_run python -c "import torch; print(f'PyTorch {torch.__version__} installed with CUDA {torch.version.cuda}')" echo "PyTorch CUDA installation complete" diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index 17081bad80a..14289641117 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -64,13 +64,13 @@ jobs: echo "::group::Verify pre-installed dependencies" x86_64-w64-mingw32-g++ --version nvcc --version - python3 -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" + python -c "import torch; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" ls -la "${WINDOWS_CUDA_HOME}" echo "::endgroup::" echo "::group::Setup ExecuTorch" - ./install_executorch.sh --use-pt-pinned-commit + PYTHON_EXECUTABLE=python ./install_executorch.sh --use-pt-pinned-commit echo "::endgroup::" echo "::group::Setup Huggingface"