
Android ExecuTorch: Improve ET Dump (Prod / Release ready) with Runtime control #16360

@psiddh


ETDump Android Runtime Control - Feature Summary

🎯 What Changed

Before: Hardcoded profiling with always-on overhead once built in; inflexible and not production-ready
After: Runtime control, zero overhead when disabled, production-ready
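
For orientation, here is the full set of runtime controls used throughout the examples below, collected into one sketch (the calls and their arguments are taken from the snippets in this summary; the surrounding method, TAG, and `path` are placeholders):

void profilingLifecycleSketch(Module module, EValue[] inputs, String path) {
    ETDump.enableProfiling(path);             // turn collection on; output goes to `path`
    module.forward(inputs);                   // this run is profiled

    if (ETDump.isProfilingEnabled()) {        // query the current state at any point
        ETDump.writeETDump();                 // flush collected data to `path`
        byte[] data = ETDump.getETDumpData(); // or grab the same data in memory
        Log.i(TAG, "Captured " + (data != null ? data.length : 0) + " bytes");
    }

    ETDump.disableProfiling();                // back to zero-overhead inference
    module.forward(inputs);                   // this run is not profiled
}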


✨ Key Improvements

1. Runtime Control (Most Important!)

// OLD: Always profiling if built with flag (can't turn off)
Module module = Module.load("model.pte");
module.forward(inputs); // ALWAYS profiling = ALWAYS overhead

// NEW: Enable/disable on-demand
module.forward(inputs); // FAST! No profiling by default

ETDump.enableProfiling("/sdcard/profile.etdump");
module.forward(inputs); // NOW profiling
ETDump.disableProfiling();
module.forward(inputs); // FAST again!

2. Configurable Output Paths

// OLD: Hardcoded /data/local/tmp/result.etdump (needs root!)
module.etdump(); // ❌ Requires rooted device

// NEW: Any writable path
ETDump.enableProfiling(getCacheDir() + "/profile.etdump"); // ✅ App directory
ETDump.enableProfiling(getExternalFilesDir(null) + "/debug.etdump"); // ✅ External storage
ETDump.enableProfiling("/sdcard/Android/data/com.myapp/profile.etdump"); // ✅ App-specific

3. Zero Overhead

// OLD: 2-5% performance penalty always present
for (int i = 0; i < 1000; i++) {
    module.forward(inputs); // ALL runs slowed down
}

// NEW: Zero overhead when disabled
for (int i = 0; i < 1000; i++) {
    module.forward(inputs); // FULL SPEED!
}

4. Single APK Deployment

// OLD: Need TWO APKs
// my-app-debug.apk (with profiling, slow)
// my-app-release.apk (without profiling, can't debug)

// NEW: ONE APK with runtime control
if (BuildConfig.DEBUG || remoteConfig.enableProfiling) {
    ETDump.enableProfiling(path);
}
// Same APK in production and debug! ✅


🎓 Use Case Examples

Use Case 1: Debug Slow Inferences in Production

long start = System.currentTimeMillis();
module.forward(inputs);
long duration = System.currentTimeMillis() - start;

if (duration > 500) { // Slower than expected
    // Automatically capture profile for analysis
    ETDump.enableProfiling(getCacheDir() + "/slow_run_" + duration + "ms.etdump");
    module.forward(inputs); // Re-run with profiling
    ETDump.writeETDump();
    uploadToAnalyticsServer(ETDump.getETDumpData()); // Send to team
    ETDump.disableProfiling();
}

Use Case 2: A/B Test Model Performance

// Compare two models side-by-side
ETDump.enableProfiling(getCacheDir() + "/model_v1.etdump");
Module modelV1 = Module.load("model_v1.pte");
long timeV1 = benchmarkModel(modelV1);
ETDump.writeETDump();

ETDump.enableProfiling(getCacheDir() + "/model_v2.etdump");
Module modelV2 = Module.load("model_v2.pte");
long timeV2 = benchmarkModel(modelV2);
ETDump.writeETDump();

// Analyze which is faster
Log.i(TAG, "Model V1: " + timeV1 + "ms, Model V2: " + timeV2 + "ms");
// Use Inspector to see operator-level breakdown
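
benchmarkModel(...) above is left undefined; one plausible sketch is a warm-up followed by an averaged wall-clock measurement (`inputs` is assumed in scope, as in the other examples):

// Hypothetical helper: average wall-clock latency over several runs after a warm-up.
static long benchmarkModel(Module module) {
    for (int i = 0; i < 3; i++) {
        module.forward(inputs); // warm-up, not timed
    }
    final int runs = 10;
    long start = System.currentTimeMillis();
    for (int i = 0; i < runs; i++) {
        module.forward(inputs);
    }
    return (System.currentTimeMillis() - start) / runs; // average ms per inference
}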

Use Case 3: Profile 1 of 1000 Inferences

// Only profile one specific run
for (int i = 0; i < 1000; i++) {
    if (i == 42) {
        ETDump.enableProfiling(getCacheDir() + "/inference_42.etdump");
    }

    module.forward(inputs); // 999 fast, 1 profiled

    if (i == 42) {
        ETDump.writeETDump();
        ETDump.disableProfiling();
    }
}

Use Case 4: Remote-Controlled Profiling

public class ModelRunner {
    void runInference() {
        // Check remote config (Firebase, etc.)
        if (FirebaseRemoteConfig.getInstance().getBoolean("enable_profiling")) {
            String userId = getCurrentUserId();
            ETDump.enableProfiling(getCacheDir() + "/profile_" + userId + ".etdump");
        }

        module.forward(inputs);

        if (ETDump.isProfilingEnabled()) {
            ETDump.writeETDump();
            uploadToServer(ETDump.getETDumpData());
            ETDump.disableProfiling();
        }
    }
}
// Enable profiling for specific users without rebuilding! ✅
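
One caveat with the Remote Config gate above: values are cached locally and only reflect the server after a fetch-and-activate, typically done at app start. A minimal sketch (same flag name as above; TAG assumed):

// Fetch and activate Remote Config so getBoolean("enable_profiling")
// reflects the server-side flag on subsequent calls.
FirebaseRemoteConfig remoteConfig = FirebaseRemoteConfig.getInstance();
remoteConfig.fetchAndActivate().addOnCompleteListener(task -> {
    boolean enabled = remoteConfig.getBoolean("enable_profiling");
    Log.i(TAG, "Remote profiling flag: " + enabled);
});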

Use Case 5: Conditional Profiling (Device-Specific)

// Only profile on high-end devices
boolean isHighEnd = Runtime.getRuntime().availableProcessors() >= 8;

if (isHighEnd && BuildConfig.DEBUG) {
    ETDump.enableProfiling(getExternalFilesDir(null) + "/profile.etdump");
    module.forward(inputs);
    ETDump.writeETDump();
    ETDump.disableProfiling();
} else {
    module.forward(inputs); // Fast, no profiling
}

Use Case 6: CI/CD Performance Regression Tests

@Test
public void testModelPerformanceRegression() {
    String etdumpPath = new File(context.getCacheDir(), "ci_test.etdump").getAbsolutePath();

    ETDump.enableProfiling(etdumpPath);
    Module module = Module.load(modelPath);

    long start = System.nanoTime();
    module.forward(inputs);
    long duration = System.nanoTime() - start;

    ETDump.writeETDump();

    // Assert performance hasn't regressed
    assertTrue("Model too slow: " + duration + "ns", duration < 50_000_000); // 50ms

    // Can analyze ETDump file in CI for operator-level breakdowns
    byte[] profileData = ETDump.getETDumpData();
    assertNotNull(profileData);
}

Use Case 7: Compare Delegate Performance

// Profile XNNPACK vs default backend
ETDump.enableProfiling(getCacheDir() + "/xnnpack.etdump");
Module xnnpackModel = Module.load("model_xnnpack.pte");
xnnpackModel.forward(inputs);
ETDump.writeETDump();

ETDump.enableProfiling(getCacheDir() + "/cpu.etdump");
Module cpuModel = Module.load("model_cpu.pte");
cpuModel.forward(inputs);
ETDump.writeETDump();

// Use ExecuTorch Inspector to compare:
// python -m executorch.sdk.inspector.inspector_cli
// --etdump_path xnnpack.etdump --model_path model_xnnpack.pte

Use Case 8: Programmatic Upload (No File I/O)

// Get profiling data without writing to file
ETDump.enableProfiling("/tmp/dummy.etdump"); // Path required but not used
module.forward(inputs);

byte[] profileData = ETDump.getETDumpData(); // Get data directly
if (profileData != null) {
    // Upload to analytics server
    analyticsClient.upload("model_profile", profileData);

    // Or compress and send
    byte[] compressed = gzip(profileData);
    sendToServer(compressed);
}

ETDump.disableProfiling();
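
The gzip(...) call above is a placeholder, not part of the ETDump API; a minimal sketch using java.util.zip could look like this:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

// Compress the raw ETDump bytes before sending them over the network.
static byte[] gzip(byte[] data) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (GZIPOutputStream gz = new GZIPOutputStream(bos)) {
        gz.write(data);
    }
    return bos.toByteArray(); // the stream is flushed when try-with-resources closes it
}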

🚀 Complete Example

public class InferenceActivity extends Activity {
    private Module module;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        // Load model (profiling disabled by default)
        module = Module.load(getModelPath());
    }

    public void runInference(Tensor input) {
        // Normal fast inference
        EValue[] outputs = module.forward(EValue.from(input));

        // Display results
        updateUI(outputs);
    }

    public void debugSlowInference(Tensor input) {
        // Enable profiling for debugging
        String profilePath = getCacheDir() + "/debug_" + System.currentTimeMillis() + ".etdump";
        ETDump.enableProfiling(profilePath);

        long start = System.nanoTime();
        EValue[] outputs = module.forward(EValue.from(input));
        long duration = System.nanoTime() - start;

        // Save profile
        ETDump.writeETDump();
        ETDump.disableProfiling();

        Log.i(TAG, "Profiled inference took " + (duration / 1_000_000) + "ms");
        Log.i(TAG, "Profile saved to: " + profilePath);

        // Can now analyze with:
        // adb pull <profilePath> .
        // python -m executorch.sdk.inspector.inspector_cli --etdump_path profile.etdump
    }
}


✅ Bottom Line

One feature, massive improvements:

  • ✅ Zero overhead in production
  • ✅ Debug issues on-demand
  • ✅ Single APK deployment
  • ✅ Full ExecuTorch Inspector integration

Build once, profile when needed!
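
The enable/run/write/disable pattern used throughout can be folded into a small wrapper so call sites stay one line; a sketch, assuming the ETDump API exactly as used above:

// Illustrative helper: run a single profiled inference and return the ETDump bytes.
// Everything outside this call runs at full speed with profiling disabled.
static byte[] profiledForward(Module module, String etdumpPath, EValue... inputs) {
    ETDump.enableProfiling(etdumpPath);
    try {
        module.forward(inputs);
        ETDump.writeETDump();
        return ETDump.getETDumpData();
    } finally {
        ETDump.disableProfiling(); // always restore zero-overhead mode
    }
}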
