From b54d281b48e72e4eb71ae9264d60140e0a03b8b5 Mon Sep 17 00:00:00 2001 From: tkocmathla Date: Wed, 12 Nov 2025 07:42:21 -0500 Subject: [PATCH] Fix #4210. Fix data_dir assignment and add tests for 2 new cases: 1. When `--temp-dir` is set and `--data-dir` is not 2. When `raw_model_modified` and model uses external data --- .../torch_mlir/tools/import_onnx/__main__.py | 9 +- .../python/onnx_importer/command_line_test.py | 82 +++++++++++++++---- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/python/torch_mlir/tools/import_onnx/__main__.py b/python/torch_mlir/tools/import_onnx/__main__.py index 5ec5c63f820e..972cfdaa8678 100644 --- a/python/torch_mlir/tools/import_onnx/__main__.py +++ b/python/torch_mlir/tools/import_onnx/__main__.py @@ -71,10 +71,7 @@ def load_onnx_model(args: argparse.Namespace) -> onnx.ModelProto: # temp directory instead of a hard-coded path in order to avoid data races # by default. input_dir = os.path.dirname(os.path.abspath(args.input_file)) - temp_dir = ( - Path(input_dir if args.temp_dir is None else args.temp_dir) - / "onnx-importer-temp" - ) + temp_dir = Path(args.temp_dir or input_dir) / "onnx-importer-temp" shutil.rmtree(temp_dir, ignore_errors=True) temp_dir.mkdir(exist_ok=True) @@ -121,10 +118,13 @@ def load_onnx_model(args: argparse.Namespace) -> onnx.ModelProto: # onnx.shape_inference.infer_shapes_path(temp_raw_file, temp_inferred_file) # inferred_model = onnx.load(temp_inferred_file) + data_dir = Path(args.data_dir or input_dir) + # Model is too big for in-memory inference: do file-based shape inference # to a temp file. # First need to save as model when it has been changed (e.g. version conversion). if raw_model_modified: + data_dir = temp_dir temp_raw_file = temp_dir / "raw.onnx" onnx.save(raw_model, temp_raw_file, save_as_external_data=True) temp_inferred_file = temp_dir / "inferred.onnx" @@ -146,7 +146,6 @@ def load_onnx_model(args: argparse.Namespace) -> onnx.ModelProto: # Load the temp file and the external data. inferred_model = onnx.load(temp_inferred_file, load_external_data=False) - data_dir = Path(input_dir if args.temp_dir is None else args.data_dir) onnx.load_external_data_for_model(inferred_model, str(data_dir)) # Remove the inferred shape file unless asked to keep it diff --git a/test/python/onnx_importer/command_line_test.py b/test/python/onnx_importer/command_line_test.py index 998e1098030f..d9dbbe14abae 100644 --- a/test/python/onnx_importer/command_line_test.py +++ b/test/python/onnx_importer/command_line_test.py @@ -13,6 +13,7 @@ import shutil import sys import subprocess +import tempfile import unittest import unittest.mock @@ -39,6 +40,8 @@ OUTPUT_PATH.mkdir(parents=True, exist_ok=True) +MOCK_MAXIMUM_PROTOBUF = 1 << 20 + def const_model() -> onnx.ModelProto: # Note: data_path must be relative to model_file @@ -87,7 +90,26 @@ def linear_model() -> onnx.ModelProto: return onnx_model -ALL_MODELS = [const_model, linear_model] +def path_based_shape_inference_model() -> onnx.ModelProto: + # Create a model with a serialized form that's large enough to require + # path-based shape inference. + dtype = numpy.float32 + byte_size = numpy.dtype(dtype).itemsize + tensor_size = MOCK_MAXIMUM_PROTOBUF // byte_size + 1 + large_tensor = numpy.random.rand(tensor_size).astype(dtype) + assert large_tensor.nbytes > MOCK_MAXIMUM_PROTOBUF + node1 = make_node( + "Constant", + [], + ["large_const"], + value=numpy_helper.from_array(large_tensor, name="large_const"), + ) + X = make_tensor_value_info("large_const", TensorProto.FLOAT, [tensor_size]) + graph = make_graph([node1], "large_const_graph", [], [X]) + return make_model(graph) + + +ALL_MODELS = [const_model, linear_model, path_based_shape_inference_model] class CommandLineTest(unittest.TestCase): @@ -110,7 +132,12 @@ def run_model_intern(self, onnx_model: onnx.ModelProto, model_name: str): args = __main__.parse_arguments([str(model_file), "-o", str(mlir_file)]) __main__.main(args) - def run_model_extern(self, onnx_model: onnx.ModelProto, model_name: str): + def run_model_extern( + self, + onnx_model: onnx.ModelProto, + model_name: str, + extra_args: list[str] | None = None, + ): run_path = self.get_run_path(model_name) model_file = run_path / f"{model_name}-e.onnx" mlir_file = run_path / f"{model_name}-e.torch.mlir" @@ -127,20 +154,41 @@ def run_model_extern(self, onnx_model: onnx.ModelProto, model_name: str): onnx.save(onnx_model, model_file) temp_dir = run_path / "temp" temp_dir.mkdir(exist_ok=True) - args = __main__.parse_arguments( - [ - str(model_file), - "-o", - str(mlir_file), - "--keep-temps", - "--temp-dir", - str(temp_dir), - "--data-dir", - str(run_path), - ] - ) + raw_args = [ + str(model_file), + "-o", + str(mlir_file), + "--keep-temps", + "--temp-dir", + str(temp_dir), + "--data-dir", + str(run_path), + ] + if extra_args: + raw_args.extend(extra_args) + args = __main__.parse_arguments(raw_args) __main__.main(args) + @unittest.mock.patch("onnx.checker.MAXIMUM_PROTOBUF", MOCK_MAXIMUM_PROTOBUF) + def run_model_explicit_temp_implicit_data( + self, onnx_model: onnx.ModelProto, model_name: str + ): + run_path = self.get_run_path(model_name) + model_file = run_path / f"{model_name}-explicit_temp_implicit_data.onnx" + mlir_file = run_path / f"{model_name}-explicit_temp_implicit_data.torch.mlir" + onnx.save(onnx_model, model_file) + with tempfile.TemporaryDirectory(dir=run_path) as temp_dir: + args = __main__.parse_arguments( + [ + str(model_file), + "-o", + str(mlir_file), + "--temp-dir", + str(temp_dir), + ] + ) + __main__.main(args) + def test_all(self): for model_func in ALL_MODELS: model_name = model_func.__name__ @@ -150,6 +198,12 @@ def test_all(self): self.run_model_intern(model, model_name) with self.subTest("External data"): self.run_model_extern(model, model_name) + with self.subTest("External data, raw model modified"): + self.run_model_extern( + model, model_name, extra_args=["--clear-domain"] + ) + with self.subTest("Explicit temp dir, implicit data dir"): + self.run_model_explicit_temp_implicit_data(model, model_name) if __name__ == "__main__":