From ca444a2f732b1b52b88e2412c0891b6dab1dad34 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 8 Dec 2025 16:52:07 -0600 Subject: [PATCH 1/4] add cmake to read-the-docs --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 928d6343..88aef97a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,6 +9,8 @@ build: os: ubuntu-22.04 tools: python: "3.10" + apt_packages: + - cmake # Install CMake system-wide # Build documentation in the docs/ directory with Sphinx sphinx: From 8fff84f1de43caa8904cec2acc4745a7795ac5d0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 8 Dec 2025 21:47:22 -0600 Subject: [PATCH 2/4] fix some doc errors--formatting can still be improved --- src/qonnx/custom_op/registry.py | 9 ++++--- src/qonnx/transformation/fixedpt_quantize.py | 2 ++ src/qonnx/transformation/insert.py | 4 +-- src/qonnx/transformation/qcdq_to_qonnx.py | 24 +++++++++++------- src/qonnx/transformation/qonnx_to_qcdq.py | 4 +++ src/qonnx/transformation/quantize_graph.py | 26 +++++++++++++++----- 6 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/qonnx/custom_op/registry.py b/src/qonnx/custom_op/registry.py index e9f6f0e7..f6f52376 100644 --- a/src/qonnx/custom_op/registry.py +++ b/src/qonnx/custom_op/registry.py @@ -469,9 +469,12 @@ def get_ops_in_domain(domain: str) -> List[Tuple[str, Type[CustomOp]]]: List of (op_type, op_class) tuples Example: - ops = get_ops_in_domain("qonnx.custom_op.general") - for op_name, op_class in ops: - print(f"{op_name}: {op_class}") + :: + + ops = get_ops_in_domain("qonnx.custom_op.general") + for op_name, op_class in ops: + print(f"{op_name}: {op_class}") + """ module_path = resolve_domain(domain) ops_dict = {} diff --git a/src/qonnx/transformation/fixedpt_quantize.py b/src/qonnx/transformation/fixedpt_quantize.py index 3b3357ed..3ebbf538 100644 --- a/src/qonnx/transformation/fixedpt_quantize.py +++ b/src/qonnx/transformation/fixedpt_quantize.py @@ -43,6 +43,7 @@ class FixedPointQuantizeParamsFromDict(Transformation): """ Quantize model parameters to a given fixed-point representation. The self.max_err dictionary stores the maximum error for each quantized input after calling. + Parameters: fixedpt_dict: Dictionary containing tensor names and their corresponding target fixed-point data type or its canonical name @@ -91,6 +92,7 @@ class FixedPointQuantizeParams(Transformation): Identifies specific operations in a model (e.g., "Add", "Mul") using a filter function, and quantizes any non-quantized input initializers to the given fixed-point representation. The self.max_err dictionary stores the maximum error for each quantized input after calling. + Parameters: fixedpt_dtype: The fixed-point data type or its canonical name to use for quantization. op_filter: A lambda function to filter operations in the model graph diff --git a/src/qonnx/transformation/insert.py b/src/qonnx/transformation/insert.py index ae550b40..b736f1e4 100644 --- a/src/qonnx/transformation/insert.py +++ b/src/qonnx/transformation/insert.py @@ -57,8 +57,8 @@ class InsertIdentity(Transformation): the graph output will be replaced with a new tensor name _identity Parameters: - tensor_name (str): The name of the tensor where the Identity node will be inserted. - producer_or_consumer (str): Indicates whether the Identity node will be inserted before ('producer') + tensor_name (str): The name of the tensor where the Identity node will be inserted. + producer_or_consumer (str): Indicates whether the Identity node will be inserted before ('producer') or after ('consumer') the tensor_name. """ diff --git a/src/qonnx/transformation/qcdq_to_qonnx.py b/src/qonnx/transformation/qcdq_to_qonnx.py index b7e35c0d..faaf7dbf 100644 --- a/src/qonnx/transformation/qcdq_to_qonnx.py +++ b/src/qonnx/transformation/qcdq_to_qonnx.py @@ -41,7 +41,10 @@ def extract_elem_type(elem_type: int, clip_range=None) -> Tuple[int, int, bool]: """ Return Quant attribute specification based on element type and (optional) clipping range. - Returns: (bitwidth, signed, is_narrow_qnt) + + Returns: + (bitwidth, signed, is_narrow_qnt) + """ is_narrow = False # pylint: disable=no-member @@ -82,14 +85,17 @@ class QCDQToQuant(Transformation): during the quantization process into a QONNX Quant node. If a Clip node is found between the QuantizeLinear+DequantizeLinear, this will be taken into account for the Quant bitwidth calculation. - Input - ----- - A model potentially quantized with QuantizeLinear, (optional) Clip and - DequantizeLinear nodes. - Output - ------ - A model with QuantizeLinear, Clip and DequantizeLinear nodes re-fused back into QONNX - Quant nodes. + + Input: + + A model potentially quantized with QuantizeLinear, (optional) Clip and + DequantizeLinear nodes. + + Output: + + A model with QuantizeLinear, Clip and DequantizeLinear nodes re-fused back into QONNX + Quant nodes. + """ def __init__(self) -> None: diff --git a/src/qonnx/transformation/qonnx_to_qcdq.py b/src/qonnx/transformation/qonnx_to_qcdq.py index 28a3495b..9b22ba0c 100644 --- a/src/qonnx/transformation/qonnx_to_qcdq.py +++ b/src/qonnx/transformation/qonnx_to_qcdq.py @@ -120,12 +120,14 @@ def qcdq_pattern(op, x, scale, zero_point, bitwidth, signed, narrow, rounding_mo def is_valid_qcdq_transformation(context, x, scale, zero_point, bitwidth, signed, narrow, rounding_mode, **_) -> bool: """Condition to check if the Quant node can be replaced. The following conditions must be satisfied: + - the scale, zero-point and bitwidth inputs for Quant must be statically specified by an initializer - the bitwidth must be an integer in the range [2, 8] # TODO: Change max bitwidth to 16 for opset >= 21 - the zero-point tensor must be zero - the scale must be a scalar value or 1D tensor - the rounding_mode attribute must be ROUND + """ # Check scale @@ -158,12 +160,14 @@ class QuantToQCDQ(Transformation): """Replace QONNX Quant-style quantization nodes with QuantizeLinear -> Clip -> DequantizeLinear (QCDQ)-style quantization nodes. The following restictions apply on the Quant: + - the scale, zero-point and bitwidth inputs for Quant must be statically specified by an initializer - the bitwidth must be an integer in the range [2, 8] - the zero-point tensor must be zero - the scale must be a scalar value or 1D tensor - the rounding_mode attribute must be ROUND + BipolarQuant is not (yet) supported. """ diff --git a/src/qonnx/transformation/quantize_graph.py b/src/qonnx/transformation/quantize_graph.py index 230650bd..5a4df025 100644 --- a/src/qonnx/transformation/quantize_graph.py +++ b/src/qonnx/transformation/quantize_graph.py @@ -144,33 +144,45 @@ class QuantizeGraph(Transformation): as the parameters. 1) Expectations: + a) Onnx model in the modelwraper format. b) Model must be cleaned using qonnx.util.cleanup.cleanup_model() c) Batchsize to be set. 2) Steps to transform are: + Step1: Finding the input for the quant node. + Step2: Finding the consumer of the quant node output. + Step3: Finding the shape for the output tensor of quant node. + Note: The output tensor of the quant node must have the same shape as the consumer of the input - to the quant node. + to the quant node. 3) Input: + A dict "quantnode_map" specifying the criterion, positions, and input parameters like scale, bitwidth, zeropoint, and others for a specific quantnode. Criterion: - a) name: This will allow users to add quant nodes for specific node like "Conv_0" and "Gemm_0". + + a) name: + This will allow users to add quant nodes for specific node like "Conv_0" and "Gemm_0". Note: using this users can have quant nodes with different parameters. Ex: quantizing "Conv_0" and "Conv_1" with bitwidth of 4 and 6, respectively. - b) op_type: This will allow users to add quant nodes for all nodes of a particular op_type such + + b) op_type: + This will allow users to add quant nodes for all nodes of a particular op_type such as, "Conv", "Gemm", and others. Note: All quant nodes created using op_type criterion will have the same input parameters (scale, zeropoint, bitwidth, and others.) - c) name and op_type: In this case, quant nodes will be added with precedence to "Name" - in comparison to "op_type". + + c) name and op_type: + In this case, quant nodes will be added with precedence to "Name" in comparison to "op_type". Positions: ("input", index) or ("output", index) + a) "input": indicates that the user want to quantize the input of the selected node. b) "output": indicates that the user want to quantize the output of the selected node. c) index: refers to the input/output index to quantize (a node can have multiple inputs and outputs) @@ -188,7 +200,8 @@ class QuantizeGraph(Transformation): 5) Return: Returns a model with new quant nodes created at the positions specified using the "quantnode_map". - 6) Example: + 6) Example:: + quantnode_map = {"name": {"Conv_0": [(("input", 0), (1, 0, 8, 0, 1, "ROUND")), (("input", 1), (1, 0, 8, 0, 1, "ROUND")), (("output", 0), (1, 0, 8, 0, 1, "ROUND"))], @@ -200,6 +213,7 @@ class QuantizeGraph(Transformation): (("input", 1), (1, 0, 8, 0, 1, "ROUND")), (("input", 2), (1, 0, 8, 0, 1, "ROUND")), (("output", 0), (1, 0, 8, 0, 1, "ROUND"))]}} + """ def __init__(self, quantnode_map): From 8209bea1f79ca534f293fd1f1bf9a5c92a306462 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 8 Dec 2025 21:52:31 -0600 Subject: [PATCH 3/4] fix links now that versions are included --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 09c19a9c..14be9489 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ QONNX .. note:: **QONNX** is currently under active development. APIs will likely change. -QONNX (Quantized ONNX) introduces three new custom operators -- `Quant `_, `BipolarQuant `_ and `Trunc `_ -- in order to represent arbitrary-precision uniform quantization in ONNX. This enables: +QONNX (Quantized ONNX) introduces three new custom operators -- `Quant `_, `BipolarQuant `_ and `Trunc `_ -- in order to represent arbitrary-precision uniform quantization in ONNX. This enables: * Representation of binary, ternary, 3-bit, 4-bit, 6-bit or any other quantization. From 290d81ceeacfe8415e65befd0111ece53df4c92d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 9 Dec 2025 17:34:19 -0600 Subject: [PATCH 4/4] update some documentation, links --- docs/changelog.rst | 9 ++++++++- docs/conf.py | 10 +++++++--- docs/index.rst | 20 +++++++++++++++----- docs/license.rst | 2 +- docs/readme.rst | 3 ++- docs/requirements.txt | 2 ++ docs/tutorials.rst | 12 ++++-------- 7 files changed, 39 insertions(+), 19 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 91fa1518..bebdb130 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1 +1,8 @@ -.. _changes: +======================== +Release Notes +======================== + +.. changelog:: + :changelog-url: https://fastmachinelearning.org/qonnx/release_notes.html + :github: https://github.com/fastmachinelearning/qonnx/releases/ + :pypi: https://pypi.org/project/qonnx/ diff --git a/docs/conf.py b/docs/conf.py index 4fb39d6d..96f4a3ec 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,13 +72,17 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.autosummary', 'sphinx.ext.coverage', #'sphinx.ext.viewcode', 'sphinx.ext.doctest', 'sphinx.ext.ifconfig', 'sphinx.ext.mathjax', - 'sphinx.ext.napoleon'] + 'sphinx.ext.napoleon', 'myst_parser', 'sphinx_github_changelog'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = { + '.rst': 'restructuredtext', + '.txt': 'restructuredtext', + '.md': 'markdown', +} # The encoding of source files. # source_encoding = 'utf-8-sig' @@ -88,7 +92,7 @@ # General information about the project. project = u'qonnx' -copyright = u'2021-2022 QONNX Contributors' +copyright = u'2021-2025 QONNX Contributors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/index.rst b/docs/index.rst index 14be9489..f4652996 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,9 +4,10 @@ QONNX .. note:: **QONNX** is currently under active development. APIs will likely change. -QONNX (Quantized ONNX) introduces three new custom operators -- `Quant `_, `BipolarQuant `_ and `Trunc `_ -- in order to represent arbitrary-precision uniform quantization in ONNX. This enables: +QONNX (Quantized ONNX) introduces four new custom operators -- `IntQuant`_, `BipolarQuant`_, `FloatQuant`_, and `Trunc`_ +-- in order to represent arbitrary-precision uniform quantization in ONNX. This enables: -* Representation of binary, ternary, 3-bit, 4-bit, 6-bit or any other quantization. +* Representation of binary, ternary, 3-bit, 4-bit, 6-bit or any other quantization, or quantized floating-point values. * Quantization is an operator itself, and can be applied to any parameter or layer input. @@ -33,11 +34,13 @@ Quickstart Operator definitions +++++++++++++++++++++ -* `Quant `_ for 2-to-arbitrary-bit quantization, with scaling and zero-point +* `IntQuant`_ for 2-to-arbitrary-bit quantization, with scaling and zero-point -* `BipolarQuant `_ for 1-bit (bipolar) quantization, with scaling and zero-point +* `BipolarQuant`_ for 1-bit (bipolar) quantization, with scaling and zero-point -* `Trunc `_ for truncating to a specified number of bits, with scaling and zero-point +* `FloatQuant`_ for arbitrary-precision-float-quantized values + +* `Trunc`_ for truncating to a specified number of bits, with scaling and zero-point Installation +++++++++++++ @@ -90,11 +93,18 @@ QONNX also uses GitHub actions to run the full test suite on PRs. ONNX-Based Compiler Infrastructure Tutorials + qonnx-custom-ops/overview API License Contributors + Change log Index * :ref:`modindex` * :ref:`search` + +.. _IntQuant: qonnx-custom-ops/intquant_v1.html +.. _BipolarQuant: qonnx-custom-ops/bipolarquant_v1.html +.. _FloatQuant: qonnx-custom-ops/floatquant_v1.html +.. _Trunc: qonnx-custom-ops/trunc_v2.html diff --git a/docs/license.rst b/docs/license.rst index a5103f77..0765500d 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -4,4 +4,4 @@ License ======== -.. include:: ../LICENSE +.. literalinclude:: ../LICENSE diff --git a/docs/readme.rst b/docs/readme.rst index 81995ef4..bae2d922 100644 --- a/docs/readme.rst +++ b/docs/readme.rst @@ -1,2 +1,3 @@ .. _readme: -.. include:: ../README.rst +.. include:: ../README.md + :parser: myst_parser.sphinx_ \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index caa72889..f4e68464 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -12,3 +12,5 @@ sigtools==2.0.3 sphinx==4.0.3 sphinx_rtd_theme==1.1.1 toposort==1.7.0 +myst_parser +sphinx_github_changelog diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 6f024148..39cd38fd 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -10,17 +10,13 @@ All Jupyter notebooks can be found under the `notebook folder