Update on "Use c10 version of half/bfloat16 in executorch"
Accomplished by importing relevant files from c10 into
executorch/runtime/core/portable_type/c10, and then using `using` in
the top-level ExecuTorch headers. This approach should keep the
ExecuTorch build hermetic for embedded use cases. In the future, we
should add a CI job to ensure the c10 files stay identical to the
PyTorch ones.
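
The sync-check CI job mentioned above is future work and is not part of this commit. As a rough sketch of what such a check could look like, the script below diffs the vendored headers against a PyTorch checkout; the checkout location and the file list (`Half.h`, `BFloat16.h`) are illustrative assumptions, not paths defined by this change.

```python
#!/usr/bin/env python3
# Hypothetical sync check: fail if the c10 headers vendored into ExecuTorch
# drift from the upstream PyTorch copies. Paths and file list are assumptions.
import filecmp
import sys
from pathlib import Path

PYTORCH_C10 = Path("pytorch/c10/util")  # assumed location of a PyTorch checkout
VENDORED_C10 = Path("executorch/runtime/core/portable_type/c10")  # vendored copy
FILES = ["Half.h", "BFloat16.h"]  # assumed list of mirrored headers


def main() -> int:
    stale = [
        f
        for f in FILES
        if not filecmp.cmp(PYTORCH_C10 / f, VENDORED_C10 / f, shallow=False)
    ]
    for f in stale:
        print(f"{f} differs from the PyTorch copy; please re-sync it")
    return 1 if stale else 0


if __name__ == "__main__":
    sys.exit(main())
```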

Differential Revision: [D66106969](https://our.internmc.facebook.com/intern/diff/D66106969/)

[ghstack-poisoned]
Github Executorch committed Feb 4, 2025
2 parents 16bfea2 + 52c4f3c commit 87c3252
Showing 135 changed files with 3,735 additions and 1,617 deletions.
1 change: 1 addition & 0 deletions .ci/scripts/test_eval_llama_mmlu.sh
@@ -43,6 +43,7 @@ run_and_verify() {
--tasks mmlu \
-f 5 \
--max_seq_length 2048 \
+ --max_context_length 2048 \
--limit 5 > result.txt

# Verify result.txt
1 change: 1 addition & 0 deletions .ci/scripts/test_eval_llama_wikitext.sh
@@ -41,6 +41,7 @@ run_and_verify() {
-kv \
-d fp32 \
--max_seq_length 2048 \
+ --max_context_length 2048 \
--limit 5 > result.txt

# Verify result.txt
2 changes: 1 addition & 1 deletion .github/workflows/apple.yml
@@ -37,7 +37,7 @@ jobs:
id: set_version
shell: bash
run: |
- VERSION="0.4.0.$(TZ='PST8PDT' date +%Y%m%d)"
+ VERSION="0.5.0.$(TZ='PST8PDT' date +%Y%m%d)"
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
build-demo-ios:
2 changes: 1 addition & 1 deletion .github/workflows/doc-build.yml
@@ -84,8 +84,8 @@ jobs:
needs: build
if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
permissions:
id-token: write
- contents: write
+ contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/executorch
10 changes: 5 additions & 5 deletions .github/workflows/pull.yml
@@ -221,7 +221,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install Llava requirements
bash examples/models/llama/install_requirements.sh
@@ -484,7 +484,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install phi-3-mini requirements
bash examples/models/phi-3-mini/install_requirements.sh
@@ -514,7 +514,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
@@ -544,7 +544,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
@@ -574,7 +574,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
4 changes: 2 additions & 2 deletions .github/workflows/trunk.yml
@@ -150,7 +150,7 @@ jobs:
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
- install_executorch
+ install_executorch "use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
@@ -180,7 +180,7 @@ jobs:
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
- install_executorch
+ install_executorch "use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
2 changes: 1 addition & 1 deletion .lintrunner.toml
@@ -1,4 +1,4 @@
- merge_base_with = "origin/main"
+ merge_base_with = "main"

[[linter]]
code = 'FLAKE8'
32 changes: 32 additions & 0 deletions CONTRIBUTING.md
@@ -44,6 +44,38 @@ Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

### Issue Labels

#### Module/Partner Labels

[Labels beginning with `module:`](https://github.com/pytorch/executorch/labels?q=%22module%3A+%22)
indicate the area that the issue relates to. The ExecuTorch oncall will
typically add this label.

[Labels beginning with `partner:`](https://github.com/pytorch/executorch/labels?q=%22partner%3A+%22)
indicate the ExecuTorch partner who owns the issue. The ExecuTorch oncall will
typically add this label.

#### Lifecycle Labels

The ExecuTorch oncall will triage new issues. If the issue requires more
information from the issue's author, oncall will add the `need-user-input` label
and wait for the author to respond.

Once the issue contains enough information, the oncall will:
- Ensure that the title is descriptive
- Add one of the labels:
- `bug`: The issue describes an unexpected problem
- `feature`: The issue describes a request for new functionality
- `rfc`: The issue describes a proposed change to functionality
- Add one `module:` label or one `partner:` label, as described above
- Add the `triaged` label

After this point, the oncall has finished the triage process, and the
module owner or partner is responsible for resolving the issue. (See
https://github.com/pytorch/executorch/issues/7679 for the mapping of labels to
owners.)

### Claiming Issues
We'd love your help closing out [open
issues](https://github.com/pytorch/executorch/issues?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen)
15 changes: 8 additions & 7 deletions README-wheel.md
@@ -4,20 +4,21 @@ standard on-device iOS and Android mobile deployments. One of the main goals for
ExecuTorch is to enable wider customization and deployment capabilities of the
PyTorch programs.

- The `executorch` pip package is in alpha.
- * Supported python versions: 3.10, 3.11
+ The `executorch` pip package is in beta.
+ * Supported python versions: 3.10, 3.11, 3.12
* Compatible systems: Linux x86_64, macOS aarch64

- The prebuilt `executorch.extension.pybindings.portable_lib` module included in
- this package provides a way to run ExecuTorch `.pte` files, with some
- restrictions:
+ The prebuilt `executorch.runtime` module included in this package provides a way
+ to run ExecuTorch `.pte` files, with some restrictions:
* Only [core ATen
operators](https://pytorch.org/executorch/stable/ir-ops-set-definition.html)
are linked into the prebuilt module
* Only the [XNNPACK backend
delegate](https://pytorch.org/executorch/main/native-delegates-executorch-xnnpack-delegate.html)
- is linked into the prebuilt module
- * [macOS only] [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html) and [MPS](https://pytorch.org/executorch/main/build-run-mps.html) backend delegates are linked into the prebuilt module.
+ is linked into the prebuilt module.
+ * \[macOS only] [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html)
+ and [MPS](https://pytorch.org/executorch/main/build-run-mps.html) backend
+ delegates are also linked into the prebuilt module.
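
As a quick illustration of the prebuilt module described above, the sketch below loads and runs a `.pte` file with the Python `Runtime` API; it assumes an XNNPACK-lowered `model.pte` with a 1x3x224x224 input already exists, and the exact API surface may vary between releases.

```python
import torch
from executorch.runtime import Runtime

# Load a compiled ExecuTorch program and run its "forward" method.
# "model.pte" and the input shape below are placeholders for your own model.
runtime = Runtime.get()
program = runtime.load_program("model.pte")
method = program.load_method("forward")
outputs = method.execute([torch.randn(1, 3, 224, 224)])
print(outputs)
```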

Please visit the [ExecuTorch website](https://pytorch.org/executorch/) for
tutorials and documentation. Here are some starting points:
69 changes: 41 additions & 28 deletions README.md
@@ -1,9 +1,36 @@
# ExecuTorch

- **ExecuTorch** is an end-to-end solution for enabling on-device inference
- capabilities across mobile and edge devices including wearables, embedded
- devices and microcontrollers. It is part of the PyTorch Edge ecosystem and
- enables efficient deployment of PyTorch models to edge devices.
+ <div align="center">
+ <img src="./docs/source/_static/img/et-logo.png" alt="Logo" width="200">
+ <h1 align="center">ExecuTorch: A powerful on-device AI Framework</h1>
+ </div>


+ <div align="center">
+ <a href="https://github.com/pytorch/executorch/graphs/contributors"><img src="https://img.shields.io/github/contributors/pytorch/executorch?style=for-the-badge&color=blue" alt="Contributors"></a>
+ <a href="https://github.com/pytorch/executorch/stargazers"><img src="https://img.shields.io/github/stars/pytorch/executorch?style=for-the-badge&color=blue" alt="Stargazers"></a>
+ <a href="https://discord.gg/MeacgB7A"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
+ <a href="https://pytorch.org/executorch/stable/index.html"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
+ <hr>
+ </div>

+ **ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI functionality including: Facebook, Oculus, Meta Glasses, Instagram, WhatsApp and more.

+ It covers a wide gamut of models including: LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), TTS (Text to Speech).

+ Platform Support:
+ - Operating Systems:
+   - iOS
+   - Mac
+   - Android
+   - Linux
+   - Microcontrollers

+ - Hardware Acceleration:
+   - Apple
+   - ARM
+   - Cadence
+   - MediaTek
+   - Vulkan
+   - XNNPACK

Key value propositions of ExecuTorch are:

@@ -17,35 +44,21 @@ Key value propositions of ExecuTorch are:
experience due to a lightweight runtime and utilizing full hardware
capabilities such as CPUs, NPUs, and DSPs.

- For a comprehensive technical overview of ExecuTorch and step-by-step tutorials,
- please visit our documentation website [for the latest release](https://pytorch.org/executorch/stable/index.html) (or the [main branch](https://pytorch.org/executorch/main/index.html)).

- Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.

- Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
+ ## Getting Started
+ To get started you can:

+ - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) on getting things running locally and deploy a model to a device
+ - Use this [Colab Notebook](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) to start playing around right away
+ - Jump straight into LLMs use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)

**[UPDATE - 10/24]** We have added support for running [Llama 3.2 Quantized 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.

- ## Feedback
+ ## Feedback and Engagement

We welcome any feedback, suggestions, and bug reports from the community to help
- us improve our technology. Please use the [PyTorch
- Forums](https://discuss.pytorch.org/c/executorch) for discussion and feedback
- about ExecuTorch using the **ExecuTorch** category, and our [GitHub
- repository](https://github.com/pytorch/executorch/issues) for bug reporting.

- We recommend using the latest release tag from the
- [Releases](https://github.com/pytorch/executorch/releases) page when developing.
+ us improve our technology. Check out the [Discussion Board](https://github.com/pytorch/executorch/discussions) or chat real time with us on [Discord](https://discord.gg/MeacgB7A)

## Contributing

- See [CONTRIBUTING.md](CONTRIBUTING.md) for details about issues, PRs, code
- style, CI jobs, and other development topics.

- To connect with us and other community members, we invite you to join PyTorch Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
- * Head to the `#executorch-general` channel for general questions, discussion, and community support.
- * Join the `#executorch-contributors` channel if you're interested in contributing directly to project development.
+ We welcome contributions. To get started review the [guidelines](CONTRIBUTING.md) and chat with us on [Discord](https://discord.gg/MeacgB7A)


## Directory Structure
12 changes: 12 additions & 0 deletions backends/arm/README.md
@@ -122,6 +122,18 @@ Then you can run the tests with
pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
```

## Passes

With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
Ethos-U, the exported program is composed of a Quantize node, the Ethos-U custom delegate,
and a Dequantize node. In some circumstances, you may want to feed quantized input to the
neural network straight away, e.g. if you have a camera sensor that outputs (u)int8 data and
you want to keep all of the application's arithmetic in the int8 domain. For these cases, you
can apply the passes in `exir/passes/quantize_io_pass.py`. See the unit test in
`executorch/backends/arm/test/passes/test_ioquantization_pass.py` for an example of how to
feed quantized inputs and obtain quantized outputs.
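
As a rough sketch of that flow (the pass constructor arguments below are assumptions; the unit test above is the authoritative reference):

```python
# Hedged sketch: strip the outer Quantize/Dequantize nodes so the program takes
# int8 input and returns int8 output directly. Argument names and order are
# illustrative and may not match the current quantize_io_pass.py signature.
from executorch.exir import EdgeProgramManager
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs


def strip_io_quantization(edge: EdgeProgramManager) -> EdgeProgramManager:
    """Apply the IO-quantization passes to an already-delegated edge program."""
    return edge.transform(
        [
            QuantizeInputs(edge, [0]),   # accept pre-quantized int8 data for input 0
            QuantizeOutputs(edge, [0]),  # return raw int8 data for output 0
        ]
    )
```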


### Code coverage

To get code coverage:
4 changes: 4 additions & 0 deletions backends/arm/_passes/annotate_channels_last_dim_order_pass.py
@@ -129,6 +129,7 @@ def insert_input_transpose(node, input_node, graph_module):
permute_node.meta["tosa_dim_order"] = tuple(
range(len(input_node.meta["val"].size()))
)
+ permute_node.meta["val"] = input_node.meta["val"]

@staticmethod
def insert_output_transpose(node, graph_module):
@@ -141,6 +142,9 @@ def insert_output_transpose(node, graph_module):
permute_node.meta["tosa_dim_order"] = (
AnnotateChannelsLastDimOrder.NHWC_order
)
+ permute_node.meta["val"] = node.meta["val"].permute(
+ AnnotateChannelsLastDimOrder.NHWC_order
+ )
node.meta["tosa_dim_order"] = (0, 1, 2, 3)
users = [user for user in node.users if user != permute_node]
for user in users:
43 changes: 35 additions & 8 deletions backends/arm/_passes/annotate_decomposed_matmul.py
@@ -6,9 +6,12 @@

import itertools

+ from typing import List

import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
- from executorch.backends.arm.tosa_quant_utils import dq_op, q_op

+ from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx import GraphModule
@@ -24,6 +27,22 @@ class AnnotateDecomposedMatmulPass(ExportPass):
matmul-op (can be mm or bmm).
"""

+ def _match_partition_to_node(
+ self, node: torch.fx.Node, partitioned_inputs: List[torch.fx.Node]
+ ) -> torch.fx.Node:
+ """
+ The partition.input_nodes order is not guaranteed. Compare these
+ with the matmul node inputs coming in and return the nodes
+ in the correct order.
+ """
+ if not node or node in partitioned_inputs or node.op == "placeholder":
+ return node
+ else:
+ return self._match_partition_to_node(
+ node.all_input_nodes[0], partitioned_inputs
+ )
+ raise RuntimeError(f"Cannot find an input node which matches, {node}.")

def call(self, graph_module: GraphModule) -> PassResult:
matmul_partitions = get_source_partitions(
graph_module.graph,
@@ -45,28 +64,36 @@ def call(self, graph_module: GraphModule) -> PassResult:
matmul_node = [
node for node in partition.nodes if node.target in matmul_targets
][0]

if quantized_input:
matmul_args = matmul_node.all_input_nodes
- for i in range(len(matmul_args)):
- input_node = partition.input_nodes[i]
- matmul_input_node = matmul_args[i]
+ for node in matmul_args:
+ input_node = self._match_partition_to_node(
+ node, partition.input_nodes
+ )

# Remove partition input dq-node
input_node.replace_all_uses_with(input_node.all_input_nodes[0])
graph_module.graph.erase_node(input_node)
- input_node_qargs = input_node.args[1:]
+ input_node_qargs = QuantArgs.from_operator(
+ input_node.target, input_node.args
+ )

with graph_module.graph.inserting_before(matmul_node):
# Create new dq-node before matmul
dq_node = create_node(
graph=graph_module.graph,
op_target=dq_op,
)
- dq_node.args = (matmul_input_node, *input_node_qargs)
- matmul_node.replace_input_with(matmul_input_node, dq_node)
+ dq_node.args = (node, *input_node_qargs)
+ matmul_node.replace_input_with(node, dq_node)

partition_output = list(partition.output_nodes[0].users)[0]
quantized_output = partition_output.target == q_op
if quantized_output:
- output_node_qargs = partition_output.args[1:]
+ output_node_qargs = QuantArgs.from_operator(
+ partition_output.target, partition_output.args
+ )
with graph_module.graph.inserting_after(matmul_node):
# Create q-node after matmul
q_node = create_node(