Update on "Use c10 version of half/bfloat16 in executorch"
Accomplished by importing relevant files from c10 into
executorch/runtime/core/portable_type/c10, and then using `using` in
the top-level ExecuTorch headers. This approach should keep the
ExecuTorch build hermetic for embedded use cases. In the future, we
should add a CI job to ensure the c10 files stay identical to the
PyTorch ones.
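
The sync-check CI job mentioned above is future work and is not part of this commit. As a rough sketch of what such a check could look like, the script below diffs the vendored headers against a PyTorch checkout; the checkout location and the file list (`Half.h`, `BFloat16.h`) are illustrative assumptions, not paths defined by this change.

```python
#!/usr/bin/env python3
# Hypothetical sync check: fail if the c10 headers vendored into ExecuTorch
# drift from the upstream PyTorch copies. Paths and file list are assumptions.
import filecmp
import sys
from pathlib import Path

PYTORCH_C10 = Path("pytorch/c10/util")  # assumed location of a PyTorch checkout
VENDORED_C10 = Path("executorch/runtime/core/portable_type/c10")  # vendored copy
FILES = ["Half.h", "BFloat16.h"]  # assumed list of mirrored headers


def main() -> int:
    stale = [
        f
        for f in FILES
        if not filecmp.cmp(PYTORCH_C10 / f, VENDORED_C10 / f, shallow=False)
    ]
    for f in stale:
        print(f"{f} differs from the PyTorch copy; please re-sync it")
    return 1 if stale else 0


if __name__ == "__main__":
    sys.exit(main())
```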

Differential Revision: [D66106969](https://our.internmc.facebook.com/intern/diff/D66106969/)

[ghstack-poisoned]
Github Executorch committed Feb 4, 2025
2 parents 16bfea2 + 52c4f3c commit 87c3252
Showing 135 changed files with 3,735 additions and 1,617 deletions.
1 change: 1 addition & 0 deletions .ci/scripts/test_eval_llama_mmlu.sh
@@ -43,6 +43,7 @@ run_and_verify() {
--tasks mmlu \
-f 5 \
--max_seq_length 2048 \
+ --max_context_length 2048 \
--limit 5 > result.txt

# Verify result.txt
1 change: 1 addition & 0 deletions .ci/scripts/test_eval_llama_wikitext.sh
@@ -41,6 +41,7 @@ run_and_verify() {
-kv \
-d fp32 \
--max_seq_length 2048 \
+ --max_context_length 2048 \
--limit 5 > result.txt

# Verify result.txt
2 changes: 1 addition & 1 deletion .github/workflows/apple.yml
@@ -37,7 +37,7 @@ jobs:
id: set_version
shell: bash
run: |
- VERSION="0.4.0.$(TZ='PST8PDT' date +%Y%m%d)"
+ VERSION="0.5.0.$(TZ='PST8PDT' date +%Y%m%d)"
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
build-demo-ios:
2 changes: 1 addition & 1 deletion .github/workflows/doc-build.yml
@@ -84,8 +84,8 @@ jobs:
needs: build
if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
permissions:
id-token: write
- contents: write
+ contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/executorch
10 changes: 5 additions & 5 deletions .github/workflows/pull.yml
@@ -221,7 +221,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install Llava requirements
bash examples/models/llama/install_requirements.sh
@@ -484,7 +484,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install phi-3-mini requirements
bash examples/models/phi-3-mini/install_requirements.sh
@@ -514,7 +514,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
@@ -544,7 +544,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
@@ -574,7 +574,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
# install pybind
- bash install_executorch.sh --pybind xnnpack
+ bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
# install llama requirements
bash examples/models/llama/install_requirements.sh
4 changes: 2 additions & 2 deletions .github/workflows/trunk.yml
@@ -150,7 +150,7 @@ jobs:
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
- install_executorch
+ install_executorch "use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
@@ -180,7 +180,7 @@ jobs:
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
- install_executorch
+ install_executorch "use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
2 changes: 1 addition & 1 deletion .lintrunner.toml
@@ -1,4 +1,4 @@
- merge_base_with = "origin/main"
+ merge_base_with = "main"

[[linter]]
code = 'FLAKE8'
32 changes: 32 additions & 0 deletions CONTRIBUTING.md
@@ -44,6 +44,38 @@ Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

### Issue Labels

#### Module/Partner Labels

[Labels beginning with `module:`](https://github.com/pytorch/executorch/labels?q=%22module%3A+%22)
indicate the area that the issue relates to. The ExecuTorch oncall will
typically add this label.

[Labels beginning with `partner:`](https://github.com/pytorch/executorch/labels?q=%22partner%3A+%22)
indicate the ExecuTorch partner who owns the issue. The ExecuTorch oncall will
typically add this label.

#### Lifecycle Labels

The ExecuTorch oncall will triage new issues. If the issue requires more
information from the issue's author, oncall will add the `need-user-input` label
and wait for the author to respond.

Once the issue contains enough information, the oncall will:
- Ensure that the title is descriptive
- Add one of the labels:
- `bug`: The issue describes an unexpected problem
- `feature`: The issue describes a request for new functionality
- `rfc`: The issue describes a proposed change to functionality
- Add one `module:` label or one `partner:` label, as described above
- Add the `triaged` label

After this point, the oncall has finished the triage process, and the
module owner or partner is responsible for resolving the issue. (See
https://github.com/pytorch/executorch/issues/7679 for the mapping of labels to
owners.)

### Claiming Issues
We'd love your help closing out [open
issues](https://github.com/pytorch/executorch/issues?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen)
15 changes: 8 additions & 7 deletions README-wheel.md
@@ -4,20 +4,21 @@ standard on-device iOS and Android mobile deployments. One of the main goals for
ExecuTorch is to enable wider customization and deployment capabilities of the
PyTorch programs.

- The `executorch` pip package is in alpha.
- * Supported python versions: 3.10, 3.11
+ The `executorch` pip package is in beta.
+ * Supported python versions: 3.10, 3.11, 3.12
* Compatible systems: Linux x86_64, macOS aarch64

- The prebuilt `executorch.extension.pybindings.portable_lib` module included in
- this package provides a way to run ExecuTorch `.pte` files, with some
- restrictions:
+ The prebuilt `executorch.runtime` module included in this package provides a way
+ to run ExecuTorch `.pte` files, with some restrictions:
* Only [core ATen
operators](https://pytorch.org/executorch/stable/ir-ops-set-definition.html)
are linked into the prebuilt module
* Only the [XNNPACK backend
delegate](https://pytorch.org/executorch/main/native-delegates-executorch-xnnpack-delegate.html)
- is linked into the prebuilt module
- * [macOS only] [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html) and [MPS](https://pytorch.org/executorch/main/build-run-mps.html) backend delegates are linked into the prebuilt module.
+ is linked into the prebuilt module.
+ * \[macOS only] [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html)
+ and [MPS](https://pytorch.org/executorch/main/build-run-mps.html) backend
+ delegates are also linked into the prebuilt module.
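
As a quick illustration of the prebuilt module described above, the sketch below loads and runs a `.pte` file with the Python `Runtime` API; it assumes an XNNPACK-lowered `model.pte` with a 1x3x224x224 input already exists, and the exact API surface may vary between releases.

```python
import torch
from executorch.runtime import Runtime

# Load a compiled ExecuTorch program and run its "forward" method.
# "model.pte" and the input shape below are placeholders for your own model.
runtime = Runtime.get()
program = runtime.load_program("model.pte")
method = program.load_method("forward")
outputs = method.execute([torch.randn(1, 3, 224, 224)])
print(outputs)
```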

Please visit the [ExecuTorch website](https://pytorch.org/executorch/) for
tutorials and documentation. Here are some starting points:
69 changes: 41 additions & 28 deletions README.md
@@ -1,9 +1,36 @@
# ExecuTorch

- **ExecuTorch** is an end-to-end solution for enabling on-device inference
- capabilities across mobile and edge devices including wearables, embedded
- devices and microcontrollers. It is part of the PyTorch Edge ecosystem and
- enables efficient deployment of PyTorch models to edge devices.
+ <div align="center">
+ <img src="./docs/source/_static/img/et-logo.png" alt="Logo" width="200">
+ <h1 align="center">ExecuTorch: A powerful on-device AI Framework</h1>
+ </div>


+ <div align="center">
+ <a href="https://github.com/pytorch/executorch/graphs/contributors"><img src="https://img.shields.io/github/contributors/pytorch/executorch?style=for-the-badge&color=blue" alt="Contributors"></a>
+ <a href="https://github.com/pytorch/executorch/stargazers"><img src="https://img.shields.io/github/stars/pytorch/executorch?style=for-the-badge&color=blue" alt="Stargazers"></a>
+ <a href="https://discord.gg/MeacgB7A"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
+ <a href="https://pytorch.org/executorch/stable/index.html"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
+ <hr>
+ </div>

+ **ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI functionality including: Facebook, Oculus, Meta Glasses, Instagram, WhatsApp and more.

+ It covers a wide gamut of models including: LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), TTS (Text to Speech).

+ Platform Support:
+ - Operating Systems:
+   - iOS
+   - Mac
+   - Android
+   - Linux
+   - Microcontrollers

+ - Hardware Acceleration:
+   - Apple
+   - ARM
+   - Cadence
+   - MediaTek
+   - Vulkan
+   - XNNPACK

Key value propositions of ExecuTorch are:

@@ -17,35 +44,21 @@ Key value propositions of ExecuTorch are:
experience due to a lightweight runtime and utilizing full hardware
capabilities such as CPUs, NPUs, and DSPs.

- For a comprehensive technical overview of ExecuTorch and step-by-step tutorials,
- please visit our documentation website [for the latest release](https://pytorch.org/executorch/stable/index.html) (or the [main branch](https://pytorch.org/executorch/main/index.html)).

- Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.

- Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
+ ## Getting Started
+ To get started you can:

+ - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) on getting things running locally and deploy a model to a device
+ - Use this [Colab Notebook](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) to start playing around right away
+ - Jump straight into LLMs use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)

**[UPDATE - 10/24]** We have added support for running [Llama 3.2 Quantized 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.

- ## Feedback
+ ## Feedback and Engagement

We welcome any feedback, suggestions, and bug reports from the community to help
- us improve our technology. Please use the [PyTorch
- Forums](https://discuss.pytorch.org/c/executorch) for discussion and feedback
- about ExecuTorch using the **ExecuTorch** category, and our [GitHub
- repository](https://github.com/pytorch/executorch/issues) for bug reporting.

- We recommend using the latest release tag from the
- [Releases](https://github.com/pytorch/executorch/releases) page when developing.
+ us improve our technology. Check out the [Discussion Board](https://github.com/pytorch/executorch/discussions) or chat real time with us on [Discord](https://discord.gg/MeacgB7A)

## Contributing

- See [CONTRIBUTING.md](CONTRIBUTING.md) for details about issues, PRs, code
- style, CI jobs, and other development topics.

- To connect with us and other community members, we invite you to join PyTorch Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
- * Head to the `#executorch-general` channel for general questions, discussion, and community support.
- * Join the `#executorch-contributors` channel if you're interested in contributing directly to project development.
+ We welcome contributions. To get started review the [guidelines](CONTRIBUTING.md) and chat with us on [Discord](https://discord.gg/MeacgB7A)


## Directory Structure
12 changes: 12 additions & 0 deletions backends/arm/README.md
@@ -122,6 +122,18 @@ Then you can run the tests with
pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
```

## Passes

With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
Ethos-U, the exported program is composed of a Quantize node, the Ethos-U custom delegate,
and a Dequantize node. In some circumstances, you may want to feed quantized input to the
neural network straight away, e.g. if you have a camera sensor that outputs (u)int8 data and
you want to keep all of the application's arithmetic in the int8 domain. For these cases, you
can apply the passes in `exir/passes/quantize_io_pass.py`. See the unit test in
`executorch/backends/arm/test/passes/test_ioquantization_pass.py` for an example of how to
feed quantized inputs and obtain quantized outputs.
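
As a rough sketch of that flow (the pass constructor arguments below are assumptions; the unit test above is the authoritative reference):

```python
# Hedged sketch: strip the outer Quantize/Dequantize nodes so the program takes
# int8 input and returns int8 output directly. Argument names and order are
# illustrative and may not match the current quantize_io_pass.py signature.
from executorch.exir import EdgeProgramManager
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs


def strip_io_quantization(edge: EdgeProgramManager) -> EdgeProgramManager:
    """Apply the IO-quantization passes to an already-delegated edge program."""
    return edge.transform(
        [
            QuantizeInputs(edge, [0]),   # accept pre-quantized int8 data for input 0
            QuantizeOutputs(edge, [0]),  # return raw int8 data for output 0
        ]
    )
```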


### Code coverage

To get code coverage:
4 changes: 4 additions & 0 deletions backends/arm/_passes/annotate_channels_last_dim_order_pass.py
@@ -129,6 +129,7 @@ def insert_input_transpose(node, input_node, graph_module):
permute_node.meta["tosa_dim_order"] = tuple(
range(len(input_node.meta["val"].size()))
)
+ permute_node.meta["val"] = input_node.meta["val"]

@staticmethod
def insert_output_transpose(node, graph_module):
@@ -141,6 +142,9 @@ def insert_output_transpose(node, graph_module):
permute_node.meta["tosa_dim_order"] = (
AnnotateChannelsLastDimOrder.NHWC_order
)
+ permute_node.meta["val"] = node.meta["val"].permute(
+ AnnotateChannelsLastDimOrder.NHWC_order
+ )
node.meta["tosa_dim_order"] = (0, 1, 2, 3)
users = [user for user in node.users if user != permute_node]
for user in users:
43 changes: 35 additions & 8 deletions backends/arm/_passes/annotate_decomposed_matmul.py
@@ -6,9 +6,12 @@

import itertools

+ from typing import List

import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
- from executorch.backends.arm.tosa_quant_utils import dq_op, q_op

+ from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx import GraphModule
@@ -24,6 +27,22 @@ class AnnotateDecomposedMatmulPass(ExportPass):
matmul-op (can be mm or bmm).
"""

+ def _match_partition_to_node(
+ self, node: torch.fx.Node, partitioned_inputs: List[torch.fx.Node]
+ ) -> torch.fx.Node:
+ """
+ The partition.input_nodes order is not guaranteed. Compare these
+ with the matmul node inputs coming in and return the nodes
+ in the correct order.
+ """
+ if not node or node in partitioned_inputs or node.op == "placeholder":
+ return node
+ else:
+ return self._match_partition_to_node(
+ node.all_input_nodes[0], partitioned_inputs
+ )
+ raise RuntimeError(f"Cannot find an input node which matches, {node}.")

def call(self, graph_module: GraphModule) -> PassResult:
matmul_partitions = get_source_partitions(
graph_module.graph,
@@ -45,28 +64,36 @@ def call(self, graph_module: GraphModule) -> PassResult:
matmul_node = [
node for node in partition.nodes if node.target in matmul_targets
][0]

if quantized_input:
matmul_args = matmul_node.all_input_nodes
- for i in range(len(matmul_args)):
- input_node = partition.input_nodes[i]
- matmul_input_node = matmul_args[i]
+ for node in matmul_args:
+ input_node = self._match_partition_to_node(
+ node, partition.input_nodes
+ )

# Remove partition input dq-node
input_node.replace_all_uses_with(input_node.all_input_nodes[0])
graph_module.graph.erase_node(input_node)
- input_node_qargs = input_node.args[1:]
+ input_node_qargs = QuantArgs.from_operator(
+ input_node.target, input_node.args
+ )

with graph_module.graph.inserting_before(matmul_node):
# Create new dq-node before matmul
dq_node = create_node(
graph=graph_module.graph,
op_target=dq_op,
)
- dq_node.args = (matmul_input_node, *input_node_qargs)
- matmul_node.replace_input_with(matmul_input_node, dq_node)
+ dq_node.args = (node, *input_node_qargs)
+ matmul_node.replace_input_with(node, dq_node)

partition_output = list(partition.output_nodes[0].users)[0]
quantized_output = partition_output.target == q_op
if quantized_output:
- output_node_qargs = partition_output.args[1:]
+ output_node_qargs = QuantArgs.from_operator(
+ partition_output.target, partition_output.args
+ )
with graph_module.graph.inserting_after(matmul_node):
# Create q-node after matmul
q_node = create_node(