E2E performance #6
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end performance benchmark workflow.
# It is started manually (workflow_dispatch); the scheduled trigger below is
# currently commented out.
name: E2E performance mode

on:
  workflow_dispatch:
    inputs:
      # Benchmark suite to run; "all" expands to every suite listed here.
      suite:
        description: Test suite
        type: choice
        options:
          - all
          - huggingface
          - timm_models
          - torchbench
        default: all
      # Benchmark mode; "all" expands to both inference and training.
      mode:
        description: Inference/Training
        type: choice
        options:
          - all
          - inference
          - training
        default: all
      # Data type; "all" expands to every data type listed here.
      dtype:
        description: Data type
        type: choice
        options:
          - all
          - amp_bf16
          - amp_fp16
          - bfloat16
          - float16
          - float32
        default: all
      # Free-form string; the benchmark step enables TORCH_COMPILE_DEBUG only
      # when this is exactly "1".
      TORCH_COMPILE_DEBUG:
        description: TORCH_COMPILE_DEBUG
        type: string
        default: ""
  # schedule:
  #   - cron: "5 1 * * *"
# Grant the workflow token read-only access to every permission scope.
permissions: read-all

# Workflow-level environment, visible to every job and step below.
env:
  BASE: /home/runner
  LLVM_SYSPATH: /home/runner/packages/llvm
  BACKEND: XPU
  # Quoted: environment values are strings.
  TRITON_DISABLE_LINE_INFO: "1"
  USE_AOT_DEVLIST: pvc
  # Quoted so that YAML does not read the version as the float 3.1.
  PYTHON_VERSION: "3.10"
  # Repository/branch whose head commit id is resolved into LLVM_COMMIT_ID.
  LLVM_REPO: intel/llvm.git
  LLVM_BRANCH: genx
  # Repository/branch checked out for the torchbench suite.
  BENCHMARK_REPO: weishi-deng/benchmark.git
  BENCHMARK_BRANCH: main
jobs:
  # Turns the workflow_dispatch inputs into JSON arrays that drive the
  # strategy matrix of the `build` job. An empty input or "all" selects
  # every value of that dimension.
  matrix:
    name: Matrix
    runs-on:
      - glados
      - spr
      - cpu
    outputs:
      suite: ${{ steps.set-matrix.outputs.suite }}
      mode: ${{ steps.set-matrix.outputs.mode }}
      dtype: ${{ steps.set-matrix.outputs.dtype }}
    timeout-minutes: 10
    steps:
      - name: Set matrix
        id: set-matrix
        # Inputs are handed over through the environment instead of being
        # interpolated into the script text, so an input value can never be
        # executed as shell code.
        env:
          INPUT_SUITE: ${{ inputs.suite }}
          INPUT_MODE: ${{ inputs.mode }}
          INPUT_DTYPE: ${{ inputs.dtype }}
        run: |
          if [[ -z "$INPUT_SUITE" || "$INPUT_SUITE" == "all" ]]; then
            suite='["huggingface", "timm_models", "torchbench"]'
          else
            suite="[\"$INPUT_SUITE\"]"
          fi
          if [[ -z "$INPUT_MODE" || "$INPUT_MODE" == "all" ]]; then
            mode='["inference", "training"]'
          else
            mode="[\"$INPUT_MODE\"]"
          fi
          if [[ -z "$INPUT_DTYPE" || "$INPUT_DTYPE" == "all" ]]; then
            dtype='["amp_bf16", "amp_fp16", "bfloat16", "float16", "float32"]'
          else
            dtype="[\"$INPUT_DTYPE\"]"
          fi
          echo "suite=$suite" >> "$GITHUB_OUTPUT"
          echo "mode=$mode" >> "$GITHUB_OUTPUT"
          echo "dtype=$dtype" >> "$GITHUB_OUTPUT"

  # Builds (or restores from cache) Triton and its packages, then runs one
  # benchmark per suite/mode/dtype combination and publishes the logs.
  build:
    name: Test
    needs: matrix
    runs-on:
      - glados
      - spr
      - runner-0.0.6
    strategy:
      matrix:
        suite: ${{ fromJson(needs.matrix.outputs.suite) }}
        mode: ${{ fromJson(needs.matrix.outputs.mode) }}
        dtype: ${{ fromJson(needs.matrix.outputs.dtype) }}
      max-parallel: 2
      fail-fast: false
    timeout-minutes: 720
    defaults:
      run:
        # Every `run` step sources the oneAPI setvars.sh script before the
        # step script itself.
        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Get LLVM commit id
        uses: ./.github/actions/get-commit-id
        with:
          repository: ${{ env.LLVM_REPO }}
          branch: ${{ env.LLVM_BRANCH }}
          variable: LLVM_COMMIT_ID

      - name: Get benchmark commit id
        uses: ./.github/actions/get-commit-id
        with:
          repository: ${{ env.BENCHMARK_REPO }}
          branch: ${{ env.BENCHMARK_BRANCH }}
          variable: BENCHMARK_COMMIT_ID

      # The `status` output of this step decides at the end of the job
      # whether the pip cache has to be saved.
      - name: Load pip cache
        id: pip-cache
        uses: ./.github/actions/load
        with:
          path: $HOME/.cache/pip
          # pip cache per commit id just to minimize network traffic
          key: pip-$PYTHON_VERSION-$GITHUB_SHA

      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Python build dependencies
        run: |
          pip install wheel

      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch

      - name: Install latest nightly wheels
        uses: ./.github/actions/install-wheels

      # Exports the versions pinned in the pytorch checkout so that later
      # steps can install matching transformers and timm revisions.
      - name: Identify pinned versions
        run: |
          cd pytorch
          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"

      # TIMM depends on torch and torchvision, so, in general, it needs to be installed before
      # installing custom torch and torchvision, but instead we install its dependencies except
      # torch and torchvision.
      - name: Install TIMM Models
        if: ${{ matrix.suite == 'timm_models' }}
        run: |
          # install timm without dependencies
          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID
          # install timm dependencies without torch and torchvision
          pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch)

      # The key is the SHA-256 of the Python version, the LLVM commit id and
      # the workflow commit; `cut` keeps only the hash field of sha256sum.
      - name: Generate Triton cache key
        id: triton-key
        run: |
          COMPOSITE_KEY=$(echo $PYTHON_VERSION $LLVM_COMMIT_ID $GITHUB_SHA | sha256sum - | cut -d' ' -f1)
          echo "key=triton-$COMPOSITE_KEY" >> "$GITHUB_OUTPUT"

      - name: Load Triton wheels from a cache
        id: triton-cache
        uses: ./.github/actions/load
        with:
          path: python/dist
          key: ${{ steps.triton-key.outputs.key }}

      # Everything up to "Build Triton wheels" is needed only when the Triton
      # wheels were not found in the cache.
      - name: Generate packages cache key
        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
        id: packages-key
        env:
          # Increase this value to reset cache
          CACHE_NUMBER: 1
        run: |
          COMPOSITE_KEY=$(echo $LLVM_COMMIT_ID ${{ hashFiles('scripts/compile-triton.sh') }} | sha256sum - | cut -d' ' -f1)
          echo "key=packages-$COMPOSITE_KEY-$CACHE_NUMBER" >> "$GITHUB_OUTPUT"

      - name: Load packages from a cache
        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
        id: packages-cache
        uses: ./.github/actions/load
        with:
          path: $HOME/packages
          key: ${{ steps.packages-key.outputs.key }}

      - name: Build packages
        if: ${{ steps.triton-cache.outputs.status == 'miss' && steps.packages-cache.outputs.status == 'miss' }}
        run: |
          ./scripts/compile-triton.sh --llvm

      - name: Save packages to a cache
        if: ${{ steps.triton-cache.outputs.status == 'miss' && steps.packages-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.packages-cache.outputs.path }}
          dest: ${{ steps.packages-cache.outputs.dest }}

      - name: Build Triton wheels
        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
        run: |
          export DEBUG=1
          cd python
          python setup.py bdist_wheel

      # Installs the wheels from python/dist, whether restored from the
      # cache or built by the previous step.
      - name: Install Triton
        run: |
          pip install python/dist/*.whl

      - name: Save Triton wheels to a cache
        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.triton-cache.outputs.path }}
          dest: ${{ steps.triton-cache.outputs.dest }}

      - name: Install python test dependencies
        run: |
          pip install pyyaml pandas scipy numpy psutil pyre_extensions torchrec transformers==$TRANSFORMERS_VERSION

      - name: Clone pytorch benchmark
        if: ${{ matrix.suite == 'torchbench' }}
        uses: actions/checkout@v4
        with:
          repository: ${{ env.BENCHMARK_REPO }}
          ref: ${{ env.BENCHMARK_BRANCH }}
          submodules: recursive
          path: benchmark

      - name: Install pytorch benchmark
        if: ${{ matrix.suite == 'torchbench' }}
        run: |
          cd benchmark
          python install.py
          pip install -e .

      - name: Run e2e performance benchmarks
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          # The free-form input is passed through the environment under a
          # separate name: it is never interpolated into the script text, and
          # TORCH_COMPILE_DEBUG itself is still exported only for the value "1".
          INPUT_TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
        run: |
          export WORKSPACE=$GITHUB_WORKSPACE
          if [[ "$INPUT_TORCH_COMPILE_DEBUG" == "1" ]] ; then
            export TORCH_COMPILE_DEBUG="1"
            # torch will save debug logs to $TORCH_COMPILE_DEBUG_DIR/torch_compile_debug
            export TORCH_COMPILE_DEBUG_DIR=$GITHUB_WORKSPACE
          fi
          cd pytorch
          $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ matrix.suite }} ${{ matrix.dtype }} ${{ matrix.mode }} performance xpu 0

      # Writes the run metadata to inductor_log/.env (and to the step log via
      # tee) and exports TIMESTAMP for the "Copy reports" step.
      - name: Report environment details
        run: |
          mkdir -p inductor_log
          TIMESTAMP=$(date '+%Y%m%d%H%M%S')
          echo "TIMESTAMP=$TIMESTAMP" >> "${GITHUB_ENV}"
          cat <<EOF | tee inductor_log/.env
          TIMESTAMP=$TIMESTAMP
          JOB_NAME=${{ join(matrix.*, '-') }}
          GITHUB_RUN_ID=$GITHUB_RUN_ID
          GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
          GITHUB_RUN_ATTEMPT=$GITHUB_RUN_ATTEMPT
          PYTHON_VERSION=$PYTHON_VERSION
          PYTORCH_REPO=$PYTORCH_REPO
          PYTORCH_COMMIT_ID=$PYTORCH_COMMIT_ID
          IPEX_REPO=$IPEX_REPO
          IPEX_COMMIT_ID=$IPEX_COMMIT_ID
          LLVM_REPO=$LLVM_REPO
          LLVM_COMMIT_ID=$LLVM_COMMIT_ID
          BENCHMARK_REPO=$BENCHMARK_REPO
          BENCHMARK_COMMIT_ID=$BENCHMARK_COMMIT_ID
          TRITON_REPO=$GITHUB_REPOSITORY
          TRITON_COMMIT_ID=$GITHUB_SHA
          TORCHVISION_COMMIT_ID=$TORCHVISION_COMMIT_ID
          TORCHTEXT_COMMIT_ID=$TORCHTEXT_COMMIT_ID
          TORCHAUDIO_COMMIT_ID=$TORCHAUDIO_COMMIT_ID
          TRANSFORMERS_VERSION=$TRANSFORMERS_VERSION
          TIMM_COMMIT_ID=$TIMM_COMMIT_ID
          EOF

      # The report is first copied into a temporary directory inside the
      # destination folder and then renamed to its final name; if the rename
      # fails, the temporary copy is removed.
      - name: Copy reports
        run: |
          if [[ -d torch_compile_debug ]]; then
            cp -rT torch_compile_debug inductor_log
          fi
          mkdir -p /cache/reports/e2e-performance
          TMPDIR=$(mktemp -d -p /cache/reports/e2e-performance XXXXXXXXX.tmp)
          cp -rT "$GITHUB_WORKSPACE/inductor_log" "$TMPDIR"
          mv -T "$TMPDIR" "/cache/reports/e2e-performance/$TIMESTAMP-${{ join(matrix.*, '-') }}" || rm -rf "$TMPDIR"

      - name: Upload test logs
        uses: actions/upload-artifact@v4
        with:
          name: logs-${{ join(matrix.*, '-') }}
          path: inductor_log

      - name: Save pip cache
        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.pip-cache.outputs.path }}
          dest: ${{ steps.pip-cache.outputs.dest }}