Refine v1 perf benchmark to align with v2 (#3006)
* Add --benchmark-type w/ accuracy|efficiency|all options
* Add perf-benchmark tox env
* Refine perf workflow to align with v2
* Add dummy perf tests for visual prompting
* Fix weekly workflow

Signed-off-by: Songki Choi <[email protected]>
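For context, here is a minimal sketch of how the `--benchmark-type` option from the first bullet is typically registered in pytest. The option name and its accuracy|efficiency|all choices come from the commit message above; the conftest layout, fixture name, and default value are illustrative assumptions, not OTX's actual code.

# conftest.py -- illustrative sketch only, not the actual OTX conftest.
import pytest

def pytest_addoption(parser):
    # Register --benchmark-type with the accuracy|efficiency|all choices
    # named in the commit message.
    parser.addoption(
        "--benchmark-type",
        action="store",
        default="all",
        choices=("accuracy", "efficiency", "all"),
        help="Which benchmark suite to run.",
    )

@pytest.fixture(scope="session")
def benchmark_type(request):
    # Expose the selected benchmark type to tests.
    return request.config.getoption("--benchmark-type")

The accuracy workflow below pins `--benchmark-type accuracy` in its run step; a sibling efficiency workflow would presumably pin `efficiency`.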
1 parent 6d2e956 · commit cd17e23 · 13 changed files with 362 additions and 239 deletions.
@@ -0,0 +1,146 @@
name: Perf-Accuracy Benchmark

on:
  workflow_dispatch: # run on request (no need for PR)
    inputs:
      model-category:
        type: choice
        description: Model category to run benchmark
        options:
          - default # speed, balance, accuracy models only
          - all # default + other models
        default: default
      data-size:
        type: choice
        description: Dataset size to run benchmark
        options:
          - small
          - medium
          - large
          - all
        default: all
      num-repeat:
        description: Overrides default per-data-size number of repeat setting
        default: 0
      num-epoch:
        description: Overrides default per-model number of epoch setting
        default: 0
      eval-upto:
        type: choice
        description: The last operation to evaluate. 'optimize' means all.
        options:
          - train
          - export
          - optimize
        default: optimize
      pytest-args:
        type: string
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
      data-root:
        type: string
        description: Root directory containing validation data in CI server.
        default: /home/validation/data/new/
      artifact-prefix:
        type: string
        default: perf-accuracy-benchmark
  workflow_call:
    inputs:
      model-category:
        type: string
        description: Model category to run benchmark [default, all]
        default: default
      data-size:
        type: string
        description: Dataset size to run benchmark [small, medium, large, all]
        default: all
      num-repeat:
        type: number
        description: Overrides default per-data-size number of repeat setting
        default: 0
      num-epoch:
        type: number
        description: Overrides default per-model number of epoch setting
        default: 0
      eval-upto:
        type: string
        description: The last operation to evaluate. 'optimize' means all. [train, export, optimize]
        default: optimize
      pytest-args:
        type: string
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
      data-root:
        type: string
        description: Root directory containing validation data in CI server.
        default: /home/validation/data/new/
      artifact-prefix:
        type: string
        default: perf-accuracy-benchmark

# Declare default permissions as read only.
permissions: read-all

jobs:
  Perf-Accuracy-Benchmark:
    strategy:
      fail-fast: false
      matrix:
        include:
          - task-short: "ano"
            task: "anomaly"
          - task-short: "cls"
            task: "classification"
          - task-short: "det"
            task: "detection"
          - task-short: "isg"
            task: "instance_segmentation"
          - task-short: "ssg"
            task: "semantic_segmentation"
          - task-short: "vsp"
            task: "visual_prompting"
    name: Perf-Accuracy-Benchmark-${{ matrix.task-short }}
    runs-on: [self-hosted, linux, x64, dmount]
    timeout-minutes: 8640
    steps:
      - name: Checkout repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: Set up Python
        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          pip install --require-hashes --no-deps -r requirements/gh-actions.txt
          pip-compile --generate-hashes -o /tmp/otx-dev-requirements.txt requirements/dev.txt
          pip install --require-hashes --no-deps -r /tmp/otx-dev-requirements.txt
          rm /tmp/otx-dev-requirements.txt
      - name: Run Tests
        env:
          MLFLOW_TRACKING_SERVER_URI: ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
          BENCHMARK_RESULTS_CLEAR: ${{ vars.BENCHMARK_RESULTS_CLEAR }}
          GH_CTX_REF_NAME: ${{ github.ref_name }}
          GH_CTX_SHA: ${{ github.sha }}
        run: >
          tox -vv -e perf-benchmark -- tests/perf/test_${{ matrix.task }}.py ${{ inputs.pytest-args }}
          --benchmark-type accuracy
          --model-category ${{ inputs.model-category }}
          --data-root ${{ inputs.data-root }}
          --data-size ${{ inputs.data-size }}
          --num-repeat ${{ inputs.num-repeat }}
          --num-epoch ${{ inputs.num-epoch }}
          --eval-upto ${{ inputs.eval-upto }}
          --summary-csv .tox/perf-accuracy-benchmark-${{ matrix.task-short }}.csv
          --mlflow-tracking-uri ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
          --user-name ${{ github.triggering_actor }}
      - name: Upload test results
        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: ${{ inputs.artifact-prefix }}-${{ matrix.task-short }}
          path: .tox/perf-*.csv
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}
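For the vsp matrix entry, the run step resolves to tests/perf/test_visual_prompting.py, which matches the "dummy perf tests for visual prompting" bullet in the commit message. A hypothetical sketch of such a placeholder test, reusing the benchmark_type fixture assumed earlier; the class name, method name, and skip logic are invented for illustration, not the actual OTX tests.

# tests/perf/test_visual_prompting.py -- hypothetical placeholder sketch.
import pytest

class TestPerfVisualPrompting:
    def test_accuracy(self, benchmark_type):
        # Run only when the accuracy suite (or everything) was requested.
        if benchmark_type not in ("accuracy", "all"):
            pytest.skip("not an accuracy benchmark run")
        # Dummy body standing in for a real train/export/optimize benchmark.
        assert True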