# add max_length parametrization for tokenizer encode #1823
#
# Workflow file for this run

# Workflow-level settings: display name, triggers, concurrency policy,
# environment shared by every job, and default token permissions.
name: Windows (VS 2019, Python 3.11)

# Runs on manual dispatch, on pull requests, in merge queues, and on pushes
# to master or any releases/** branch.
on:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - master
      - 'releases/**'

concurrency:
  # github.ref is not unique in post-commit
  # Push runs are therefore grouped by run id (each run gets its own group);
  # every other event is grouped by ref, so a newer run on the same ref
  # cancels the one still in progress.
  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-windows
  cancel-in-progress: true

env:
  # Quoted so the version stays a string rather than a float.
  PYTHON_VERSION: '3.11'
  # First non-empty of: pull request base ref, merge group base ref,
  # the ref that triggered the run.
  OV_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }}

permissions: read-all
jobs:
  # Provides a prebuilt OpenVINO package through the openvino_provider action
  # and exposes the step results to the dependent jobs.
  openvino_download:
    name: Download prebuilt OpenVINO
    outputs:
      status: ${{ steps.openvino_download.outcome }}
      # Read by the "Download OpenVINO package" steps of the jobs below;
      # it must be declared here to be visible through `needs.*.outputs`.
      ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
      ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
      ov_version: ${{ steps.openvino_download.outputs.ov_version }}
    timeout-minutes: 10
    defaults:
      run:
        shell: bash
    runs-on: aks-linux-2-cores-8gb
    container:
      image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
      volumes:
        - /mount:/mount
        - ${{ github.workspace }}:${{ github.workspace }}
    # A provider failure must not fail the workflow by itself; dependent jobs
    # check `outputs.status` explicitly instead.
    continue-on-error: true
    steps:
      - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
        id: openvino_download
        with:
          platform: 'windows'
          commit_packages_to_provide: 'wheels'
          revision: 'latest_available_commit'

  # Configures, builds and installs the tokenizers with CMake/Ninja, then
  # zips the install tree and uploads it as an artifact.
  openvino_tokenizers_cpack:
    name: OpenVINO tokenizers cpack (BUILD_FAST_TOKENIZERS=${{ matrix.build_fast_tokenizers }}, BUILD_TYPE=${{ matrix.build_type }})
    strategy:
      matrix:
        # Quoted: a bare ON is a boolean for YAML 1.1 parsers.
        build_fast_tokenizers: ['ON']
        build_type: [Release]  # TODO: Add Debug build when OV provider is ready or use OV package
    needs: [ openvino_download ]
    # always() is required because the download job uses continue-on-error;
    # the real gate is the recorded step outcome.
    if: |
      always() &&
      (needs.openvino_download.outputs.status == 'success')
    timeout-minutes: 45
    defaults:
      run:
        shell: pwsh
    runs-on: windows-latest
    env:
      CMAKE_GENERATOR: 'Ninja'
      OPENVINO_REPO: ${{ github.workspace }}\src\ov
      OV_INSTALL_DIR: ${{ github.workspace }}\b\ov
      OV_TOKENIZERS_INSTALL_DIR: ${{ github.workspace }}\b\ov_tk
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}\src\tk
      BUILD_DIR: ${{ github.workspace }}\b\tk
    steps:
      - name: Clone Openvino tokenizers
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      - name: Clone Openvino
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: 'openvinotoolkit/openvino'
          path: ${{ env.OPENVINO_REPO }}
          ref: ${{ env.OV_BRANCH }}
          sparse-checkout: |
            install_build_dependencies.sh

      - name: Setup Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.OV_INSTALL_DIR }}
          merge-multiple: true

      #
      # Build
      #

      - name: Install build dependencies
        run: |
          Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip -OutFile ninja-win.zip -MaximumRetryCount 10
          Expand-Archive -Force ninja-win.zip
          # Add it to the GitHub Path so it would be available in the subsequent steps
          Add-Content -Path $env:GITHUB_PATH -Value "${{ github.workspace }}/ninja-win"

      - name: Configure Developer Command Prompt for Microsoft Visual C++
        uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0

      - name: CMake configure - tokenizers
        shell: pwsh
        run: |
          ${{ env.OV_INSTALL_DIR }}/setupvars.ps1
          cmake -DBUILD_FAST_TOKENIZERS="${{ matrix.build_fast_tokenizers }}" `
                -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} `
                -S ${{ env.OPENVINO_TOKENIZERS_REPO }} `
                -B ${{ env.BUILD_DIR }}
        env:
          CMAKE_BUILD_PARALLEL_LEVEL: '4'

      - name: Cmake build - tokenizers
        run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ matrix.build_type }} --verbose
        env:
          ICU_DATA_FILTER_FILE: ${{ env.OPENVINO_TOKENIZERS_REPO }}\\src\\icu_filter_en.json

      - name: Cmake install - tokenizers
        run: |
          cmake --install ${{ env.BUILD_DIR }} --config=${{ matrix.build_type }} --prefix=${{ env.OV_TOKENIZERS_INSTALL_DIR }}/ov_tokenizers

      - name: Pack Artifacts
        run: |
          $file=Get-ChildItem -Path "${{ env.OV_TOKENIZERS_INSTALL_DIR }}"
          $compress = @{
            Path = $file
            CompressionLevel = "Optimal"
            DestinationPath = "${{ env.BUILD_DIR }}/ov_tokenizers.zip"
          }
          Compress-Archive @compress

      #
      # Upload build artifacts
      #

      - name: Upload openvino tokenizers package
        if: ${{ always() }}
        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
        with:
          name: openvino_tokenizers_cpack_${{ matrix.build_fast_tokenizers }}_${{ matrix.build_type }}
          path: ${{ env.BUILD_DIR }}/*.zip
          if-no-files-found: 'error'

  # Builds the tokenizers Python wheel with `pip wheel` and uploads it as the
  # `openvino_tokenizers_wheel` artifact consumed by the tests job.
  openvino_tokenizers_wheel:
    name: OpenVINO tokenizers wheel
    needs: [ openvino_download ]
    if: |
      always() &&
      (needs.openvino_download.outputs.status == 'success')
    timeout-minutes: 25
    defaults:
      run:
        shell: pwsh
    runs-on: windows-latest
    env:
      OPENVINO_REPO: ${{ github.workspace }}\src\ov
      INSTALL_DIR: ${{ github.workspace }}\b\ov
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}\src\tk
      BUILD_DIR: ${{ github.workspace }}\b\tk
    steps:
      - name: Clone Openvino tokenizers
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      - name: Clone Openvino
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: 'openvinotoolkit/openvino'
          path: ${{ env.OPENVINO_REPO }}
          ref: ${{ env.OV_BRANCH }}
          sparse-checkout: |
            install_build_dependencies.sh

      - name: Setup Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.INSTALL_DIR }}
          merge-multiple: true

      #
      # Build
      #

      - name: Build tokenizers wheel
        run: |
          python -m pip wheel -v --no-deps --wheel-dir ${env:BUILD_DIR} `
            ${{ needs.openvino_download.outputs.ov_wheel_source }} `
            --config-settings=override=cmake.build_path="${env:CMAKE_BUILD_DIR}" `
            ${env:OPENVINO_TOKENIZERS_REPO}
        env:
          CMAKE_BUILD_PARALLEL_LEVEL: '4'
          CMAKE_BUILD_DIR: 'D:\a\b'
        working-directory: ${{ env.INSTALL_DIR }}

      #
      # Upload build artifacts
      #

      - name: Upload openvino tokenizers wheel
        if: ${{ always() }}
        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
        with:
          name: openvino_tokenizers_wheel
          path: ${{ env.BUILD_DIR }}/*.whl
          if-no-files-found: 'error'

  # Installs the OpenVINO wheel and the freshly built tokenizers wheel, then
  # runs the pytest regression suite from the repository's tests directory.
  openvino_tokenizers_tests:
    name: OpenVINO tokenizers tests
    needs: [ openvino_download, openvino_tokenizers_wheel]
    if: always() && needs.openvino_tokenizers_wheel.result == 'success'
    timeout-minutes: 45
    defaults:
      run:
        shell: pwsh
    runs-on: windows-latest
    env:
      OPENVINO_REPO: ${{ github.workspace }}\\openvino
      INSTALL_DIR: ${{ github.workspace }}\\openvino\\install
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}\\openvino_tokenizers
      BUILD_DIR: ${{ github.workspace }}\\openvino_tokenizers\\build
    steps:
      - name: Clone Openvino tokenizers sources and tests
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      - name: Setup Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Download tokenizers package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: openvino_tokenizers_wheel
          path: ${{ env.INSTALL_DIR }}\\ov_tokenizers

      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.INSTALL_DIR }}
          merge-multiple: true

      - name: Install OpenVINO Python wheel from pre-built artifacts
        run: |
          python3 -m pip install openvino==${{ needs.openvino_download.outputs.ov_version }} ${{ needs.openvino_download.outputs.ov_wheel_source }}
        working-directory: ${{ env.INSTALL_DIR }}

      - name: Install OpenVINO tokenizers wheel
        run: |
          # Find and install wheel
          $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\\ov_tokenizers" -Filter openvino_tokenizers*.whl | % { $_.FullName }
          python3 -m pip install "$ovCoreWheelPath[all]"
        env:
          PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"

      - name: Tokenizers regression tests (using openvino python modules)
        run: |
          . "${{ env.INSTALL_DIR }}/setupvars.ps1"
          python3 -m pytest tokenizers_test.py
        working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }}/tests