diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 44e115423c..2916c340bf 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -267,6 +267,7 @@ jobs: run: | source ${OV_INSTALL_DIR}/setupvars.sh python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels + python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" - run: > @@ -358,6 +359,7 @@ jobs: run: | source ${OV_INSTALL_DIR}/setupvars.sh python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels + python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke" genai_package: diff --git a/.github/workflows/llm_bench-python.yml b/.github/workflows/llm_bench-python.yml index 77f26d33a0..0486a2a746 100644 --- a/.github/workflows/llm_bench-python.yml +++ b/.github/workflows/llm_bench-python.yml @@ -101,11 +101,9 @@ jobs: python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 - name: WWB Tests run: | - GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.WWB_PATH }}/requirements.txt pip install git+https://github.com/huggingface/optimum-intel.git - GIT_CLONE_PROTECTION_ACTIVE=false pip install ${{ env.WWB_PATH }} - python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall - python -m pytest -v tools/who_what_benchmark/tests + GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }} + python -m pytest -v ${{ env.WWB_PATH }}/tests stateful: runs-on: ubuntu-20.04 steps: @@ -122,9 +120,7 @@ jobs: grep beam_idx pytorch/dldt/FP32/openvino_model.xml - name: WWB Tests run: | - GIT_CLONE_PROTECTION_ACTIVE=false pip install -r tools/who_what_benchmark/requirements.txt - pip install git+https://github.com/huggingface/optimum-intel.git - GIT_CLONE_PROTECTION_ACTIVE=false pip install tools/who_what_benchmark/ pip install pytest - python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall - python -m pytest -v tools/who_what_benchmark/tests + pip install git+https://github.com/huggingface/optimum-intel.git + GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }} + python -m pytest -v ${{ env.WWB_PATH }}/tests diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5b1b7622ac..7a4ee31beb 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -234,6 +234,7 @@ jobs: source ${OV_INSTALL_DIR}/setupvars.sh python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels python -c "from openvino_genai import LLMPipeline" + python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" genai_python_lib_whisper: @@ -298,6 +299,7 @@ jobs: source ${OV_INSTALL_DIR}/setupvars.sh python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels python -c "from openvino_genai import LLMPipeline" + python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke" genai_package: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 17a1abb288..649d678c02 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -244,6 +244,7 @@ jobs: run: | . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels + python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" genai_python_lib_whisper: @@ -308,6 +309,7 @@ jobs: run: | . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels + python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke" genai_python_lib_vlm: diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt index bf13b0de7f..f0dbef2700 100644 --- a/tests/python_tests/requirements.txt +++ b/tests/python_tests/requirements.txt @@ -3,7 +3,6 @@ optimum-intel @ git+https://github.com/huggingface/optimum-intel.git numpy<2.0.0; sys_platform == 'darwin' onnx==1.16.1 pytest -tools/who_what_benchmark # requirements for specific models # - hf-tiny-model-private/tiny-random-RoFormerForCausalLM diff --git a/tests/python_tests/test_cache_optimizations.py b/tests/python_tests/test_cache_optimizations.py index a34e604382..45704f9dc6 100644 --- a/tests/python_tests/test_cache_optimizations.py +++ b/tests/python_tests/test_cache_optimizations.py @@ -7,7 +7,6 @@ import pytest -import whowhatbench from optimum.intel.openvino import OVModelForCausalLM from openvino_genai import ContinuousBatchingPipeline, SchedulerConfig, GenerationResult, GenerationConfig, CacheEvictionConfig, AggregationMode @@ -97,6 +96,8 @@ class CacheOptTestStruct: ]) @pytest.mark.parametrize("enable_prefix_caching", [True, False]) # prefix caching shouldn't impact similarity def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, test_struct, enable_prefix_caching): + import whowhatbench + seqs_per_request = 32 scheduler_config = get_scheduler_config(test_struct.num_kv_blocks) diff --git a/tools/who_what_benchmark/README.md b/tools/who_what_benchmark/README.md index 0e597859d2..cc16beb390 100644 --- a/tools/who_what_benchmark/README.md +++ b/tools/who_what_benchmark/README.md @@ -17,13 +17,22 @@ WWB provides default datasets for the supported use cases. However, it is relati * Validation of Visual Language pipelines. Computes similarity score between generated images with Diffusers library, Optimum-Intel, and OpenVINO GenAI via `VisualTextEvaluator` class. ### Installation -Install WWB and its requirements from the source using `pip` or any other package manager. For example, - -* `python -m venv eval_env` -* `source eval_env/bin/activate` -* `pip install -r requirements.txt` -* `pip install openvino.genai` to validate with OpenVINO GenAI API -* `pip install .` +To install WWB and its dependencies, follow these steps: +1. Set up a Python virtual environment (recommended): +``` + python -m venv eval_env + source eval_env/bin/activate +``` +2. Install WWB from the source directory: +``` + pip install . +``` +To install WWB with nightly builds of openvino, openvino-tokenizers, and openvino-genai, use the following command: +``` +PIP_PRE=1 \ +PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly \ +pip install . +``` ## Usage ### Compare Text-generation Models (LLMs) diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index bea6453c6b..9d151abbf3 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -1,9 +1,10 @@ transformers>=4.35.2 sentence-transformers>=2.2.2 -openvino>=2024.3.0 +openvino +openvino-tokenizers +openvino-genai openvino-telemetry optimum-intel>=1.19.0 -openvino-tokenizers pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1