diff --git a/modules/custom_operations/pyproject.toml b/modules/custom_operations/pyproject.toml
index d8e6bb5e1..3fe3a6b46 100644
--- a/modules/custom_operations/pyproject.toml
+++ b/modules/custom_operations/pyproject.toml
@@ -27,7 +27,7 @@ dev = [
     "pytest_harvest"
 ]
 transformers = [
-    "transformers[sentencepiece,tiktoken]"
+    "transformers[sentencepiece]"
 ]
 tiktoken = [
     "tiktoken"
diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md b/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
index a8adb5cc3..d39b9f9bc 100644
--- a/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
+++ b/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
@@ -199,22 +199,22 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
   <tbody>
     <tr>
       <td>BPE</td>
-      <td>92.941176</td>
+      <td>92.94</td>
       <td>850</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
-      <td>62.000000</td>
+      <td>62.00</td>
       <td>800</td>
     </tr>
     <tr>
       <td>Tiktoken</td>
-      <td>95.000000</td>
+      <td>95.00</td>
       <td>100</td>
     </tr>
     <tr>
       <td>WordPiece</td>
-      <td>99.204244</td>
+      <td>99.20</td>
       <td>377</td>
     </tr>
   </tbody>
@@ -235,289 +235,289 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
     <tr>
       <td>BPE</td>
       <td>EleutherAI/gpt-j-6b</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>EleutherAI/gpt-neo-125m</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>EleutherAI/gpt-neox-20b</td>
-      <td>92.000000</td>
+      <td>92.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>EleutherAI/pythia-12b-deduped</td>
-      <td>92.000000</td>
+      <td>92.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>KoboldAI/fairseq-dense-13B</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>Salesforce/codegen-16B-multi</td>
-      <td>92.000000</td>
+      <td>92.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>ai-forever/rugpt3large_based_on_gpt2</td>
-      <td>94.000000</td>
+      <td>94.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>bigscience/bloom</td>
-      <td>98.000000</td>
+      <td>98.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>facebook/bart-large-mnli</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>facebook/galactica-120b</td>
-      <td>94.000000</td>
+      <td>94.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>facebook/opt-66b</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>gpt2</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</td>
-      <td>60.000000</td>
+      <td>60.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>microsoft/deberta-base</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>roberta-base</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>sentence-transformers/all-roberta-large-v1</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>BPE</td>
       <td>stabilityai/stablecode-completion-alpha-3b-4k</td>
-      <td>94.000000</td>
+      <td>94.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>NousResearch/Llama-2-13b-hf</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>NousResearch/Llama-2-13b-hf_slow</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>THUDM/chatglm2-6b</td>
-      <td>50.000000</td>
+      <td>50.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>THUDM/chatglm2-6b_slow</td>
-      <td>50.000000</td>
+      <td>50.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>THUDM/chatglm3-6b</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>THUDM/chatglm3-6b_slow</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>camembert-base</td>
-      <td>26.000000</td>
+      <td>26.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>camembert-base_slow</td>
-      <td>26.000000</td>
+      <td>26.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>codellama/CodeLlama-7b-hf</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>codellama/CodeLlama-7b-hf_slow</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>microsoft/deberta-v3-base</td>
-      <td>94.000000</td>
+      <td>94.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>microsoft/deberta-v3-base_slow</td>
-      <td>98.000000</td>
+      <td>98.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>xlm-roberta-base</td>
-      <td>0.000000</td>
+      <td>0.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>xlm-roberta-base_slow</td>
-      <td>0.000000</td>
+      <td>0.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>xlnet-base-cased</td>
-      <td>24.000000</td>
+      <td>24.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>SentencePiece</td>
       <td>xlnet-base-cased_slow</td>
-      <td>24.000000</td>
+      <td>24.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>Tiktoken</td>
       <td>Qwen/Qwen-14B-Chat</td>
-      <td>96.000000</td>
+      <td>96.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>Tiktoken</td>
       <td>Salesforce/xgen-7b-8k-base</td>
-      <td>94.000000</td>
+      <td>94.00</td>
       <td>50</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>ProsusAI/finbert</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>bert-base-multilingual-cased</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>bert-large-cased</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>cointegrated/rubert-tiny2</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>distilbert-base-uncased-finetuned-sst-2-english</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>google/electra-base-discriminator</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>google/mobilebert-uncased</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>jhgan/ko-sbert-sts</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>prajjwal1/bert-mini</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>rajiv003/ernie-finetuned-qqp</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>rasa/LaBSE</td>
-      <td>89.655172</td>
+      <td>89.66</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>sentence-transformers/all-MiniLM-L6-v2</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
     <tr>
       <td>WordPiece</td>
       <td>squeezebert/squeezebert-uncased</td>
-      <td>100.000000</td>
+      <td>100.00</td>
       <td>29</td>
     </tr>
   </tbody>
diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py b/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
index a8b2fb0c1..01d0ef825 100644
--- a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
+++ b/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
@@ -10,6 +10,7 @@ def pytest_addoption(parser):
     parser.addoption("--update_readme", help="Update test coverage report in README.md")
 
+
 PASS_RATES_FILE = Path(__file__).parent / "pass_rates.json"
 
 
@@ -58,9 +59,9 @@ def add_tokenizer_type(row):
         "To update it run pytest with `--update_readme` flag.\n\n"
         "### Coverage by Tokenizer Type\n\n"
     )
-    grouped_by_type.style.hide_index().to_html(new_readme, exclude_styles=True)
+    grouped_by_type.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)
     new_readme.write("\n### Coverage by Model Type\n\n")
-    grouped_by_model.style.hide_index().to_html(new_readme, exclude_styles=True)
+    grouped_by_model.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)
     with open(readme_path, "w") as f:
         f.write(new_readme.getvalue())
 
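Note on the conftest.py change: `Styler.format(precision=2)` rounds float cells at render time, which is what shrinks the README cells from `92.941176` to `92.94`. Below is a minimal sketch of that behavior, not the project's code: the DataFrame contents and column names are made up for illustration, and it assumes a pandas version (>= 1.3, < 2.0) where `Styler.format(precision=...)`, `hide_index()`, and `to_html(..., exclude_styles=True)` are all available, as the patched calls imply.

```python
import io

import pandas as pd

# Illustrative stand-in for the `grouped_by_type` frame built in conftest.py;
# the values mirror the README report, the column names are assumed.
grouped_by_type = pd.DataFrame({
    "Tokenizer Type": ["BPE", "WordPiece"],
    "Pass Rate, %": [92.941176, 99.204244],
    "Number of Tests": [850, 377],
})

buf = io.StringIO()
# Without .format(precision=2), floats render at full precision: <td>92.941176</td>.
# With it, the rendered HTML rounds floats to two decimals: <td>92.94</td>;
# integer columns such as "Number of Tests" are unaffected.
grouped_by_type.style.format(precision=2).hide_index().to_html(buf, exclude_styles=True)
print(buf.getvalue())
```

Only the rendered HTML changes; the pass-rate values stored in pass_rates.json are untouched.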