diff --git a/modules/custom_operations/pyproject.toml b/modules/custom_operations/pyproject.toml
index d8e6bb5e1..3fe3a6b46 100644
--- a/modules/custom_operations/pyproject.toml
+++ b/modules/custom_operations/pyproject.toml
@@ -27,7 +27,7 @@ dev = [
"pytest_harvest"
]
transformers = [
- "transformers[sentencepiece,tiktoken]"
+ "transformers[sentencepiece]"
]
tiktoken = [
"tiktoken"
diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md b/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
index a8adb5cc3..d39b9f9bc 100644
--- a/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
+++ b/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md
@@ -199,22 +199,22 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
BPE |
- 92.941176 |
+ 92.94 |
850 |
SentencePiece |
- 62.000000 |
+ 62.00 |
800 |
Tiktoken |
- 95.000000 |
+ 95.00 |
100 |
WordPiece |
- 99.204244 |
+ 99.20 |
377 |
@@ -235,289 +235,289 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
BPE |
EleutherAI/gpt-j-6b |
- 96.000000 |
+ 96.00 |
50 |
BPE |
EleutherAI/gpt-neo-125m |
- 96.000000 |
+ 96.00 |
50 |
BPE |
EleutherAI/gpt-neox-20b |
- 92.000000 |
+ 92.00 |
50 |
BPE |
EleutherAI/pythia-12b-deduped |
- 92.000000 |
+ 92.00 |
50 |
BPE |
KoboldAI/fairseq-dense-13B |
- 96.000000 |
+ 96.00 |
50 |
BPE |
Salesforce/codegen-16B-multi |
- 92.000000 |
+ 92.00 |
50 |
BPE |
ai-forever/rugpt3large_based_on_gpt2 |
- 94.000000 |
+ 94.00 |
50 |
BPE |
bigscience/bloom |
- 98.000000 |
+ 98.00 |
50 |
BPE |
facebook/bart-large-mnli |
- 96.000000 |
+ 96.00 |
50 |
BPE |
facebook/galactica-120b |
- 94.000000 |
+ 94.00 |
50 |
BPE |
facebook/opt-66b |
- 96.000000 |
+ 96.00 |
50 |
BPE |
gpt2 |
- 96.000000 |
+ 96.00 |
50 |
BPE |
laion/CLIP-ViT-bigG-14-laion2B-39B-b160k |
- 60.000000 |
+ 60.00 |
50 |
BPE |
microsoft/deberta-base |
- 96.000000 |
+ 96.00 |
50 |
BPE |
roberta-base |
- 96.000000 |
+ 96.00 |
50 |
BPE |
sentence-transformers/all-roberta-large-v1 |
- 96.000000 |
+ 96.00 |
50 |
BPE |
stabilityai/stablecode-completion-alpha-3b-4k |
- 94.000000 |
+ 94.00 |
50 |
SentencePiece |
NousResearch/Llama-2-13b-hf |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
NousResearch/Llama-2-13b-hf_slow |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
THUDM/chatglm2-6b |
- 50.000000 |
+ 50.00 |
50 |
SentencePiece |
THUDM/chatglm2-6b_slow |
- 50.000000 |
+ 50.00 |
50 |
SentencePiece |
THUDM/chatglm3-6b |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
THUDM/chatglm3-6b_slow |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
camembert-base |
- 26.000000 |
+ 26.00 |
50 |
SentencePiece |
camembert-base_slow |
- 26.000000 |
+ 26.00 |
50 |
SentencePiece |
codellama/CodeLlama-7b-hf |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
codellama/CodeLlama-7b-hf_slow |
- 100.000000 |
+ 100.00 |
50 |
SentencePiece |
microsoft/deberta-v3-base |
- 94.000000 |
+ 94.00 |
50 |
SentencePiece |
microsoft/deberta-v3-base_slow |
- 98.000000 |
+ 98.00 |
50 |
SentencePiece |
xlm-roberta-base |
- 0.000000 |
+ 0.00 |
50 |
SentencePiece |
xlm-roberta-base_slow |
- 0.000000 |
+ 0.00 |
50 |
SentencePiece |
xlnet-base-cased |
- 24.000000 |
+ 24.00 |
50 |
SentencePiece |
xlnet-base-cased_slow |
- 24.000000 |
+ 24.00 |
50 |
Tiktoken |
Qwen/Qwen-14B-Chat |
- 96.000000 |
+ 96.00 |
50 |
Tiktoken |
Salesforce/xgen-7b-8k-base |
- 94.000000 |
+ 94.00 |
50 |
WordPiece |
ProsusAI/finbert |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
bert-base-multilingual-cased |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
bert-large-cased |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
cointegrated/rubert-tiny2 |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
distilbert-base-uncased-finetuned-sst-2-english |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
google/electra-base-discriminator |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
google/mobilebert-uncased |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
jhgan/ko-sbert-sts |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
prajjwal1/bert-mini |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
rajiv003/ernie-finetuned-qqp |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
rasa/LaBSE |
- 89.655172 |
+ 89.66 |
29 |
WordPiece |
sentence-transformers/all-MiniLM-L6-v2 |
- 100.000000 |
+ 100.00 |
29 |
WordPiece |
squeezebert/squeezebert-uncased |
- 100.000000 |
+ 100.00 |
29 |
diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py b/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
index a8b2fb0c1..01d0ef825 100644
--- a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
+++ b/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py
@@ -10,6 +10,7 @@
def pytest_addoption(parser):
parser.addoption("--update_readme", help="Update test coverage report in README.md")
+
PASS_RATES_FILE = Path(__file__).parent / "pass_rates.json"
@@ -58,9 +59,9 @@ def add_tokenizer_type(row):
"To update it run pytest with `--update_readme` flag.\n\n"
"### Coverage by Tokenizer Type\n\n"
)
- grouped_by_type.style.hide_index().to_html(new_readme, exclude_styles=True)
+ grouped_by_type.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)
new_readme.write("\n### Coverage by Model Type\n\n")
- grouped_by_model.style.hide_index().to_html(new_readme, exclude_styles=True)
+ grouped_by_model.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)
with open(readme_path, "w") as f:
f.write(new_readme.getvalue())