From b486189b0612074c553ccad5d2444b3115edeaac Mon Sep 17 00:00:00 2001
From: Michael <michael.guenther@jina.ai>
Date: Mon, 23 Sep 2024 16:56:02 +0200
Subject: [PATCH] test: add second model to semantic chunking test

---
 tests/test_chunking_methods.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/test_chunking_methods.py b/tests/test_chunking_methods.py
index ff21fc5..02c3e17 100644
--- a/tests/test_chunking_methods.py
+++ b/tests/test_chunking_methods.py
@@ -98,9 +98,13 @@ def test_chunk_by_tokens():
         assert end - start <= 10
 
 
-def test_chunk_semantically():
+@pytest.mark.parametrize(
+    'model_name',
+    ['jinaai/jina-embeddings-v2-small-en', 'sentence-transformers/all-MiniLM-L6-v2'],
+)
+def test_chunk_semantically(model_name):
     chunker = Chunker(chunking_strategy="semantic")
-    tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-small-en')
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     tokens = tokenizer.encode_plus(
         EXAMPLE_TEXT_1, add_special_tokens=False, return_offsets_mapping=True
     )
@@ -108,7 +112,7 @@ def test_chunk_semantically():
         EXAMPLE_TEXT_1,
         tokenizer=tokenizer,
         chunking_strategy='semantic',
-        embedding_model_name='jinaai/jina-embeddings-v2-small-en',
+        embedding_model_name=model_name,
     )
 
     # check if it returns boundary cues