-
Notifications
You must be signed in to change notification settings - Fork 131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: inherit from FilterDocumentsTestWithDataframe
in Document Stores
#1290
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,6 @@ | |
|
||
import base64 | ||
import os | ||
import random | ||
from typing import List | ||
from unittest.mock import MagicMock, patch | ||
|
||
|
@@ -14,18 +13,17 @@ | |
from haystack.dataclasses.document import Document | ||
from haystack.document_stores.errors import DocumentStoreError | ||
from haystack.testing.document_store import ( | ||
TEST_EMBEDDING_1, | ||
TEST_EMBEDDING_2, | ||
CountDocumentsTest, | ||
DeleteDocumentsTest, | ||
FilterDocumentsTest, | ||
FilterDocumentsTestWithDataframe, | ||
WriteDocumentsTest, | ||
create_filterable_docs, | ||
) | ||
from haystack.utils.auth import Secret | ||
from numpy import array as np_array | ||
from numpy import array_equal as np_array_equal | ||
from numpy import float32 as np_float32 | ||
from pandas import DataFrame | ||
from weaviate.collections.classes.data import DataObject | ||
from weaviate.config import AdditionalConfig, ConnectionConfig, Proxies, Timeout | ||
from weaviate.embedded import ( | ||
|
@@ -50,7 +48,9 @@ def test_init_is_lazy(_mock_client): | |
|
||
|
||
@pytest.mark.integration | ||
class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): | ||
class TestWeaviateDocumentStore( | ||
CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest, FilterDocumentsTestWithDataframe | ||
): | ||
@pytest.fixture | ||
def document_store(self, request) -> WeaviateDocumentStore: | ||
# Use a different index for each test so we can run them in parallel | ||
|
@@ -78,60 +78,24 @@ def filterable_docs(self) -> List[Document]: | |
Weaviate forces RFC 3339 date strings. | ||
The original fixture uses ISO 8601 date strings. | ||
""" | ||
documents = [] | ||
for i in range(3): | ||
documents.append( | ||
Document( | ||
content=f"A Foo Document {i}", | ||
meta={ | ||
"name": f"name_{i}", | ||
"page": "100", | ||
"chapter": "intro", | ||
"number": 2, | ||
"date": "1969-07-21T20:17:40Z", | ||
}, | ||
embedding=[random.random() for _ in range(768)], # noqa: S311 | ||
) | ||
) | ||
documents.append( | ||
Document( | ||
content=f"A Bar Document {i}", | ||
meta={ | ||
"name": f"name_{i}", | ||
"page": "123", | ||
"chapter": "abstract", | ||
"number": -2, | ||
"date": "1972-12-11T19:54:58Z", | ||
}, | ||
embedding=[random.random() for _ in range(768)], # noqa: S311 | ||
) | ||
) | ||
documents.append( | ||
Document( | ||
content=f"A Foobar Document {i}", | ||
meta={ | ||
"name": f"name_{i}", | ||
"page": "90", | ||
"chapter": "conclusion", | ||
"number": -10, | ||
"date": "1989-11-09T17:53:00Z", | ||
}, | ||
embedding=[random.random() for _ in range(768)], # noqa: S311 | ||
) | ||
) | ||
documents.append( | ||
Document( | ||
content=f"Document {i} without embedding", | ||
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"}, | ||
) | ||
) | ||
documents.append(Document(dataframe=DataFrame([i]), meta={"name": f"table_doc_{i}"})) | ||
documents.append( | ||
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1) | ||
) | ||
documents.append( | ||
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2) | ||
) | ||
documents = create_filterable_docs(include_dataframe_docs=False) | ||
for i in range(len(documents)): | ||
if date := documents[i].meta.get("date"): | ||
documents[i].meta["date"] = f"{date}Z" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's okay to update the meta data of a Document here directly. Just keep in mind that the Document's ID is not re-created. So the ID of this document here and a newly created Document that is initialized with the same attribute values will have different IDs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right! |
||
return documents | ||
|
||
@pytest.fixture | ||
def filterable_docs_with_dataframe(self) -> List[Document]: | ||
""" | ||
This fixture has been copied from haystack/testing/document_store.py and modified to | ||
use a different date format. | ||
Weaviate forces RFC 3339 date strings. | ||
The original fixture uses ISO 8601 date strings. | ||
""" | ||
documents = create_filterable_docs(include_dataframe_docs=True) | ||
for i in range(len(documents)): | ||
if date := documents[i].meta.get("date"): | ||
documents[i].meta["date"] = f"{date}Z" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for Weaviate, I simplified the existing code and added a new |
||
return documents | ||
|
||
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Chroma does not support dataframes, so in this case I'm not using the new testing class.
It is no longer necessary to skip these tests: they are no longer part of the basic tests.