Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: inherit from FilterDocumentsTestWithDataframe in Document Stores #1290

Merged
merged 5 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions integrations/astra/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from haystack import Document
from haystack.document_stores.errors import MissingDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe

from haystack_integrations.document_stores.astra import AstraDocumentStore

Expand Down Expand Up @@ -47,7 +47,7 @@ def test_to_dict(mock_auth): # noqa
os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "") == "", reason="ASTRA_DB_APPLICATION_TOKEN env var not set"
)
@pytest.mark.skipif(os.environ.get("ASTRA_DB_API_ENDPOINT", "") == "", reason="ASTRA_DB_API_ENDPOINT env var not set")
class TestDocumentStore(DocumentStoreBaseTests):
class TestDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe):
"""
Common test cases will be provided by `DocumentStoreBaseTests` but
you can add more to this class.
Expand Down
1 change: 0 additions & 1 deletion integrations/azure_ai_search/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from haystack_integrations.document_stores.azure_ai_search import AzureAISearchDocumentStore


# This is the approximate time in seconds it takes for the documents to be available in Azure Search index
SLEEP_TIME_IN_SECONDS = 10
MAX_WAIT_TIME_FOR_INDEX_DELETION = 10
Expand Down
3 changes: 2 additions & 1 deletion integrations/azure_ai_search/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CountDocumentsTest,
DeleteDocumentsTest,
FilterDocumentsTest,
FilterDocumentsTestWithDataframe,
WriteDocumentsTest,
)
from haystack.utils.auth import EnvVarSecret, Secret
Expand Down Expand Up @@ -155,7 +156,7 @@ def _random_embeddings(n):
],
indirect=True,
)
class TestFilters(FilterDocumentsTest):
class TestFilters(FilterDocumentsTest, FilterDocumentsTestWithDataframe):

# Overriding to change "date" to compatible ISO 8601 format
# and remove incompatible fields (dataframes) for Azure search index
Expand Down
12 changes: 0 additions & 12 deletions integrations/chroma/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,18 +406,6 @@ def test_nested_logical_filters(self, document_store: ChromaDocumentStore, filte
],
)

@pytest.mark.skip(reason="Filter on dataframe contents is not supported.")
def test_comparison_equal_with_dataframe(
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

@pytest.mark.skip(reason="Filter on dataframe contents is not supported.")
def test_comparison_not_equal_with_dataframe(
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

Comment on lines -409 to -420
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chroma does not support dataframes, so in this case I'm not using the new testing class.
It is no longer necessary to skip these tests: they are no longer part of the basic tests.

@pytest.mark.skip(reason="Chroma does not support comparison with null values")
def test_comparison_equal_with_none(self, document_store, filterable_docs):
pass
Expand Down
4 changes: 2 additions & 2 deletions integrations/elasticsearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from haystack.dataclasses.document import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe

from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore

Expand Down Expand Up @@ -70,7 +70,7 @@ def test_from_dict(_mock_elasticsearch_client):


@pytest.mark.integration
class TestDocumentStore(DocumentStoreBaseTests):
class TestDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe):
"""
Common test cases will be provided by `DocumentStoreBaseTests` but
you can add more to this class.
Expand Down
4 changes: 2 additions & 2 deletions integrations/mongodb_atlas/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from haystack.dataclasses.document import ByteStream, Document
from haystack.document_stores.errors import DuplicateDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe
from haystack.utils import Secret
from pandas import DataFrame
from pymongo import MongoClient
Expand All @@ -35,7 +35,7 @@ def test_init_is_lazy(_mock_client):
reason="No MongoDB Atlas connection string provided",
)
@pytest.mark.integration
class TestDocumentStore(DocumentStoreBaseTests):
class TestDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe):
@pytest.fixture
def document_store(self):
database_name = "haystack_integration_test"
Expand Down
4 changes: 2 additions & 2 deletions integrations/opensearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from haystack.dataclasses.document import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe
from haystack.utils.auth import Secret
from opensearchpy.exceptions import RequestError

Expand Down Expand Up @@ -265,7 +265,7 @@ def test_to_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.Mon


@pytest.mark.integration
class TestDocumentStore(DocumentStoreBaseTests):
class TestDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe):
"""
Common test cases will be provided by `DocumentStoreBaseTests` but
you can add more to this class.
Expand Down
4 changes: 2 additions & 2 deletions integrations/pgvector/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
from haystack.dataclasses.document import Document
from haystack.testing.document_store import FilterDocumentsTest
from haystack.testing.document_store import FilterDocumentsTest, FilterDocumentsTestWithDataframe
from pandas import DataFrame
from psycopg.sql import SQL
from psycopg.types.json import Jsonb
Expand All @@ -17,7 +17,7 @@


@pytest.mark.integration
class TestFilters(FilterDocumentsTest):
class TestFilters(FilterDocumentsTest, FilterDocumentsTestWithDataframe):
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
"""
This overrides the default assert_documents_are_equal from FilterDocumentsTest.
Expand Down
3 changes: 2 additions & 1 deletion integrations/pinecone/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from haystack.dataclasses.document import Document
from haystack.testing.document_store import (
FilterDocumentsTest,
FilterDocumentsTestWithDataframe,
)


@pytest.mark.integration
@pytest.mark.skipif("PINECONE_API_KEY" not in os.environ, reason="PINECONE_API_KEY not set")
class TestFilters(FilterDocumentsTest):
class TestFilters(FilterDocumentsTest, FilterDocumentsTestWithDataframe):
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
for doc in received:
# Pinecone seems to convert integers to floats (undocumented behavior)
Expand Down
4 changes: 2 additions & 2 deletions integrations/qdrant/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

import pytest
from haystack import Document
from haystack.testing.document_store import FilterDocumentsTest
from haystack.testing.document_store import FilterDocumentsTest, FilterDocumentsTestWithDataframe
from haystack.utils.filters import FilterError
from qdrant_client.http import models

from haystack_integrations.document_stores.qdrant import QdrantDocumentStore


class TestQdrantStoreBaseTests(FilterDocumentsTest):
class TestQdrantStoreBaseTests(FilterDocumentsTest, FilterDocumentsTestWithDataframe):
@pytest.fixture
def document_store(self) -> QdrantDocumentStore:
return QdrantDocumentStore(
Expand Down
82 changes: 23 additions & 59 deletions integrations/weaviate/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import base64
import os
import random
from typing import List
from unittest.mock import MagicMock, patch

Expand All @@ -14,18 +13,17 @@
from haystack.dataclasses.document import Document
from haystack.document_stores.errors import DocumentStoreError
from haystack.testing.document_store import (
TEST_EMBEDDING_1,
TEST_EMBEDDING_2,
CountDocumentsTest,
DeleteDocumentsTest,
FilterDocumentsTest,
FilterDocumentsTestWithDataframe,
WriteDocumentsTest,
create_filterable_docs,
)
from haystack.utils.auth import Secret
from numpy import array as np_array
from numpy import array_equal as np_array_equal
from numpy import float32 as np_float32
from pandas import DataFrame
from weaviate.collections.classes.data import DataObject
from weaviate.config import AdditionalConfig, ConnectionConfig, Proxies, Timeout
from weaviate.embedded import (
Expand All @@ -50,7 +48,9 @@ def test_init_is_lazy(_mock_client):


@pytest.mark.integration
class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest):
class TestWeaviateDocumentStore(
CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest, FilterDocumentsTestWithDataframe
):
@pytest.fixture
def document_store(self, request) -> WeaviateDocumentStore:
# Use a different index for each test so we can run them in parallel
Expand Down Expand Up @@ -78,60 +78,24 @@ def filterable_docs(self) -> List[Document]:
Weaviate forces RFC 3339 date strings.
The original fixture uses ISO 8601 date strings.
"""
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={
"name": f"name_{i}",
"page": "100",
"chapter": "intro",
"number": 2,
"date": "1969-07-21T20:17:40Z",
},
embedding=[random.random() for _ in range(768)], # noqa: S311
)
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={
"name": f"name_{i}",
"page": "123",
"chapter": "abstract",
"number": -2,
"date": "1972-12-11T19:54:58Z",
},
embedding=[random.random() for _ in range(768)], # noqa: S311
)
)
documents.append(
Document(
content=f"A Foobar Document {i}",
meta={
"name": f"name_{i}",
"page": "90",
"chapter": "conclusion",
"number": -10,
"date": "1989-11-09T17:53:00Z",
},
embedding=[random.random() for _ in range(768)], # noqa: S311
)
)
documents.append(
Document(
content=f"Document {i} without embedding",
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
)
)
documents.append(Document(dataframe=DataFrame([i]), meta={"name": f"table_doc_{i}"}))
documents.append(
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
)
documents.append(
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
)
documents = create_filterable_docs(include_dataframe_docs=False)
for i in range(len(documents)):
if date := documents[i].meta.get("date"):
documents[i].meta["date"] = f"{date}Z"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's okay to update the metadata of a Document directly here. Just keep in mind that the Document's ID is not re-created when its metadata changes, so this Document and a newly created Document initialized with the same attribute values will have different IDs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right!

return documents

@pytest.fixture
def filterable_docs_with_dataframe(self) -> List[Document]:
"""
This fixture has been copied from haystack/testing/document_store.py and modified to
use a different date format.
Weaviate forces RFC 3339 date strings.
The original fixture uses ISO 8601 date strings.
"""
documents = create_filterable_docs(include_dataframe_docs=True)
for i in range(len(documents)):
if date := documents[i].meta.get("date"):
documents[i].meta["date"] = f"{date}Z"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Weaviate, I simplified the existing code and added a new `filterable_docs_with_dataframe` fixture.

return documents

def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
Expand Down
Loading