diff --git a/integrations/pgvector/README.md b/integrations/pgvector/README.md index a2d325c54..4859762f9 100644 --- a/integrations/pgvector/README.md +++ b/integrations/pgvector/README.md @@ -22,7 +22,7 @@ pip install pgvector-haystack Ensure that you have a PostgreSQL running with the `pgvector` extension. For a quick setup using Docker, run: ``` -docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector +docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg17 ``` then run the tests: diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 87655a5ec..648ae88af 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -389,7 +389,9 @@ def _handle_hnsw(self): ) return - sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name)) + sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format( + schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name) + ) self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index") self._create_hnsw_index() diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index baa921137..a331a990e 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -5,6 +5,7 @@ from unittest.mock import patch import numpy as np +import psycopg import pytest from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DuplicateDocumentError @@ -259,3 
+260,58 @@ def test_from_pg_to_haystack_documents():
     assert haystack_docs[2].meta == {"meta_key": "meta_value"}
     assert haystack_docs[2].embedding == [0.7, 0.8, 0.9]
     assert haystack_docs[2].score is None
+
+
+@pytest.mark.integration
+def test_hnsw_index_recreation():
+    """
+    Check that `hnsw_recreate_index_if_exists=True` recreates the HNSW index in a dedicated schema.
+
+    The index OID is read from `pg_class` after each initialization; per the final assertion,
+    an unchanged OID means that the index was not recreated.
+    """
+
+    def get_index_oid(document_store, schema_name, index_name):
+        # look up the OID of the index (relkind 'i') with this name in this schema
+        sql_get_index_oid = """
+        SELECT c.oid
+        FROM pg_class c
+        JOIN pg_namespace n ON n.oid = c.relnamespace
+        WHERE c.relkind = 'i'
+        AND n.nspname = %s
+        AND c.relname = %s;
+        """
+        row = document_store.cursor.execute(sql_get_index_oid, (schema_name, index_name)).fetchone()
+        # fail with a readable message rather than a TypeError when no matching index exists
+        assert row is not None, f"Index {schema_name}.{index_name} does not exist"
+        return row[0]
+
+    # create a new schema
+    connection_string = "postgresql://postgres:postgres@localhost:5432/postgres"
+    schema_name = "test_schema"
+    with psycopg.connect(connection_string, autocommit=True) as conn:
+        conn.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
+
+    # create a first document store and trigger the creation of the hnsw index
+    params = {
+        "connection_string": Secret.from_token(connection_string),
+        "schema_name": schema_name,
+        "table_name": "haystack_test_hnsw_index_recreation",
+        "search_strategy": "hnsw",
+    }
+    ds1 = PgvectorDocumentStore(**params)
+    ds1._initialize_table()
+
+    # get the hnsw index oid
+    hnsw_index_name = "haystack_hnsw_index"
+    first_oid = get_index_oid(ds1, ds1.schema_name, hnsw_index_name)
+
+    # create second document store with recreation enabled
+    ds2 = PgvectorDocumentStore(**params, hnsw_recreate_index_if_exists=True)
+    ds2._initialize_table()
+
+    # get the index oid
+    second_oid = get_index_oid(ds2, ds2.schema_name, hnsw_index_name)
+
+    # verify that oids differ
+    assert second_oid != first_oid, "Index was not recreated (OID remained the same)"