Skip to content

Commit

Permalink
fix: PgvectorDocumentStore - use appropriate schema name if dropping index (#1277)
Browse files Browse the repository at this point in the history

* fix: Add schema name if dropping index in pgvector store

* fix: Remove check for deletion in src

* new integration test

---------

Co-authored-by: anakin87 <[email protected]>
  • Loading branch information
mabartcz and anakin87 authored Jan 10, 2025
1 parent 20c9437 commit 20011ec
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
2 changes: 1 addition & 1 deletion integrations/pgvector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pip install pgvector-haystack

Ensure that you have a PostgreSQL running with the `pgvector` extension. For a quick setup using Docker, run:
```
- docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector
+ docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg17
```

then run the tests:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,9 @@ def _handle_hnsw(self):
)
return

sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name))
sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format(
schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name)
)
self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index")

self._create_hnsw_index()
Expand Down
45 changes: 45 additions & 0 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from unittest.mock import patch

import numpy as np
import psycopg
import pytest
from haystack.dataclasses.document import ByteStream, Document
from haystack.document_stores.errors import DuplicateDocumentError
Expand Down Expand Up @@ -259,3 +260,47 @@ def test_from_pg_to_haystack_documents():
assert haystack_docs[2].meta == {"meta_key": "meta_value"}
assert haystack_docs[2].embedding == [0.7, 0.8, 0.9]
assert haystack_docs[2].score is None


@pytest.mark.integration
def test_hnsw_index_recreation():
    """
    Check that the HNSW index is rebuilt when `hnsw_recreate_index_if_exists=True`.

    Two document stores are pointed at the same table inside a non-default schema.
    The first one creates the HNSW index; the second one is configured to recreate it.
    The index OID is read from the PostgreSQL catalog after each initialization, and a
    changed OID is taken as evidence that the index was dropped and created again.

    Requires a PostgreSQL instance reachable with the connection string used below.
    """
    connection_string = "postgresql://postgres:postgres@localhost:5432/postgres"
    schema_name = "test_schema"
    hnsw_index_name = "haystack_hnsw_index"

    def get_index_oid(document_store, schema_name, index_name):
        # Look the index up in the system catalog, restricted to the given schema,
        # and return the first column (the OID) of the first matching row.
        sql_get_index_oid = """
            SELECT c.oid
            FROM pg_class c
            JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE c.relkind = 'i'
            AND n.nspname = %s
            AND c.relname = %s;
        """
        result = document_store.cursor.execute(sql_get_index_oid, (schema_name, index_name))
        row = result.fetchone()
        return row[0]

    # Make sure the target schema exists before any store tries to use it.
    with psycopg.connect(connection_string, autocommit=True) as conn:
        conn.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

    # Settings shared by both stores, so they address the same table and index.
    params = {
        "connection_string": Secret.from_token(connection_string),
        "schema_name": schema_name,
        "table_name": "haystack_test_hnsw_index_recreation",
        "search_strategy": "hnsw",
    }

    # First store: initializing the table triggers creation of the HNSW index.
    ds1 = PgvectorDocumentStore(**params)
    ds1._initialize_table()
    first_oid = get_index_oid(ds1, ds1.schema_name, hnsw_index_name)

    # Second store: same settings, but with index recreation switched on.
    ds2 = PgvectorDocumentStore(**params, hnsw_recreate_index_if_exists=True)
    ds2._initialize_table()
    second_oid = get_index_oid(ds2, ds2.schema_name, hnsw_index_name)

    assert second_oid != first_oid, "Index was not recreated (OID remained the same)"

0 comments on commit 20011ec

Please sign in to comment.