Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Milvus vector db #244

Merged
merged 6 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ GRAPH_DATABASE_URL=
GRAPH_DATABASE_USERNAME=
GRAPH_DATABASE_PASSWORD=

# "qdrant", "pgvector", "weaviate" or "lancedb"
# "qdrant", "pgvector", "weaviate", "milvus" or "lancedb"
VECTOR_DB_PROVIDER="lancedb"
# Not needed if using "lancedb" or "pgvector"
VECTOR_DB_URL=
Expand Down
64 changes: 64 additions & 0 deletions .github/workflows/test_milvus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Integration test for the Milvus vector-store adapter.
# Installs the project with the `milvus` extra and runs cognee/tests/test_milvus.py.
name: test | milvus

on:
  workflow_dispatch:
  pull_request:
    branches:
      - main
    types: [labeled, synchronize]

# One run per PR (or ref); a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  RUNTIME__LOG_LEVEL: ERROR
  ENV: 'dev'

jobs:
  get_docs_changes:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml

  run_milvus:
    name: test
    needs: get_docs_changes
    # The whole condition must live inside a single ${{ }}. Mixing a bare
    # expression with an embedded ${{ }} makes GitHub evaluate the result as a
    # non-empty string, which is always truthy, so the job was never gated.
    # NOTE(review): github.event.label is only populated on `labeled` events,
    # so `synchronize` and `workflow_dispatch` runs are skipped by this
    # condition - confirm that is the intended gating.
    if: ${{ needs.get_docs_changes.outputs.changes_outside_docs == 'true' && github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    defaults:
      run:
        shell: bash

    steps:
      - name: Check out
        uses: actions/checkout@master

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11.x'

      - name: Install Poetry
        # https://github.com/snok/install-poetry#running-on-windows
        # NOTE(review): the version ref was mangled by e-mail obfuscation in
        # the page this was recovered from; v1.3.2 is assumed - confirm
        # against the repository.
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Install dependencies
        run: poetry install -E milvus --no-interaction

      - name: Run default basic pipeline
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: poetry run python ./cognee/tests/test_milvus.py

      - name: Clean up disk space
        run: |
          sudo rm -rf ~/.cache
          sudo rm -rf /tmp/*
          df -h
31 changes: 22 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ pip install 'cognee[qdrant]'
pip install 'cognee[neo4j]'
```

### With pip with Milvus support

```bash
pip install 'cognee[milvus]'
```

### With poetry

```bash
Expand Down Expand Up @@ -83,6 +89,12 @@ poetry add cognee -E qdrant
poetry add cognee -E neo4j
```

### With poetry with Milvus support

```bash
poetry add cognee -E milvus
```


## 💻 Basic Usage

Expand Down Expand Up @@ -356,12 +368,13 @@ pip install cognee
}
</style>

| Name | Type | Current state | Known Issues |
|------------------|--------------------|-------------------|---------------------------------------|
| Qdrant | Vector | Stable &#x2705; | |
| Weaviate | Vector | Stable &#x2705; | |
| LanceDB | Vector | Stable &#x2705; | |
| Neo4j | Graph | Stable &#x2705; | |
| NetworkX | Graph | Stable &#x2705; | |
| FalkorDB | Vector/Graph | Unstable &#x274C; | |
| PGVector | Vector | Unstable &#x274C; | Postgres DB returns the Timeout error |
| Name | Type | Current state | Known Issues |
|----------|--------------------|-------------------|--------------|
| Qdrant | Vector | Stable &#x2705; | |
| Weaviate | Vector | Stable &#x2705; | |
| LanceDB | Vector | Stable &#x2705; | |
| Neo4j | Graph | Stable &#x2705; | |
| NetworkX | Graph | Stable &#x2705; | |
| FalkorDB | Vector/Graph | Unstable &#x274C; | |
| PGVector | Vector | Stable &#x2705; | |
| Milvus | Vector | Stable &#x2705; | |
43 changes: 29 additions & 14 deletions cognee/infrastructure/databases/vector/create_vector_engine.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from typing import Dict


class VectorConfig(Dict):
    """Settings mapping consumed by ``create_vector_engine``.

    Values are read by subscript (e.g. ``config["vector_db_provider"]``); the
    annotations below name the keys that function looks up.
    """

    # Passed to the selected adapter as its URL / database URL.
    vector_db_url: str
    # Passed to the FalkorDB adapter as ``database_port``.
    vector_db_port: str
    # Passed to the adapters as the API key.
    vector_db_key: str
    # Selects the adapter ("weaviate", "qdrant", "milvus", "pgvector", ...);
    # an unrecognised value falls through to the LanceDB adapter.
    vector_db_provider: str


def create_vector_engine(config: VectorConfig, embedding_engine):
if config["vector_db_provider"] == "weaviate":
from .weaviate_db import WeaviateAdapter
Expand All @@ -16,24 +18,37 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
return WeaviateAdapter(
config["vector_db_url"],
config["vector_db_key"],
embedding_engine = embedding_engine
embedding_engine=embedding_engine
)

elif config["vector_db_provider"] == "qdrant":
if not (config["vector_db_url"] and config["vector_db_key"]):
raise EnvironmentError("Missing requred Qdrant credentials!")

from .qdrant.QDrantAdapter import QDrantAdapter

return QDrantAdapter(
url = config["vector_db_url"],
api_key = config["vector_db_key"],
embedding_engine = embedding_engine
url=config["vector_db_url"],
api_key=config["vector_db_key"],
embedding_engine=embedding_engine
)

elif config['vector_db_provider'] == 'milvus':
from .milvus.MilvusAdapter import MilvusAdapter

if not config["vector_db_url"]:
raise EnvironmentError("Missing required Milvus credentials!")

return MilvusAdapter(
url=config["vector_db_url"],
api_key=config['vector_db_key'],
embedding_engine=embedding_engine
)


elif config["vector_db_provider"] == "pgvector":
from cognee.infrastructure.databases.relational import get_relational_config

# Get configuration for postgres database
relational_config = get_relational_config()
db_username = relational_config.db_username
Expand All @@ -52,8 +67,8 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
from .pgvector.PGVectorAdapter import PGVectorAdapter

return PGVectorAdapter(
connection_string,
config["vector_db_key"],
connection_string,
config["vector_db_key"],
embedding_engine,
)

Expand All @@ -64,16 +79,16 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter

return FalkorDBAdapter(
database_url = config["vector_db_url"],
database_port = config["vector_db_port"],
embedding_engine = embedding_engine,
database_url=config["vector_db_url"],
database_port=config["vector_db_port"],
embedding_engine=embedding_engine,
)

else:
from .lancedb.LanceDBAdapter import LanceDBAdapter

return LanceDBAdapter(
url = config["vector_db_url"],
api_key = config["vector_db_key"],
embedding_engine = embedding_engine,
url=config["vector_db_url"],
api_key=config["vector_db_key"],
embedding_engine=embedding_engine,
)
Loading
Loading