diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml
new file mode 100644
index 000000000..2cfd88993
--- /dev/null
+++ b/.github/workflows/test_milvus.yml
@@ -0,0 +1,67 @@
+name: test | milvus
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+    types: [labeled, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
+
+jobs:
+  get_docs_changes:
+    name: docs changes
+    uses: ./.github/workflows/get_docs_changes.yml
+
+  run_milvus:
+    name: test
+    needs: get_docs_changes
+    # Keep this a single bare expression: mixing plain text with an embedded ${{ }}
+    # turns the whole value into a non-empty string, so the job would always run.
+    # The PR label list is checked so synchronize pushes are gated too; manual dispatches skip the label check.
+    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && (github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run-checks'))
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.x'
+
+      - name: Install Poetry
+        # https://github.com/snok/install-poetry#running-on-windows
+        uses: snok/install-poetry@v1.3.2
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: poetry install -E milvus --no-interaction
+
+      - name: Run default basic pipeline
+        env:
+          ENV: 'dev'
+          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: poetry run python ./cognee/tests/test_milvus.py
+
+      - name: Clean up disk space
+        run: |
+          sudo rm -rf ~/.cache
+          sudo rm -rf /tmp/*
+          df -h
diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
index bfc0bbd18..d3774542a 100644
--- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
+++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import asyncio
 import logging
 from typing import List, Optional
@@ -6,7 +8,6 @@
 from ..vector_db_interface import VectorDBInterface
 from ..models.ScoredResult import ScoredResult
 from ..embeddings.EmbeddingEngine import EmbeddingEngine
-from pymilvus import MilvusClient
 
 logger = logging.getLogger("MilvusAdapter")
 
@@ -31,8 +32,9 @@ def __init__(self, url: str, api_key: Optional[str], embedding_engine: Embedding
 
         self.embedding_engine = embedding_engine
 
-    def get_milvus_client(self) -> MilvusClient:
-        if self.api_key is not None:
+    def get_milvus_client(self) -> "MilvusClient":
+        from pymilvus import MilvusClient
+        if self.api_key:
             client = MilvusClient(uri=self.url, token=self.api_key)
         else:
             client = MilvusClient(uri=self.url)
diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py
index d565f6446..b32d3590b 100644
--- a/cognee/tests/test_milvus.py
+++ b/cognee/tests/test_milvus.py
@@ -16,6 +16,14 @@ async def main():
         pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve())
     cognee.config.system_root_directory(cognee_directory_path)
 
+    cognee.config.set_vector_db_config(
+        {
+            "vector_db_url": os.path.join(cognee_directory_path, "databases/milvus.db"),
+            "vector_db_key": "",
+            "vector_db_provider": "milvus"
+        }
+    )
+
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
 
diff --git a/poetry.lock b/poetry.lock
index 4b8262648..3611dacea 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiofiles"
@@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5"
 name = "grpcio"
 version = "1.67.1"
 description = "HTTP/2-based RPC framework"
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
     {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
@@ -2751,8 +2751,6 @@ optional = false
 python-versions = "*"
 files = [
     {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
-    {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
-    {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
 ]
 
 [package.dependencies]
@@ -3608,7 +3606,7 @@ files = [
 name = "milvus-lite"
 version = "2.4.10"
 description = "A lightweight version of Milvus wrapped with Python."
-optional = false
+optional = true
 python-versions = ">=3.7"
 files = [
     {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"},
@@ -4956,7 +4954,7 @@ files = [
 name = "protobuf"
 version = "5.28.3"
 description = ""
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
     {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"},
@@ -5382,7 +5380,7 @@ extra = ["pygments (>=2.12)"]
 name = "pymilvus"
 version = "2.5.0"
 description = "Python Sdk for Milvus"
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
     {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"},
@@ -7122,7 +7120,7 @@ files = [
 name = "ujson"
 version = "5.10.0"
 description = "Ultra fast JSON encoder and decoder for Python"
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
     {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"},
@@ -7765,6 +7763,7 @@ groq = ["groq"]
 langchain = ["langchain_text_splitters", "langsmith"]
 langfuse = ["langfuse"]
 llama-index = ["llama-index-core"]
+milvus = ["pymilvus"]
 neo4j = ["neo4j"]
 notebook = []
 postgres = ["asyncpg", "pgvector", "psycopg2"]
@@ -7775,4 +7774,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9.0,<3.12"
-content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3"
+content-hash = "d6b10b74a910202f224ff34fa06ad3d2767796a6492a96724de0d608ac0356c5"