From bd644a1434be5bb87f0c60e577c0068bb13b52ea Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:33:05 +0100 Subject: [PATCH] fix: Fixes duplicated edges in cognify by limiting the recursion depth in add datapoints --- cognee/api/v1/cognify/cognify_v2.py | 2 +- cognee/modules/graph/utils/get_graph_from_model.py | 3 ++- cognee/tasks/storage/add_data_points.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index c14f0097..4e2db5a7 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -94,7 +94,7 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, graph_model: BaseMo summarization_model = cognee_config.summarization_model, task_config = { "batch_size": 10 } ), - Task(add_data_points, task_config = { "batch_size": 10 }), + Task(add_data_points, only_root = True, task_config = { "batch_size": 10 }), ] pipeline = run_tasks(tasks, data_documents, "cognify_pipeline") diff --git a/cognee/modules/graph/utils/get_graph_from_model.py b/cognee/modules/graph/utils/get_graph_from_model.py index c675792b..1b7c0908 100644 --- a/cognee/modules/graph/utils/get_graph_from_model.py +++ b/cognee/modules/graph/utils/get_graph_from_model.py @@ -7,6 +7,7 @@ async def get_graph_from_model( added_nodes: dict, added_edges: dict, visited_properties: dict = None, + only_root = False, include_root = True, ): if str(data_point.id) in added_nodes: @@ -86,7 +87,7 @@ async def get_graph_from_model( })) added_edges[str(edge_key)] = True - if str(field_value.id) in added_nodes: + if str(field_value.id) in added_nodes or only_root: continue property_nodes, property_edges = await get_graph_from_model( diff --git a/cognee/tasks/storage/add_data_points.py b/cognee/tasks/storage/add_data_points.py index 2ce0fc57..47cae430 100644 --- a/cognee/tasks/storage/add_data_points.py +++ b/cognee/tasks/storage/add_data_points.py @@ -5,7 +5,7 @@ from .index_data_points import index_data_points -async def add_data_points(data_points: list[DataPoint]): +async def add_data_points(data_points: list[DataPoint], only_root = False): nodes = [] edges = [] @@ -19,6 +19,7 @@ async def add_data_points(data_points: list[DataPoint]): added_nodes = added_nodes, added_edges = added_edges, visited_properties = visited_properties, + only_root = only_root, ) for data_point in data_points ])