From 58da2d9e577fa640c612077b9080d44b99f14860 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 7 Jan 2025 11:01:37 +0100 Subject: [PATCH 1/2] fix: Fixes faulty logging format and sets up error logging in dynamic steps example --- cognee/shared/utils.py | 14 ++++++++++++++ examples/python/dynamic_steps_example.py | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index b75076e55..4f0b1bc3b 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -12,6 +12,8 @@ import matplotlib.pyplot as plt import tiktoken import nltk +import logging +import sys from cognee.base_config import get_base_config from cognee.infrastructure.databases.graph import get_graph_engine @@ -283,6 +285,18 @@ def extract_sentiment_vader(text): return polarity_scores +def setup_logging(log_level=logging.INFO): + """ This method sets up the logging configuration. """ + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n") + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setFormatter(formatter) + stream_handler.setLevel(log_level) + + logging.basicConfig( + level=log_level, + handlers=[stream_handler], + ) + if __name__ == "__main__": sample_text = "I love sunny days, but I hate the rain." diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index 6af31750f..7c0af8f0c 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -1,7 +1,9 @@ import cognee import asyncio +import logging from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search from cognee.modules.retrieval.brute_force_triplet_search import format_triplets +from cognee.shared.utils import setup_logging job_1 = """ CV 1: Relevant @@ -186,6 +188,8 @@ async def main(enable_steps): print(format_triplets(results)) if __name__ == '__main__': + setup_logging(logging.ERROR) + rebuild_kg = True retrieve = True steps_to_enable = { From bd644a1434be5bb87f0c60e577c0068bb13b52ea Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:33:05 +0100 Subject: [PATCH 2/2] fix: Fixes duplicated edges in cognify by limiting the recursion depth in add datapoints --- cognee/api/v1/cognify/cognify_v2.py | 2 +- cognee/modules/graph/utils/get_graph_from_model.py | 3 ++- cognee/tasks/storage/add_data_points.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index c14f00978..4e2db5a70 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -94,7 +94,7 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, graph_model: BaseMo summarization_model = cognee_config.summarization_model, task_config = { "batch_size": 10 } ), - Task(add_data_points, task_config = { "batch_size": 10 }), + Task(add_data_points, only_root = True, task_config = { "batch_size": 10 }), ] pipeline = run_tasks(tasks, data_documents, "cognify_pipeline") diff --git a/cognee/modules/graph/utils/get_graph_from_model.py b/cognee/modules/graph/utils/get_graph_from_model.py index c675792bf..1b7c0908b 100644 --- a/cognee/modules/graph/utils/get_graph_from_model.py +++ b/cognee/modules/graph/utils/get_graph_from_model.py @@ -7,6 +7,7 @@ async def get_graph_from_model( added_nodes: dict, added_edges: dict, visited_properties: dict = None, + only_root = False, include_root = True, ): if str(data_point.id) in added_nodes: @@ -86,7 +87,7 @@ async def get_graph_from_model( })) added_edges[str(edge_key)] = True - if str(field_value.id) in added_nodes: + if str(field_value.id) in added_nodes or only_root: continue property_nodes, property_edges = await get_graph_from_model( diff --git a/cognee/tasks/storage/add_data_points.py b/cognee/tasks/storage/add_data_points.py index 2ce0fc57d..47cae4309 100644 --- a/cognee/tasks/storage/add_data_points.py +++ b/cognee/tasks/storage/add_data_points.py @@ -5,7 +5,7 @@ from .index_data_points import index_data_points -async def add_data_points(data_points: list[DataPoint]): +async def add_data_points(data_points: list[DataPoint], only_root = False): nodes = [] edges = [] @@ -19,6 +19,7 @@ async def add_data_points(data_points: list[DataPoint]): added_nodes = added_nodes, added_edges = added_edges, visited_properties = visited_properties, + only_root = only_root, ) for data_point in data_points ])