diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml
index 0ecbc960..2408a8f7 100644
--- a/.github/workflows/profiling.yaml
+++ b/.github/workflows/profiling.yaml
@@ -94,7 +94,7 @@ jobs:
 #          chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
 #          # Run Scalene
 #          poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
-#
+
 #      # Compare profiling results
 #      - name: Compare profiling results
 #        run: |
diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py
index da58cf58..2f7b406a 100644
--- a/cognee/api/v1/config/config.py
+++ b/cognee/api/v1/config/config.py
@@ -131,6 +131,18 @@ def set_relational_db_config(config_dict: dict):
                     message=f"'{key}' is not a valid attribute of the config."
                 )
 
+    @staticmethod
+    def set_graph_db_config(config_dict: dict) -> None:
+        """
+        Updates the graph db config with values from config_dict.
+        """
+        graph_db_config = get_graph_config()
+        for key, value in config_dict.items():
+            if hasattr(graph_db_config, key):
+                object.__setattr__(graph_db_config, key, value)
+            else:
+                raise AttributeError(f"'{key}' is not a valid attribute of the config.")
+
     @staticmethod
     def set_vector_db_config(config_dict: dict):
         """
diff --git a/cognee/api/v1/search/search_v2.py b/cognee/api/v1/search/search_v2.py
index 52fc565c..4166fd3f 100644
--- a/cognee/api/v1/search/search_v2.py
+++ b/cognee/api/v1/search/search_v2.py
@@ -15,6 +15,7 @@
 from cognee.tasks.graph import query_graph_connections
 from cognee.tasks.summarization import query_summaries
 from cognee.tasks.completion import query_completion
+from cognee.tasks.completion import graph_query_completion
 
 
 class SearchType(Enum):
@@ -22,6 +23,7 @@
     INSIGHTS = "INSIGHTS"
     CHUNKS = "CHUNKS"
     COMPLETION = "COMPLETION"
+    GRAPH_COMPLETION = "GRAPH_COMPLETION"
 
 
 async def search(
@@ -65,6 +67,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list:
         SearchType.INSIGHTS: query_graph_connections,
         SearchType.CHUNKS: query_chunks,
         SearchType.COMPLETION: query_completion,
+        SearchType.GRAPH_COMPLETION: graph_query_completion,
     }
 
     search_task = search_tasks.get(query_type)
diff --git a/cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt b/cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt
new file mode 100644
index 00000000..fe37a266
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt
@@ -0,0 +1 @@
+Answer the question using the provided context. If the provided context is not connected to the question, just answer "The provided knowledge base does not contain the answer to the question". Be as brief as possible.
\ No newline at end of file
diff --git a/cognee/infrastructure/llm/prompts/graph_context_for_question.txt b/cognee/infrastructure/llm/prompts/graph_context_for_question.txt
new file mode 100644
index 00000000..dae7138a
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/graph_context_for_question.txt
@@ -0,0 +1,2 @@
+The question is: `{{ question }}`
+and here is the context provided with a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet: `{{ context }}`
\ No newline at end of file
diff --git a/cognee/tasks/completion/__init__.py b/cognee/tasks/completion/__init__.py
index 8a5aaa68..4bbbd01e 100644
--- a/cognee/tasks/completion/__init__.py
+++ b/cognee/tasks/completion/__init__.py
@@ -1 +1,2 @@
 from .query_completion import query_completion
+from .graph_query_completion import graph_query_completion
diff --git a/cognee/tasks/completion/graph_query_completion.py b/cognee/tasks/completion/graph_query_completion.py
new file mode 100644
index 00000000..b130d4f7
--- /dev/null
+++ b/cognee/tasks/completion/graph_query_completion.py
@@ -0,0 +1,45 @@
+from cognee.tasks.completion.exceptions import NoRelevantDataFound
+from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
+
+
+def retrieved_edges_to_string(retrieved_edges: list) -> str:
+    edge_strings = []
+    for edge in retrieved_edges:
+        node1_string = edge.node1.attributes.get("text") or edge.node1.attributes.get("name")
+        node2_string = edge.node2.attributes.get("text") or edge.node2.attributes.get("name")
+        edge_string = edge.attributes["relationship_type"]
+        edge_str = f"{node1_string} -- {edge_string} -- {node2_string}"
+        edge_strings.append(edge_str)
+    return "\n---\n".join(edge_strings)
+
+
+async def graph_query_completion(query: str) -> list:
+    """
+    Parameters:
+    - query (str): The query string to compute.
+
+    Returns:
+    - list: Answer to the query.
+    """
+    found_triplets = await brute_force_triplet_search(query, top_k=5)
+
+    if len(found_triplets) == 0:
+        raise NoRelevantDataFound
+
+    args = {
+        "question": query,
+        "context": retrieved_edges_to_string(found_triplets),
+    }
+    user_prompt = render_prompt("graph_context_for_question.txt", args)
+    system_prompt = read_query_prompt("answer_simple_question_restricted.txt")
+
+    llm_client = get_llm_client()
+    computed_answer = await llm_client.acreate_structured_output(
+        text_input=user_prompt,
+        system_prompt=system_prompt,
+        response_model=str,
+    )
+
+    return [computed_answer]
diff --git a/cognee/tasks/repo_processor/expand_dependency_graph.py b/cognee/tasks/repo_processor/expand_dependency_graph.py
index d3f5d1b0..cc957742 100644
--- a/cognee/tasks/repo_processor/expand_dependency_graph.py
+++ b/cognee/tasks/repo_processor/expand_dependency_graph.py
@@ -5,10 +5,9 @@
 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart
 from cognee.tasks.repo_processor.extract_code_parts import extract_code_parts
-
 import logging
 
-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)
 
 
 def _add_code_parts_nodes_and_edges(code_file: CodeFile, part_type, code_parts) -> None:
diff --git a/cognee/tasks/repo_processor/extract_code_parts.py b/cognee/tasks/repo_processor/extract_code_parts.py
index c181a87d..f2514623 100644
--- a/cognee/tasks/repo_processor/extract_code_parts.py
+++ b/cognee/tasks/repo_processor/extract_code_parts.py
@@ -3,7 +3,7 @@
 
 import logging
 
-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)
 
 
 def _extract_parts_from_module(module, parts_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
diff --git a/cognee/tasks/repo_processor/get_local_dependencies.py b/cognee/tasks/repo_processor/get_local_dependencies.py
index 92b50cd0..b0ac2829 100644
--- a/cognee/tasks/repo_processor/get_local_dependencies.py
+++ b/cognee/tasks/repo_processor/get_local_dependencies.py
@@ -12,7 +12,7 @@
 
 import logging
 
-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)
 
 
 @contextmanager
diff --git a/cognee/tasks/repo_processor/get_source_code_chunks.py b/cognee/tasks/repo_processor/get_source_code_chunks.py
index 84169e3b..82fa46cf 100644
--- a/cognee/tasks/repo_processor/get_source_code_chunks.py
+++ b/cognee/tasks/repo_processor/get_source_code_chunks.py
@@ -9,7 +9,7 @@
 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk
 
-logger = logging.getLogger("task:get_source_code_chunks")
+logger = logging.getLogger(__name__)
 
 
 def _count_tokens(tokenizer: tiktoken.Encoding, source_code: str) -> int:
diff --git a/cognee/tasks/repo_processor/top_down_repo_parse.py b/cognee/tasks/repo_processor/top_down_repo_parse.py
index aed97192..87b7b8c9 100644
--- a/cognee/tasks/repo_processor/top_down_repo_parse.py
+++ b/cognee/tasks/repo_processor/top_down_repo_parse.py
@@ -4,7 +4,9 @@
 import parso
 from tqdm import tqdm
 
-from . import logger
+import logging
+
+logger = logging.getLogger(__name__)
 
 _NODE_TYPE_MAP = {
diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py
index 165d907d..11596a5e 100644
--- a/examples/python/dynamic_steps_example.py
+++ b/examples/python/dynamic_steps_example.py
@@ -1,8 +1,8 @@
 import cognee
 import asyncio
 import logging
-from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
-from cognee.modules.retrieval.brute_force_triplet_search import format_triplets
+
+from cognee.api.v1.search import SearchType
 from cognee.shared.utils import setup_logging
 
 job_1 = """
@@ -185,14 +185,14 @@ async def main(enable_steps):
 
     # Step 4: Query insights
     if enable_steps.get("retriever"):
-        results = await brute_force_triplet_search(
-            "Who has the most experience with graphic design?"
+        search_results = await cognee.search(
+            SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
         )
-        print(format_triplets(results))
+        print(search_results)
 
 
 if __name__ == "__main__":
-    setup_logging(logging.ERROR)
+    setup_logging(logging.INFO)
 
     rebuild_kg = True
     retrieve = True