diff --git a/Dockerfile b/Dockerfile index f0642ded9..5f76aff3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,8 @@ RUN apt-get update -q && \ /var/tmp/* WORKDIR /app +# Set the PYTHONPATH environment variable to include the /app directory +ENV PYTHONPATH=/app COPY cognitive_architecture/ /app/cognitive_architecture COPY main.py /app diff --git a/cognitive_architecture/config.py b/cognitive_architecture/config.py index d5cc58184..9286753e7 100644 --- a/cognitive_architecture/config.py +++ b/cognitive_architecture/config.py @@ -1,4 +1,5 @@ """Configuration for cognee - cognitive architecture framework.""" +import logging import os import configparser import uuid @@ -36,6 +37,7 @@ class Config: db_user: str = os.getenv("DB_USER", "cognee") db_password: str = os.getenv("DB_PASSWORD", "cognee") sqlalchemy_logging: bool = os.getenv("SQLALCHEMY_LOGGING", True) + graph_name = os.getenv("GRAPH_NAME", "cognee_graph.pkl") # Model parameters model: str = "gpt-4-1106-preview" @@ -55,29 +57,34 @@ class Config: or os.getenv("AWS_ENV") == "dev" or os.getenv("AWS_ENV") == "prd" ): + load_dotenv() + logging.info("graph_db_url: %s", os.getenv("GRAPH_DB_URL_PROD")) graph_database_url: str = os.getenv("GRAPH_DB_URL_PROD") graph_database_username: str = os.getenv("GRAPH_DB_USER") graph_database_password: str = os.getenv("GRAPH_DB_PW") else: + logging.info("graph_db_url: %s", os.getenv("GRAPH_DB_URL")) graph_database_url: str = os.getenv("GRAPH_DB_URL") graph_database_username: str = os.getenv("GRAPH_DB_USER") graph_database_password: str = os.getenv("GRAPH_DB_PW") weaviate_url: str = os.getenv("WEAVIATE_URL") weaviate_api_key: str = os.getenv("WEAVIATE_API_KEY") - postgres_user: str = os.getenv("POSTGRES_USER") - postgres_password: str = os.getenv("POSTGRES_PASSWORD") - postgres_db: str = os.getenv("POSTGRES_DB") + if ( os.getenv("ENV") == "prod" or os.getenv("ENV") == "dev" or os.getenv("AWS_ENV") == "dev" or os.getenv("AWS_ENV") == "prd" ): - postgres_host: str = 
os.getenv("POSTGRES_PROD_HOST") - elif os.getenv("ENV") == "docker": - postgres_host: str = os.getenv("POSTGRES_HOST_DOCKER") - elif os.getenv("ENV") == "local": - postgres_host: str = os.getenv("POSTGRES_HOST_LOCAL") + load_dotenv() + db_type = 'postgresql' + + db_host: str = os.getenv("POSTGRES_HOST") + logging.info("db_host: %s", db_host) + db_user: str = os.getenv("POSTGRES_USER") + db_password: str = os.getenv("POSTGRES_PASSWORD") + db_name: str = os.getenv("POSTGRES_DB") + # Client ID anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex) diff --git a/cognitive_architecture/database/graphdb/networkx/networkx_graph.py b/cognitive_architecture/database/graphdb/networkx/networkx_graph.py index ae005ccdf..8fd1b4b84 100644 --- a/cognitive_architecture/database/graphdb/networkx/networkx_graph.py +++ b/cognitive_architecture/database/graphdb/networkx/networkx_graph.py @@ -1,9 +1,15 @@ import pickle - +from pathlib import Path +from cognitive_architecture.config import Config import networkx as nx +config = Config() +config = config.load() + class NetworkXGraphDB: + """A class to manage a graph database using NetworkX""" + # graph_path = (Path(config.db_path) / config.graph_name).absolute() def __init__(self, filename="cognee_graph.pkl"): self.filename = filename try: diff --git a/cognitive_architecture/database/relationaldb/database.py b/cognitive_architecture/database/relationaldb/database.py index b0410a557..0c819f902 100644 --- a/cognitive_architecture/database/relationaldb/database.py +++ b/cognitive_architecture/database/relationaldb/database.py @@ -14,17 +14,19 @@ def get_sqlalchemy_database_url( db_type = globalConfig.db_type, db_name = globalConfig.db_name, - base_path = globalConfig.db_path, + db_path = globalConfig.db_path, user = globalConfig.db_user, password = globalConfig.db_password, host = globalConfig.db_host, port = globalConfig.db_port, ): """Get the SQLAlchemy database URL based on parameters.""" - db_path = 
(Path(base_path) / db_name).absolute() + if db_type == "sqlite": + db_path = (Path(db_path) / db_name).absolute() return f"sqlite+aiosqlite:///{db_path}" # SQLite uses file path elif db_type == "duckdb": + db_path = (Path(db_path) / db_name).absolute() return f"duckdb+aiosqlite:///{db_path}" elif db_type == "postgresql": # Ensure optional parameters are handled gracefully diff --git a/cognitive_architecture/database/relationaldb/models/docs.py b/cognitive_architecture/database/relationaldb/models/docs.py index f51404b45..4d643f3b9 100644 --- a/cognitive_architecture/database/relationaldb/models/docs.py +++ b/cognitive_architecture/database/relationaldb/models/docs.py @@ -1,3 +1,4 @@ +""" This module contains the DocsModel class, which is a SQLAlchemy model for the docs table in the relational database. """ from datetime import datetime from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean from sqlalchemy.orm import relationship @@ -7,6 +8,7 @@ class DocsModel(Base): + """ Docs model""" __tablename__ = "docs" id = Column(String, primary_key=True) diff --git a/cognitive_architecture/database/relationaldb/models/memory.py b/cognitive_architecture/database/relationaldb/models/memory.py index 8d88c00aa..3e5b0bb10 100644 --- a/cognitive_architecture/database/relationaldb/models/memory.py +++ b/cognitive_architecture/database/relationaldb/models/memory.py @@ -1,3 +1,4 @@ +""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. 
""" from datetime import datetime from sqlalchemy import Column, String, DateTime, ForeignKey from sqlalchemy.orm import relationship @@ -5,6 +6,7 @@ class MemoryModel(Base): + """ Memory model""" __tablename__ = "memories" id = Column(String, primary_key=True) diff --git a/cognitive_architecture/database/relationaldb/models/metadatas.py b/cognitive_architecture/database/relationaldb/models/metadatas.py index 094134ee6..3121bd672 100644 --- a/cognitive_architecture/database/relationaldb/models/metadatas.py +++ b/cognitive_architecture/database/relationaldb/models/metadatas.py @@ -1,4 +1,5 @@ # metadata.py +""" MetaData model """ from datetime import datetime from sqlalchemy import Column, String, DateTime, ForeignKey from sqlalchemy.orm import relationship @@ -8,6 +9,7 @@ class MetaDatas(Base): + """ MetaData model""" __tablename__ = "metadatas" id = Column(String, primary_key=True) diff --git a/cognitive_architecture/database/relationaldb/models/operation.py b/cognitive_architecture/database/relationaldb/models/operation.py index f3f55ed86..42842295a 100644 --- a/cognitive_architecture/database/relationaldb/models/operation.py +++ b/cognitive_architecture/database/relationaldb/models/operation.py @@ -1,4 +1,5 @@ # operation.py +""" Operation model """ from datetime import datetime from sqlalchemy import Column, Integer, String, DateTime, ForeignKey from sqlalchemy.orm import relationship @@ -8,6 +9,7 @@ class Operation(Base): + """ Operation model""" __tablename__ = "operations" id = Column(String, primary_key=True) diff --git a/cognitive_architecture/database/relationaldb/models/sessions.py b/cognitive_architecture/database/relationaldb/models/sessions.py index d88cd5b47..54b305cf3 100644 --- a/cognitive_architecture/database/relationaldb/models/sessions.py +++ b/cognitive_architecture/database/relationaldb/models/sessions.py @@ -1,4 +1,5 @@ # session.py +""" Session model """ from datetime import datetime from sqlalchemy import Column, Integer, String, DateTime, 
ForeignKey from sqlalchemy.orm import relationship @@ -9,6 +10,7 @@ class Session(Base): + """ Session model""" __tablename__ = "sessions" id = Column(String, primary_key=True) diff --git a/cognitive_architecture/database/relationaldb/models/user.py b/cognitive_architecture/database/relationaldb/models/user.py index 1cffc24c5..568ad3c53 100644 --- a/cognitive_architecture/database/relationaldb/models/user.py +++ b/cognitive_architecture/database/relationaldb/models/user.py @@ -1,4 +1,5 @@ # user.py +""" User model """ from datetime import datetime from sqlalchemy import Column, String, DateTime from sqlalchemy.orm import relationship @@ -14,6 +15,7 @@ class User(Base): + """ User model""" __tablename__ = "users" id = Column(String, primary_key=True, index=True) diff --git a/cognitive_architecture/fetch_secret.py b/cognitive_architecture/fetch_secret.py index 9c8b992d6..65b5326a9 100644 --- a/cognitive_architecture/fetch_secret.py +++ b/cognitive_architecture/fetch_secret.py @@ -12,12 +12,11 @@ # Add the parent directory to sys.path sys.path.insert(0, parent_dir) -# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true" - environment = os.getenv("AWS_ENV", "dev") -def fetch_secret(secret_name, region_name, env_file_path): +def fetch_secret(secret_name:str, region_name:str, env_file_path:str): + """Fetch the secret from AWS Secrets Manager and write it to the .env file.""" print("Initializing session") session = boto3.session.Session() print("Session initialized") @@ -28,20 +27,19 @@ def fetch_secret(secret_name, region_name, env_file_path): response = client.get_secret_value(SecretId=secret_name) except Exception as e: print(f"Error retrieving secret: {e}") - return None + return f"Error retrieving secret: {e}" if "SecretString" in response: secret = response["SecretString"] else: secret = response["SecretBinary"] + with open(env_file_path, "w") as env_file: + env_file.write(secret) + print("Secrets are added to the .env file.") + if 
os.path.exists(env_file_path): print(f"The .env file is located at: {env_file_path}") - - with open(env_file_path, "w") as env_file: - env_file.write(secret) - print("Secrets are added to the .env file.") - load_dotenv() print("The .env file is loaded.") else: diff --git a/cognitive_architecture/llm/queries.py b/cognitive_architecture/llm/queries.py index 265fe5086..816a80167 100644 --- a/cognitive_architecture/llm/queries.py +++ b/cognitive_architecture/llm/queries.py @@ -1,38 +1,38 @@ +""" This module contains the functions that are used to query the language model. """ import os - -from ..shared.data_models import Node, Edge, KnowledgeGraph, GraphQLQuery, MemorySummary -from ..config import Config import instructor from openai import OpenAI +import logging +from ..shared.data_models import KnowledgeGraph, MemorySummary +from ..config import Config + + config = Config() config.load() -print(config.model) -print(config.openai_key) - OPENAI_API_KEY = config.openai_key aclient = instructor.patch(OpenAI()) -import logging - # Function to read query prompts from files def read_query_prompt(filename): + """Read a query prompt from a file.""" try: with open(filename, "r") as file: return file.read() except FileNotFoundError: - logging.info(f"Error: File not found. Attempted to read: {filename}") - logging.info(f"Current working directory: {os.getcwd()}") + logging.info("Error: File not found. Attempted to read: %s", filename) + logging.info("Current working directory: %s", os.getcwd()) return None except Exception as e: - logging.info(f"An error occurred: {e}") + logging.info("An error occurred: %s", e) return None def generate_graph(input) -> KnowledgeGraph: + """Generate a knowledge graph from a user query.""" model = "gpt-4-1106-preview" user_prompt = f"Use the given format to extract information from the following input: {input}." 
system_prompt = read_query_prompt( @@ -57,20 +57,26 @@ def generate_graph(input) -> KnowledgeGraph: async def generate_summary(input) -> MemorySummary: + """Generate a summary from a user query.""" out = aclient.chat.completions.create( model=config.model, messages=[ { "role": "user", - "content": f"""Use the given format summarize and reduce the following input: {input}. """, + "content": f"""Use the given format summarize + and reduce the following input: {input}. """, }, { "role": "system", "content": """You are a top-tier algorithm - designed for summarizing existing knowledge graphs in structured formats based on a knowledge graph. + designed for summarizing existing knowledge + graphs in structured formats based on a knowledge graph. ## 1. Strict Compliance - Adhere to the rules strictly. Non-compliance will result in termination. - ## 2. Don't forget your main goal is to reduce the number of nodes in the knowledge graph while preserving the information contained in it.""", + Adhere to the rules strictly. + Non-compliance will result in termination. + ## 2. Don't forget your main goal + is to reduce the number of nodes in the knowledge graph + while preserving the information contained in it.""", }, ], response_model=MemorySummary, @@ -79,6 +85,7 @@ async def generate_summary(input) -> MemorySummary: def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph: + """Generate a knowledge graph from a user query.""" system_prompt = read_query_prompt( "cognitive_architecture/llm/prompts/generate_graph_prompt.txt" ) @@ -87,7 +94,8 @@ def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph: messages=[ { "role": "user", - "content": f"""Use the given format to extract information from the following input: {input}. """, + "content": f"""Use the given format to + extract information from the following input: {input}. 
""", }, {"role": "system", "content": system_prompt}, ], diff --git a/cognitive_architecture/openai_tools.py b/cognitive_architecture/openai_tools.py index 179117377..b968a508f 100644 --- a/cognitive_architecture/openai_tools.py +++ b/cognitive_architecture/openai_tools.py @@ -1,3 +1,4 @@ +"""Tools for interacting with OpenAI's GPT-3, GPT-4 API""" import asyncio import random import os diff --git a/cognitive_architecture/shared/data_models.py b/cognitive_architecture/shared/data_models.py index 38650b988..c95000bd5 100644 --- a/cognitive_architecture/shared/data_models.py +++ b/cognitive_architecture/shared/data_models.py @@ -1,9 +1,10 @@ +"""Data models for the cognitive architecture.""" from typing import Optional, List - from pydantic import BaseModel, Field class Node(BaseModel): + """Node in a knowledge graph.""" id: int description: str category: str @@ -14,6 +15,7 @@ class Node(BaseModel): class Edge(BaseModel): + """Edge in a knowledge graph.""" source: int target: int description: str @@ -23,14 +25,17 @@ class Edge(BaseModel): class KnowledgeGraph(BaseModel): + """Knowledge graph.""" nodes: List[Node] = Field(..., default_factory=list) edges: List[Edge] = Field(..., default_factory=list) class GraphQLQuery(BaseModel): + """GraphQL query.""" query: str class MemorySummary(BaseModel): + """ Memory summary. """ nodes: List[Node] = Field(..., default_factory=list) edges: List[Edge] = Field(..., default_factory=list) diff --git a/cognitive_architecture/shared/language_processing.py b/cognitive_architecture/shared/language_processing.py index bb112cd07..203b80050 100644 --- a/cognitive_architecture/shared/language_processing.py +++ b/cognitive_architecture/shared/language_processing.py @@ -1,9 +1,10 @@ +""" This module provides language processing functions for language detection and translation. 
""" +import logging import boto3 from botocore.exceptions import BotoCoreError, ClientError from langdetect import detect, LangDetectException import iso639 -import logging # Basic configuration of the logging system logging.basicConfig( @@ -30,7 +31,7 @@ def detect_language(text): try: # Detect the language using langdetect detected_lang_iso639_1 = detect(trimmed_text) - logging.info(f"Detected ISO 639-1 code: {detected_lang_iso639_1}") + logging.info("Detected ISO 639-1 code: %s", detected_lang_iso639_1) # Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2) if detected_lang_iso639_1 == "hr": @@ -38,9 +39,9 @@ def detect_language(text): return detected_lang_iso639_1 except LangDetectException as e: - logging.error(f"Language detection error: {e}") + logging.error("Language detection error: %s", e) except Exception as e: - logging.error(f"Unexpected error: {e}") + logging.error("Unexpected error: %s", e) return -1 @@ -57,8 +58,10 @@ def translate_text( Parameters: text (str): The text to be translated. - source_language (str): The source language code (e.g., 'sr' for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php - target_language (str): The target language code (e.g., 'en' for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php + source_language (str): The source language code (e.g., 'sr' for Serbian). + ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php + target_language (str): The target language code (e.g., 'en' for English). + ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php region_name (str): AWS region name. Returns: @@ -82,20 +85,9 @@ def translate_text( return result.get("TranslatedText", "No translation found.") except BotoCoreError as e: - logging.info(f"BotoCoreError occurred: {e}") + logging.info("BotoCoreError occurred: %s", e) return "Error with AWS Translate service configuration or request." 
except ClientError as e: - logging.info(f"ClientError occurred: {e}") + logging.info("ClientError occurred: %s", e) return "Error with AWS client or network issue." - - -source_language = "sr" -target_language = "en" -text_to_translate = "Ja volim da pecam i idem na reku da šetam pored nje ponekad" - -translated_text = translate_text(text_to_translate, source_language, target_language) -print(translated_text) - - -# print(detect_language("Koliko krava ide u setnju?")) diff --git a/cognitive_architecture/utils.py b/cognitive_architecture/utils.py index a5335e884..80654059c 100644 --- a/cognitive_architecture/utils.py +++ b/cognitive_architecture/utils.py @@ -1,3 +1,5 @@ +""" This module contains utility functions for the cognitive architecture. """ + import os import random import string @@ -13,7 +15,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select import logging - +from cognitive_architecture.database.relationaldb.models.operation import Operation +from cognitive_architecture.database.relationaldb.database_crud import ( + session_scope, + add_entity, + update_entity, + fetch_job_id, +) class Node: def __init__(self, id, description, color): @@ -72,6 +80,7 @@ def get_document_names(doc_input): def format_dict(d): + """ Format a dictionary as a string.""" # Initialize an empty list to store formatted items formatted_items = [] @@ -93,6 +102,7 @@ def format_dict(d): def append_uuid_to_variable_names(variable_mapping): + """ Append a UUID to the variable names to make them unique.""" unique_variable_mapping = {} for original_name in variable_mapping.values(): unique_name = f"{original_name}_{uuid.uuid4().hex}" @@ -102,6 +112,7 @@ def append_uuid_to_variable_names(variable_mapping): # Update the functions to use the unique variable names def create_node_variable_mapping(nodes): + """ Create a mapping of node identifiers to unique variable names.""" mapping = {} for node in nodes: variable_name = 
f"{node['category']}{node['id']}".lower() @@ -110,6 +121,7 @@ def create_node_variable_mapping(nodes): def create_edge_variable_mapping(edges): + """ Create a mapping of edge identifiers to unique variable names.""" mapping = {} for edge in edges: # Construct a unique identifier for the edge @@ -124,17 +136,10 @@ def generate_letter_uuid(length=8): return "".join(random.choice(letters) for _ in range(length)) -from cognitive_architecture.database.relationaldb.models.operation import Operation -from cognitive_architecture.database.relationaldb.database_crud import ( - session_scope, - add_entity, - update_entity, - fetch_job_id, -) - async def get_vectordb_namespace(session: AsyncSession, user_id: str): + """ Asynchronously retrieves the latest memory names for a given user.""" try: result = await session.execute( select(MemoryModel.memory_name) @@ -151,6 +156,7 @@ async def get_vectordb_namespace(session: AsyncSession, user_id: str): async def get_vectordb_document_name(session: AsyncSession, user_id: str): + """ Asynchronously retrieves the document names for a given user.""" try: result = await session.execute( select(DocsModel.doc_name) @@ -167,6 +173,7 @@ async def get_vectordb_document_name(session: AsyncSession, user_id: str): async def get_model_id_name(session: AsyncSession, id: str): + """ Asynchronously retrieves the memory name for the given id.""" try: result = await session.execute( select(MemoryModel.memory_name) @@ -236,12 +243,6 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st return memory_details, docs - # except Exception as e: - # # Handle the exception as needed - # print(f"An error occurred: {e}") - # return None - - async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str): """ Asynchronously retrieves memory names associated with a specific document ID.