
Commit

Merge pull request #44 from topoteretes/feature/postgres_deployment
Fixes to database manager
Vasilije1990 authored Feb 20, 2024
2 parents 0ee8899 + 423e0d5 commit 2fe437c
Showing 16 changed files with 105 additions and 71 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
@@ -41,6 +41,8 @@ RUN apt-get update -q && \
/var/tmp/*

WORKDIR /app
# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app

COPY cognitive_architecture/ /app/cognitive_architecture
COPY main.py /app
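The new ENV PYTHONPATH=/app line makes /app an import root, so the package copied in just below (cognitive_architecture/) resolves by its absolute name no matter which working directory the container process starts from. A minimal sketch of the effect, assuming the layout shown in this Dockerfile:

    import sys
    from cognitive_architecture.config import Config  # resolves to /app/cognitive_architecture/config.py

    # PYTHONPATH=/app puts /app on sys.path at interpreter startup.
    assert "/app" in sys.path

    config = Config()
    config.load()
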
23 changes: 15 additions & 8 deletions cognitive_architecture/config.py
@@ -1,4 +1,5 @@
"""Configuration for cognee - cognitive architecture framework."""
import logging
import os
import configparser
import uuid
@@ -36,6 +37,7 @@ class Config:
db_user: str = os.getenv("DB_USER", "cognee")
db_password: str = os.getenv("DB_PASSWORD", "cognee")
sqlalchemy_logging: bool = os.getenv("SQLALCHEMY_LOGGING", True)
graph_name = os.getenv("GRAPH_NAME", "cognee_graph.pkl")

# Model parameters
model: str = "gpt-4-1106-preview"
@@ -55,29 +57,34 @@ class Config:
or os.getenv("AWS_ENV") == "dev"
or os.getenv("AWS_ENV") == "prd"
):
load_dotenv()
logging.info("graph_db_url: %s", os.getenv("GRAPH_DB_URL_PROD"))
graph_database_url: str = os.getenv("GRAPH_DB_URL_PROD")
graph_database_username: str = os.getenv("GRAPH_DB_USER")
graph_database_password: str = os.getenv("GRAPH_DB_PW")
else:
logging.info("graph_db_urlvvv: %s", os.getenv("GRAPH_DB_URL"))
graph_database_url: str = os.getenv("GRAPH_DB_URL")
graph_database_username: str = os.getenv("GRAPH_DB_USER")
graph_database_password: str = os.getenv("GRAPH_DB_PW")
weaviate_url: str = os.getenv("WEAVIATE_URL")
weaviate_api_key: str = os.getenv("WEAVIATE_API_KEY")
postgres_user: str = os.getenv("POSTGRES_USER")
postgres_password: str = os.getenv("POSTGRES_PASSWORD")
postgres_db: str = os.getenv("POSTGRES_DB")

if (
os.getenv("ENV") == "prod"
or os.getenv("ENV") == "dev"
or os.getenv("AWS_ENV") == "dev"
or os.getenv("AWS_ENV") == "prd"
):
postgres_host: str = os.getenv("POSTGRES_PROD_HOST")
elif os.getenv("ENV") == "docker":
postgres_host: str = os.getenv("POSTGRES_HOST_DOCKER")
elif os.getenv("ENV") == "local":
postgres_host: str = os.getenv("POSTGRES_HOST_LOCAL")
load_dotenv()
db_type = 'postgresql'

db_host: str = os.getenv("POSTGRES_HOST")
logging.info("db_host: %s", db_host)
db_user: str = os.getenv("POSTGRES_USER")
db_password: str = os.getenv("POSTGRES_PASSWORD")
db_name: str = os.getenv("POSTGRES_DB")


# Client ID
anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
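Condensed, the hunk above selects the graph-database URL and the Postgres host from environment-specific variables, then reads the remaining Postgres credentials unconditionally. A sketch of that branching, using only variable names that appear in the diff (the flattened structure here is illustrative):

    import os

    deployed = os.getenv("ENV") in ("prod", "dev") or os.getenv("AWS_ENV") in ("dev", "prd")

    # Graph database: deployed environments read the *_PROD URL, everything else GRAPH_DB_URL.
    graph_database_url = os.getenv("GRAPH_DB_URL_PROD" if deployed else "GRAPH_DB_URL")
    graph_database_username = os.getenv("GRAPH_DB_USER")
    graph_database_password = os.getenv("GRAPH_DB_PW")

    # Postgres: the host depends on the environment, the credentials do not.
    if deployed:
        postgres_host = os.getenv("POSTGRES_PROD_HOST")
    elif os.getenv("ENV") == "docker":
        postgres_host = os.getenv("POSTGRES_HOST_DOCKER")
    elif os.getenv("ENV") == "local":
        postgres_host = os.getenv("POSTGRES_HOST_LOCAL")

    db_type = "postgresql"
    db_host = os.getenv("POSTGRES_HOST")  # what the relational layer connects to
    db_user = os.getenv("POSTGRES_USER")
    db_password = os.getenv("POSTGRES_PASSWORD")
    db_name = os.getenv("POSTGRES_DB")
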
@@ -1,9 +1,15 @@
import pickle

from pathlib import Path
from cognitive_architecture.config import Config
import networkx as nx
config = Config()
config = config.load()



class NetworkXGraphDB:
"""A class to manage a graph database using NetworkX"""
# graph_path = (Path(config.db_path) / config.graph_name).absolute()
def __init__(self, filename="cognee_graph.pkl"):
self.filename = filename
try:
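The class persists the whole graph as a pickled networkx object under the configured filename (cognee_graph.pkl by default, matching the new GRAPH_NAME setting in config.py). Its body is truncated in this hunk, so the load-or-create pattern below is only a sketch of the general approach, not the repository's implementation:

    import pickle
    import networkx as nx

    def load_or_create_graph(filename: str = "cognee_graph.pkl") -> nx.MultiDiGraph:
        """Load the pickled graph if it exists, otherwise start an empty one."""
        try:
            with open(filename, "rb") as file:
                return pickle.load(file)
        except (FileNotFoundError, EOFError):
            return nx.MultiDiGraph()
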
6 changes: 4 additions & 2 deletions cognitive_architecture/database/relationaldb/database.py
@@ -14,17 +14,19 @@
def get_sqlalchemy_database_url(
db_type = globalConfig.db_type,
db_name = globalConfig.db_name,
base_path = globalConfig.db_path,
db_path = globalConfig.db_path,
user = globalConfig.db_user,
password = globalConfig.db_password,
host = globalConfig.db_host,
port = globalConfig.db_port,
):
"""Get the SQLAlchemy database URL based on parameters."""
db_path = (Path(base_path) / db_name).absolute()

if db_type == "sqlite":
db_path = (Path(db_path) / db_name).absolute()
return f"sqlite+aiosqlite:///{db_path}" # SQLite uses file path
elif db_type == "duckdb":
db_path = (Path(db_path) / db_name).absolute()
return f"duckdb+aiosqlite:///{db_path}"
elif db_type == "postgresql":
# Ensure optional parameters are handled gracefully
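After this change the Path join happens only inside the file-backed branches, so db_path stays a plain directory for postgresql and is turned into a full file path only for sqlite and duckdb. A minimal restatement of the two file-backed branches (the postgresql branch is truncated above and therefore omitted; the example values in the comment are illustrative):

    from pathlib import Path

    def file_backed_url(db_type: str, db_path: str, db_name: str) -> str:
        """Sketch of the sqlite/duckdb branches of get_sqlalchemy_database_url."""
        full_path = (Path(db_path) / db_name).absolute()
        scheme = {"sqlite": "sqlite+aiosqlite", "duckdb": "duckdb+aiosqlite"}[db_type]
        return f"{scheme}:///{full_path}"

    # file_backed_url("sqlite", "/app/data", "cognee.db")
    #   -> "sqlite+aiosqlite:////app/data/cognee.db"
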
2 changes: 2 additions & 0 deletions cognitive_architecture/database/relationaldb/models/docs.py
@@ -1,3 +1,4 @@
""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
from sqlalchemy.orm import relationship
@@ -7,6 +8,7 @@


class DocsModel(Base):
""" Docs model"""
__tablename__ = "docs"

id = Column(String, primary_key=True)
2 changes: 2 additions & 0 deletions cognitive_architecture/database/relationaldb/models/memory.py
@@ -1,10 +1,12 @@
""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from ..database import Base


class MemoryModel(Base):
""" Memory model"""
__tablename__ = "memories"

id = Column(String, primary_key=True)
@@ -1,4 +1,5 @@
# metadata.py
""" MetaData model """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@@ -8,6 +9,7 @@


class MetaDatas(Base):
""" MetaData model"""
__tablename__ = "metadatas"

id = Column(String, primary_key=True)
@@ -1,4 +1,5 @@
# operation.py
""" Operation model """
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@@ -8,6 +9,7 @@


class Operation(Base):
""" Operation model"""
__tablename__ = "operations"

id = Column(String, primary_key=True)
@@ -1,4 +1,5 @@
# session.py
""" Session model """
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@@ -9,6 +10,7 @@


class Session(Base):
""" Session model"""
__tablename__ = "sessions"

id = Column(String, primary_key=True)
2 changes: 2 additions & 0 deletions cognitive_architecture/database/relationaldb/models/user.py
@@ -1,4 +1,5 @@
# user.py
""" User model """
from datetime import datetime
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import relationship
@@ -14,6 +15,7 @@


class User(Base):
""" User model"""
__tablename__ = "users"

id = Column(String, primary_key=True, index=True)
16 changes: 7 additions & 9 deletions cognitive_architecture/fetch_secret.py
@@ -12,12 +12,11 @@
# Add the parent directory to sys.path
sys.path.insert(0, parent_dir)

# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true"

environment = os.getenv("AWS_ENV", "dev")


def fetch_secret(secret_name, region_name, env_file_path):
def fetch_secret(secret_name:str, region_name:str, env_file_path:str):
"""Fetch the secret from AWS Secrets Manager and write it to the .env file."""
print("Initializing session")
session = boto3.session.Session()
print("Session initialized")
@@ -28,20 +27,19 @@ def fetch_secret(secret_name, region_name, env_file_path):
response = client.get_secret_value(SecretId=secret_name)
except Exception as e:
print(f"Error retrieving secret: {e}")
return None
return f"Error retrieving secret: {e}"

if "SecretString" in response:
secret = response["SecretString"]
else:
secret = response["SecretBinary"]

with open(env_file_path, "w") as env_file:
env_file.write(secret)
print("Secrets are added to the .env file.")

if os.path.exists(env_file_path):
print(f"The .env file is located at: {env_file_path}")

with open(env_file_path, "w") as env_file:
env_file.write(secret)
print("Secrets are added to the .env file.")

load_dotenv()
print("The .env file is loaded.")
else:
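With this revision fetch_secret writes the retrieved secret to the .env file only after confirming the path exists, and a retrieval failure now returns an error string instead of None. A hypothetical call site (the secret name, region, and path below are placeholders, not values taken from the repository):

    # Hypothetical invocation; all three arguments are placeholders.
    result = fetch_secret(
        secret_name="cognee/dev/app-secrets",
        region_name="eu-west-1",
        env_file_path="/app/.env",
    )
    if isinstance(result, str) and result.startswith("Error retrieving secret"):
        print(result)  # the revised function surfaces retrieval failures as a string
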
40 changes: 24 additions & 16 deletions cognitive_architecture/llm/queries.py
@@ -1,38 +1,38 @@
""" This module contains the functions that are used to query the language model. """
import os

from ..shared.data_models import Node, Edge, KnowledgeGraph, GraphQLQuery, MemorySummary
from ..config import Config
import instructor
from openai import OpenAI
import logging
from ..shared.data_models import KnowledgeGraph, MemorySummary
from ..config import Config



config = Config()
config.load()

print(config.model)
print(config.openai_key)

OPENAI_API_KEY = config.openai_key

aclient = instructor.patch(OpenAI())

import logging


# Function to read query prompts from files
def read_query_prompt(filename):
"""Read a query prompt from a file."""
try:
with open(filename, "r") as file:
return file.read()
except FileNotFoundError:
logging.info(f"Error: File not found. Attempted to read: {filename}")
logging.info(f"Current working directory: {os.getcwd()}")
logging.info(f"Error: File not found. Attempted to read: %s {filename}")
logging.info(f"Current working directory: %s {os.getcwd()}")
return None
except Exception as e:
logging.info(f"An error occurred: {e}")
logging.info(f"An error occurred: %s {e}")
return None


def generate_graph(input) -> KnowledgeGraph:
"""Generate a knowledge graph from a user query."""
model = "gpt-4-1106-preview"
user_prompt = f"Use the given format to extract information from the following input: {input}."
system_prompt = read_query_prompt(
@@ -57,20 +57,26 @@ def generate_graph(input) -> KnowledgeGraph:


async def generate_summary(input) -> MemorySummary:
"""Generate a summary from a user query."""
out = aclient.chat.completions.create(
model=config.model,
messages=[
{
"role": "user",
"content": f"""Use the given format summarize and reduce the following input: {input}. """,
"content": f"""Use the given format summarize
and reduce the following input: {input}. """,
},
{
"role": "system",
"content": """You are a top-tier algorithm
designed for summarizing existing knowledge graphs in structured formats based on a knowledge graph.
designed for summarizing existing knowledge
graphs in structured formats based on a knowledge graph.
## 1. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
## 2. Don't forget your main goal is to reduce the number of nodes in the knowledge graph while preserving the information contained in it.""",
Adhere to the rules strictly.
Non-compliance will result in termination.
## 2. Don't forget your main goal
is to reduce the number of nodes in the knowledge graph
while preserving the information contained in it.""",
},
],
response_model=MemorySummary,
@@ -79,6 +85,7 @@ async def generate_summary(input) -> MemorySummary:


def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
"""Generate a knowledge graph from a user query."""
system_prompt = read_query_prompt(
"cognitive_architecture/llm/prompts/generate_graph_prompt.txt"
)
@@ -87,7 +94,8 @@ def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
messages=[
{
"role": "user",
"content": f"""Use the given format to extract information from the following input: {input}. """,
"content": f"""Use the given format to
extract information from the following input: {input}. """,
},
{"role": "system", "content": system_prompt},
],
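For reference, with Python's logging module the lazy %s placeholder is filled only when the value is passed as a separate argument; a %s written inside an f-string is left in the message as literal text. Two working forms, using the prompt path that appears in user_query_to_edges_and_nodes:

    import logging

    filename = "cognitive_architecture/llm/prompts/generate_graph_prompt.txt"

    # Lazy %-style: logging substitutes the value only if the record is emitted.
    logging.info("Error: File not found. Attempted to read: %s", filename)

    # f-string style: the value is interpolated before the call is made.
    logging.info(f"Error: File not found. Attempted to read: {filename}")
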
1 change: 1 addition & 0 deletions cognitive_architecture/openai_tools.py
@@ -1,3 +1,4 @@
"""Tools for interacting with OpenAI's GPT-3, GPT-4 API"""
import asyncio
import random
import os
7 changes: 6 additions & 1 deletion cognitive_architecture/shared/data_models.py
@@ -1,9 +1,10 @@
"""Data models for the cognitive architecture."""
from typing import Optional, List

from pydantic import BaseModel, Field


class Node(BaseModel):
"""Node in a knowledge graph."""
id: int
description: str
category: str
@@ -14,6 +15,7 @@ class Node(BaseModel):


class Edge(BaseModel):
"""Edge in a knowledge graph."""
source: int
target: int
description: str
@@ -23,14 +25,17 @@


class KnowledgeGraph(BaseModel):
"""Knowledge graph."""
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)


class GraphQLQuery(BaseModel):
"""GraphQL query."""
query: str


class MemorySummary(BaseModel):
""" Memory summary. """
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)