Merge pull request #35 from topoteretes/update_package_and_version
Update package and version
Vasilije1990 authored Jan 25, 2024
2 parents e189213 + 54772d0 commit 62344eb
Showing 11 changed files with 133 additions and 103 deletions.
1 change: 0 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
@@ -7,7 +7,6 @@ POSTGRES_PASSWORD = bla
POSTGRES_DB = bubu
POSTGRES_HOST = localhost
POSTGRES_HOST_DOCKER = postgres
SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
COG_ARCH_DIR = cognitive_architecture
GRAPH_DB_URL =
GRAPH_DB_PW =
67 changes: 20 additions & 47 deletions README.md
@@ -15,18 +15,18 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps
<p align="left"><i>Open-source framework for building and testing RAGs and Cognitive Architectures, designed for accuracy, transparency, and control.</i></p>

<p align="left">
<a href="https://github.com/topoteretes/PromethAI-Memory/fork" target="blank">
<img src="https://img.shields.io/github/forks/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI forks"/>
<a href="https://github.com/topoteretes/cognee/fork" target="blank">
<img src="https://img.shields.io/github/forks/topoteretes/cognee?style=for-the-badge" alt="cognee forks"/>
</a>

<a href="https://github.com/topoteretes/PromethAI-Backend/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI stars"/>
<a href="https://github.com/topoteretes/cognee/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/topoteretes/cognee?style=for-the-badge" alt="cognee stars"/>
</a>
<a href="https://github.com/topoteretes/PromethAI-Backend/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI pull-requests"/>
<a href="https://github.com/topoteretes/cognee/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/topoteretes/cognee?style=for-the-badge" alt="cognee pull-requests"/>
</a>
<a href='https://github.com/topoteretes/PromethAI-Backend/releases'>
<img src='https://img.shields.io/github/release/topoteretes/PromethAI-Memory?&label=Latest&style=for-the-badge'>
<a href='https://github.com/topoteretes/cognee/releases'>
<img src='https://img.shields.io/github/release/topoteretes/cognee?&label=Latest&style=for-the-badge'>
</a>

</p>
@@ -72,40 +72,28 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps



This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python. It aims to be production-ready and testable, while giving great visibility into how we build RAG applications.

This project is a part of the [PromethAI](https://prometh.ai/) ecosystem.

The project runs in iterations, from POC towards production-ready code.
The iterations are numbered from 0 to 7, with 0 being the simplest iteration and 7 being the most complex one.
To run a specific iteration, navigate to the iteration's folder and follow the instructions in the README file.
This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python.
It aims to be production-ready and testable, and to give great visibility into how we build RAG applications.
It runs in iterations, from POC towards production-ready code.
To read more about the approach and details on cognitive architecture, see the blog post: [AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps](https://topoteretes.notion.site/Going-beyond-Langchain-Weaviate-and-towards-a-production-ready-modern-data-platform-7351d77a1eba40aab4394c24bef3a278?pvs=4)

_Keep Ithaka always in your mind.
Arriving there is what you’re destined for.
But don’t hurry the journey at all.
Better if it lasts for years_
Try it on WhatsApp with one of our partners, Keepi.ai, by typing /save {content} followed by /query {content}


### Current Focus

#### Level 4 - Dynamic Graph Memory Manager + DB + RAG Test Manager
Scope: Use Neo4j to map the user queries into a knowledge graph based on cognitive architecture
Blog post: Soon!
- Dynamic Memory Manager -> store the data in N hierarchical stores
- Dynamic Graph -> map the user queries into a knowledge graph
- Classification -> classify the user queries and choose relevant graph nodes
- Context manager -> generate context for LLM to process containing Semantic, Episodic and Vector store data
- Postgres DB to store metadata
- Docker
- API
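The context-manager bullet above (generating LLM context from Semantic, Episodic, and Vector store data) can be sketched roughly as follows. This is a hypothetical illustration, not the repo's actual API: the `MemoryContext` class and `to_prompt` method are invented names for the pattern described.

```python
# Hypothetical sketch of the context-assembly step: combine semantic,
# episodic, and vector-store results into a single prompt context.
from dataclasses import dataclass, field

@dataclass
class MemoryContext:
    semantic: list = field(default_factory=list)   # facts from the knowledge graph
    episodic: list = field(default_factory=list)   # past interactions / events
    vector: list = field(default_factory=list)     # raw chunks from the vector store

    def to_prompt(self) -> str:
        # Render only the non-empty memory sections into one context string.
        sections = [
            ("Semantic memory", self.semantic),
            ("Episodic memory", self.episodic),
            ("Retrieved documents", self.vector),
        ]
        return "\n\n".join(
            f"## {title}\n" + "\n".join(f"- {item}" for item in items)
            for title, items in sections
            if items
        )

ctx = MemoryContext(semantic=["User lives in Berlin"], vector=["Doc chunk 1"])
print(ctx.to_prompt())
```

In the real system the three lists would be filled by the classification and graph-retrieval steps listed above before being handed to the LLM.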
#### Level 5 - Integration to keepi.ai and other apps
Scope: Use Neo4j to map user preferences into a graph structure consisting of semantic, episodic, and procedural memory.
Fetch and store information and files via the WhatsApp chatbot using Keepi.ai.
Use the graph to answer user queries and store new information in the graph.



![Image](https://github.com/topoteretes/PromethAI-Memory/blob/main/level_4/User_graph.png)


### Installation

### Run the level 4
### Run cognee

Make sure you have Docker, Poetry, Python 3.11, and Postgres installed.

@@ -115,28 +103,13 @@ Copy the .env.example to .env and fill in the variables

```docker compose up ```

Run
And send API requests (add-memory, user-query-to-graph, document-to-graph-db, user-query-processor) to localhost:8000

``` python main.py ```

If you are running natively, change ENVIRONMENT to local in the .env file.
If you are running in Docker, change ENVIRONMENT to postgres in the .env file.


Run

``` python main.py ```

Or run

``` docker compose up promethai-mem ```

And send API requests (add-memory, user-query-to-graph, document-to-graph-db, user-query-processor) to localhost:8000
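A hedged sketch of what such a request might look like. The endpoint names come from the text above, but the HTTP method, payload schema, and field names (`user_id`, `content`) are assumptions for illustration only:

```python
import json
import urllib.request

BASE = "http://localhost:8000"

def build_request(endpoint: str, payload: dict) -> urllib.request.Request:
    """Build (but do not send) a JSON POST request to one of the API endpoints."""
    return urllib.request.Request(
        f"{BASE}/{endpoint}",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

# Hypothetical payload; check the API docs for the real schema.
req = build_request("add-memory", {"user_id": "user123", "content": "Remember this."})
print(req.full_url)  # http://localhost:8000/add-memory
```

Sending it is then `urllib.request.urlopen(req)` once the `docker compose up` stack is running.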








60 changes: 59 additions & 1 deletion cognitive_architecture/database/graph_database/graph.py
@@ -602,6 +602,64 @@ def update_document_node_with_db_ids(self, vectordb_namespace: str, document_id:

return cypher_query

def run_merge_query(self, user_id: str, memory_type: str,
similarity_threshold: float) -> str:
"""
Constructs a Cypher query to merge nodes in a Neo4j database based on a similarity threshold.
This method creates a Cypher query that finds pairs of nodes with a specified memory type
connected via a specified relationship type to the same 'Memory' node. If the Levenshtein
similarity between the 'description' properties of these nodes is greater than the
specified threshold, the nodes are merged using the apoc.refactor.mergeNodes procedure.
Parameters:
user_id (str): The ID of the user whose related nodes are to be merged.
memory_type (str): The memory type property of the nodes to be merged.
similarity_threshold (float): The threshold above which nodes will be considered similar enough to be merged.
Returns:
str: A Cypher query string that can be executed in a Neo4j session.
"""
        if memory_type == 'SemanticMemory':
            relationship_base = 'HAS_SEMANTIC_MEMORY'
            relationship_type = 'HAS_KNOWLEDGE'
            memory_label = 'semantic'
        elif memory_type == 'EpisodicMemory':
            relationship_base = 'HAS_EPISODIC_MEMORY'
            relationship_type = 'HAS_EVENT'
            memory_label = 'episodic'
        elif memory_type == 'Buffer':
            relationship_base = 'HAS_BUFFER_MEMORY'
            relationship_type = 'CURRENTLY_HOLDING'
            memory_label = 'buffer'
        else:
            raise ValueError(f"Unsupported memory_type: {memory_type}")


query= f"""MATCH (u:User {{userId: '{user_id}'}})-[:{relationship_base}]->(sm:{memory_type})
MATCH (sm)-[:{relationship_type}]->(n)
RETURN labels(n) AS NodeType, collect(n) AS Nodes
"""

node_results = self.query(query)

node_types = [record['NodeType'] for record in node_results]

for node in node_types:
query = f"""
MATCH (u:User {{userId: "{user_id}"}})-[:{relationship_base}]->(m:{memory_type})
MATCH (m)-[:{relationship_type}]->(n1:{node[0]} {{memory_type: "{memory_label}"}}),
(m)-[:{relationship_type}]->(n2:{node[0]} {{memory_type: "{memory_label}"}})
WHERE id(n1) < id(n2) AND
apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {similarity_threshold}
WITH n1, n2
LIMIT 1
CALL apoc.refactor.mergeNodes([n1, n2], {{mergeRels: true}}) YIELD node
RETURN node
"""
self.query(query)
self.close()
return query
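The merge criterion in the Cypher above can be mirrored in plain Python for intuition. This is a standalone sketch: `apoc.text.levenshteinSimilarity` is, per APOC's definition, `1 - distance / length of the longer string`, and the helper names below are illustrative, not part of this codebase:

```python
def levenshtein_distance(a: str, b: str) -> int:
    # Classic dynamic-programming edit distance over two strings.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,          # deletion
                            curr[j - 1] + 1,      # insertion
                            prev[j - 1] + (ca != cb)))  # substitution
        prev = curr
    return prev[len(b)]

def levenshtein_similarity(a: str, b: str) -> float:
    # Mirrors apoc.text.levenshteinSimilarity: 1 - distance / max length.
    if not a and not b:
        return 1.0
    return 1.0 - levenshtein_distance(a, b) / max(len(a), len(b))

def should_merge(desc1: str, desc2: str, threshold: float) -> bool:
    # Same test the WHERE clause applies before apoc.refactor.mergeNodes:
    # compare lowercased descriptions against the threshold.
    return levenshtein_similarity(desc1.lower(), desc2.lower()) > threshold

print(should_merge("Coffee preference", "coffee preferences", 0.8))  # True
```

With a threshold of, say, 0.8, near-duplicate node descriptions differing only in casing or minor suffixes are merged, while genuinely distinct nodes are left alone.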

def get_namespaces_by_document_category(self, user_id: str, category: str):
"""
Retrieve a list of Vectordb namespaces for documents of a specified category associated with a given user.
@@ -754,7 +812,7 @@ def retrieve_node_id_for_memory_type(self, topic: str = 'SemanticMemory'):



from networkx_graph import NetworkXGraphDB
from .networkx_graph import NetworkXGraphDB
class GraphDBFactory:
def create_graph_db(self, db_type, **kwargs):
if db_type == 'neo4j':
21 changes: 15 additions & 6 deletions cognitive_architecture/llm/queries.py
@@ -17,18 +17,27 @@
aclient = instructor.patch(OpenAI())

load_dotenv()
import logging


# Function to read query prompts from files
def read_query_prompt(filename):
with open(filename, 'r') as file:
return file.read()
    try:
        with open(filename, 'r') as file:
            return file.read()
    except FileNotFoundError:
        logging.error(f"Error: file not found. Attempted to read: {filename}")
        logging.error(f"Current working directory: {os.getcwd()}")
        return None
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        return None


def generate_graph(input) -> KnowledgeGraph:
model = "gpt-4-1106-preview" # Define the model here
model = "gpt-4-1106-preview"
user_prompt = f"Use the given format to extract information from the following input: {input}."
system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt')
system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt')

out = aclient.chat.completions.create(
model=model,
@@ -50,7 +59,7 @@ def generate_graph(input) -> KnowledgeGraph:

async def generate_summary(input) -> MemorySummary:
out = aclient.chat.completions.create(
model="gpt-4-1106-preview",
model=config.model,
messages=[
{
"role": "user",
@@ -69,7 +78,7 @@ async def generate_summary(input) -> MemorySummary:


def user_query_to_edges_and_nodes( input: str) ->KnowledgeGraph:
system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt')
system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt')
return aclient.chat.completions.create(
model=config.model,
messages=[
37 changes: 7 additions & 30 deletions docker-compose.yml
@@ -11,19 +11,19 @@ services:
- NEO4J_AUTH=neo4j/pleaseletmein
- NEO4J_PLUGINS=["apoc"]
networks:
- promethai_mem_backend
- cognee_backend

promethai_mem:
cognee:
networks:
- promethai_mem_backend
- cognee_backend
build:
context: ./
volumes:
- "./:/app"
- ./.data:/app/.data

environment:
- HOST=0.0.0.0
- ENVIRONMENT=local
profiles: ["exclude-from-up"]
ports:
- 8000:8000
@@ -38,8 +38,6 @@ services:
limits:
cpus: "4.0"
memory: 8GB


postgres:
image: postgres
container_name: postgres
@@ -49,31 +47,10 @@
- POSTGRES_PASSWORD=bla
- POSTGRES_DB=bubu
networks:
- promethai_mem_backend
- cognee_backend
ports:
- "5432:5432"

# superset:
# platform: linux/amd64
# build:
# context: ./superset
# dockerfile: Dockerfile
# container_name: superset
# environment:
# - ADMIN_USERNAME=admin
# - [email protected]
# - ADMIN_PASSWORD=admin
# - POSTGRES_USER=bla
# - POSTGRES_PASSWORD=bla
# - POSTGRES_DB=bubu
# networks:
# - promethai_mem_backend
# ports:
# - '8088:8088'
# depends_on:
# - postgres

networks:
promethai_mem_backend:
name: promethai_mem_backend
cognee_backend:
name: cognee_backend

14 changes: 9 additions & 5 deletions entrypoint.sh
@@ -2,10 +2,15 @@
export ENVIRONMENT
# Run Python scripts with error handling
echo "Running fetch_secret.py"
python cognitive_architecture/fetch_secret.py
if [ $? -ne 0 ]; then
echo "Error: fetch_secret.py failed"
exit 1
if [ "$ENVIRONMENT" != "local" ]; then
echo "Running fetch_secret.py"
python cognitive_architecture/fetch_secret.py
if [ $? -ne 0 ]; then
echo "Error: fetch_secret.py failed"
exit 1
fi
else
echo "ENVIRONMENT ($ENVIRONMENT) is active, skipping fetch_secret.py"
fi

echo "Running create_database.py"
@@ -14,7 +19,6 @@ if [ $? -ne 0 ]; then
echo "Error: create_database.py failed"
exit 1
fi

# Start Gunicorn
echo "Starting Gunicorn"
gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app
2 changes: 1 addition & 1 deletion iterations/level_3/vectordb/basevectordb.py
@@ -276,7 +276,7 @@ async def fetch_memories(
n_of_observations: Optional[int] = 2,
):
logging.info(namespace)
logging.info("The search type is %", search_type)
logging.info("The search type is %s", str(search_type))
logging.info(params)
logging.info(observation)

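The logging fixes in these hunks all address the same pitfall: Python's `logging` uses printf-style placeholders, so a bare `%` formats nothing and the argument is silently dropped (logging reports a formatting error at emit time instead of raising). A minimal illustration of the corrected form:

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

search_type = "hybrid"

# Broken: "%" is not a placeholder, so the argument never reaches the message.
# logging.info("The search type is %", search_type)

# Correct: "%s" lets logging interpolate lazily, only if the record is emitted.
logging.info("The search type is %s", search_type)
```

Passing the argument separately (rather than pre-formatting with an f-string) also skips the interpolation entirely when the log level filters the record out.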
2 changes: 1 addition & 1 deletion iterations/level_3/vectordb/vectordb.py
@@ -185,7 +185,7 @@ async def fetch_memories(self, observation: str, namespace: str = None, search_t
client = self.init_weaviate(namespace =self.namespace)
if search_type is None:
search_type = 'hybrid'
logging.info("The search type is 2 %", search_type)
logging.info("The search type is %s", str(search_type))

if not namespace:
namespace = self.namespace
2 changes: 1 addition & 1 deletion iterations/level_3/vectorstore_manager.py
@@ -340,7 +340,7 @@ async def dynamic_method_call(
)

async def add_dynamic_memory_class(self, class_name: str, namespace: str):
logging.info("Here is the memory id %s", self.memory_id[0])
logging.info("Here is the memory id %s", str(self.memory_id[0]))
new_memory_class = DynamicBaseMemory(
class_name,
self.user_id,