Added initial API logic, CRUD for graph, connected vectordb and graph

topoteretes · Nov 15, 2023 · 6914c86 · 6914c86
1 parent 35f46e6
commit 6914c86
Show file tree

Hide file tree

Showing 5 changed files with 275 additions and 27 deletions.
diff --git a/level_4/cognitive_architecture/classifiers/classifier.py b/level_4/cognitive_architecture/classifiers/classifier.py
@@ -1,7 +1,64 @@
+from langchain.prompts import ChatPromptTemplate
+import json
 
+#TO DO, ADD ALL CLASSIFIERS HERE
 
 
-#TO DO, ADD ALL CLASSIFIERS HERE
+
+
+
+
+
+from langchain.chains import create_extraction_chain
+from langchain.chat_models import ChatOpenAI
+
+from ..config import Config
+
+# Build and load the project configuration once, when this module is imported.
+# classify_documents() reads the model name from this object.
+config = Config()
+config.load()
+# OpenAI key taken from the loaded configuration; nothing in the code shown in
+# this module references it directly.
+OPENAI_API_KEY = config.openai_key
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import DirectoryLoader
+
+
+async def classify_documents(query):
+    """Classify and summarize a document through a forced OpenAI function call.
+
+    The prompt is piped into a chat model that is bound to the ``summarizer``
+    function, so the reply carries its answer as JSON-encoded function-call
+    arguments following the schema declared below.
+
+    Args:
+        query: Document text (or an excerpt of it) inserted into the prompt.
+
+    Returns:
+        dict: The parsed function-call arguments. Per the schema these are
+        ``DocumentCategory``, ``Title`` and ``Summary``.
+
+    Raises:
+        KeyError: If the model reply contains no ``function_call`` payload.
+        json.JSONDecodeError: If the returned arguments are not valid JSON.
+    """
+    llm = ChatOpenAI(temperature=0, model=config.model)
+    prompt_classify = ChatPromptTemplate.from_template(
+        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}"""
+    )
+    json_structure = [{
+        "name": "summarizer",
+        "description": "Summarization and classification",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "DocumentCategory": {
+                    "type": "string",
+                    "description": "The classification of documents in groups such as legal, medical, etc."
+                },
+                "Title": {
+                    "type": "string",
+                    "description": "The title of the document"
+                },
+                "Summary": {
+                    "type": "string",
+                    "description": "The summary of the document"
+                }
+            },
+            "required": ["DocumentCategory", "Title", "Summary"],
+        },
+    }]
+    # Forcing function_call makes the model answer with structured arguments
+    # instead of free text.
+    chain_filter = prompt_classify | llm.bind(function_call={"name": "summarizer"}, functions=json_structure)
+    classifier_output = await chain_filter.ainvoke({"query": query})
+    arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
+    print("This is the arguments string", arguments_str)
+    arguments_dict = json.loads(arguments_str)
+    # "summarizer" is the function *name*, not a key of its arguments, so a
+    # plain .get('summarizer') yields None for every schema-conforming reply.
+    # Return the arguments themselves; a nested "summarizer" entry is still
+    # honoured should a reply ever contain one.
+    classifier_value = arguments_dict.get('summarizer', arguments_dict)
+
+    print("This is the classifier value", classifier_value)
+
+    return classifier_value
 
 
 
@@ -12,4 +69,4 @@ def classify_retrieval():
 
 # classify documents according to type of document
 def classify_call():
-    pass
+    # Placeholder: no classification logic is implemented here yet.
+    pass
diff --git a/level_4/cognitive_architecture/database/graph_database/graph.py b/level_4/cognitive_architecture/database/graph_database/graph.py
@@ -485,23 +485,14 @@ def create_document_node_cypher(self, document_summary: dict, user_id: str) -> s
 
     def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str, document_title: str):
+        """Build the Cypher statement that records a vector-db namespace on a document node.
+
+        The statement matches the user's document by title through the
+        User -> SemanticMemory -> Document path and sets its
+        ``vectordbNamespace`` property. The query is only built here, not
+        executed; running it is left to the caller.
+
+        Args:
+            user_id: Value matched against ``User.userId``.
+            vectordb_namespace: Value written to ``document.vectordbNamespace``.
+            document_title: Value matched against ``Document.title``.
+
+        Returns:
+            str: The Cypher query text with all values embedded as escaped
+            string literals.
+        """
         # Generate the Cypher query
-        cypher_query = '''
-        MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {title: $document_title})
-        SET document.vectordbNamespace = $vectordb_namespace
+        def _cypher_literal(value) -> str:
+            # Values are embedded in the query text, so quotes and backslashes
+            # must be escaped; otherwise a title such as "Alice's Book" ends the
+            # literal early and breaks (or alters) the statement.
+            # Backslashes go first so the quote escapes are not doubled.
+            escaped = str(value).replace('\\', '\\\\').replace("'", "\\'")
+            return f"'{escaped}'"
+
+        user_literal = _cypher_literal(user_id)
+        title_literal = _cypher_literal(document_title)
+        namespace_literal = _cypher_literal(vectordb_namespace)
+        cypher_query = f'''
+        MATCH (user:User {{userId: {user_literal} }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{title: {title_literal}}})
+        SET document.vectordbNamespace = {namespace_literal}
         RETURN document
         '''
 
-        # Parameters for the query
-        parameters = {
-            'user_id': user_id,
-            'vectordb_namespace': vectordb_namespace,
-            'document_title': document_title
-        }
-
-        # Execute the query with the provided parameters
-        result = self.query(cypher_query, parameters)
 
-        return result
+        return cypher_query
 
 
 

diff --git a/level_4/cognitive_architecture/database/postgres/models/operation.py b/level_4/cognitive_architecture/database/postgres/models/operation.py
@@ -13,8 +13,6 @@ class Operation(Base):
     user_id = Column(String, ForeignKey('users.id'), index=True)  # Link to User
     operation_type = Column(String, nullable=True)
     operation_status = Column(String, nullable=True)
-    operation_params = Column(String, nullable=True)
-    number_of_files = Column(Integer, nullable=True)
     test_set_id = Column(String, ForeignKey('test_sets.id'), index=True)
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, onupdate=datetime.utcnow)

diff --git a/level_4/cognitive_architecture/utils.py b/level_4/cognitive_architecture/utils.py
@@ -1,3 +1,6 @@
+import os
+import random
+import string
 import uuid
 
 from graphviz import Digraph
@@ -32,7 +35,44 @@ def __init__(self, source, target, label, color):
 #     dot.render("knowledge_graph.gv", view=True)
 #
 #
-
+def get_document_names(doc_input):
+    """
+    Resolve ``doc_input`` into a list of document names.
+
+    Args:
+        doc_input: One of
+            - a list of names, which is returned as-is (same object, no copy);
+            - a path to an existing folder, which yields the names of the
+              regular files directly inside it, sorted alphabetically;
+            - a path to an existing file, which yields its base name;
+            - any other string, which is treated as a document name.
+
+    Returns:
+        list: The document names, or an empty list when ``doc_input`` is
+        none of the above (for example ``None``).
+
+    Example usage:
+        - Folder path: get_document_names(".data")
+        - Single document file path: get_document_names(".data/example.pdf")
+        - Document name provided as a string: get_document_names("example.docx")
+
+    """
+    if isinstance(doc_input, list):
+        return doc_input
+    try:
+        is_folder = os.path.isdir(doc_input)
+    except TypeError:
+        # os.path raises TypeError for objects that are not path-like (None,
+        # dicts, ...); treat them as invalid input instead of crashing.
+        return []
+    if is_folder:
+        # Sort so the result does not depend on the filesystem's listing order.
+        return sorted(
+            entry
+            for entry in os.listdir(doc_input)
+            if os.path.isfile(os.path.join(doc_input, entry))
+        )
+    if os.path.isfile(doc_input):
+        # doc_input is a single document file
+        return [os.path.basename(doc_input)]
+    if isinstance(doc_input, str):
+        # doc_input is a document name provided as a string
+        return [doc_input]
+    # doc_input is not valid
+    return []
 
 def format_dict(d):
     # Initialize an empty list to store formatted items
@@ -76,4 +116,13 @@ def create_edge_variable_mapping(edges):
         # Construct a unique identifier for the edge
         variable_name = f"edge{edge['source']}to{edge['target']}".lower()
         mapping[(edge['source'], edge['target'])] = variable_name
-    return mapping
+    return mapping
+
+
+
+def generate_letter_uuid(length=8):
+    """Return ``length`` randomly chosen uppercase ASCII letters as one string."""
+    alphabet = string.ascii_uppercase  # A-Z
+    picked = []
+    for _ in range(length):
+        picked.append(random.choice(alphabet))
+    return "".join(picked)
+
+