Skip to content

Commit

Permalink
Added initial API logic, crud for graph, connected vectordb and graph
Browse files Browse the repository at this point in the history
  • Loading branch information
Vasilije1990 committed Nov 15, 2023
1 parent 35f46e6 commit 6914c86
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 27 deletions.
61 changes: 59 additions & 2 deletions level_4/cognitive_architecture/classifiers/classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,64 @@
from langchain.prompts import ChatPromptTemplate
import json

# TODO: add all classifiers here


# TODO: add all classifiers here





from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI

from ..config import Config

# Build and load the project configuration once, at import time; importing
# this module therefore runs config.load() as a side effect.
config = Config()
config.load()
# Key read from the loaded configuration. It is not referenced in the visible
# code; presumably consumed by the OpenAI client elsewhere — TODO confirm.
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader


async def classify_documents(query):
    """Classify and summarize a document through an OpenAI function call.

    The prompt is piped into the chat model named by ``config.model`` with a
    forced call to the ``summarizer`` function, whose schema requires three
    string fields: ``DocumentCategory``, ``Title`` and ``Summary``.

    Args:
        query: Text interpolated into the classification prompt.

    Returns:
        The decoded function-call arguments. The arguments JSON carries the
        schema properties directly, so the whole decoded object is returned;
        if the model happens to nest them under a ``summarizer`` key, that
        nested value is returned instead.

    Raises:
        KeyError: If the model response carries no ``function_call`` entry.
        json.JSONDecodeError: If the arguments string is not valid JSON.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}"""
    )
    # OpenAI function schema: the model must answer with exactly these fields.
    json_structure = [{
        "name": "summarizer",
        "description": "Summarization and classification",
        "parameters": {
            "type": "object",
            "properties": {
                "DocumentCategory": {
                    "type": "string",
                    "description": "The classification of documents in groups such as legal, medical, etc."
                },
                "Title": {
                    "type": "string",
                    "description": "The title of the document"
                },
                "Summary": {
                    "type": "string",
                    "description": "The summary of the document"
                },
            },
            "required": ["DocumentCategory", "Title", "Summary"],
        },
    }]
    # Binding function_call by name forces the model to call "summarizer"
    # rather than reply with free text.
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "summarizer"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke({"query": query})
    arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
    print("This is the arguments string", arguments_str)
    arguments_dict = json.loads(arguments_str)
    # "summarizer" is the function *name*, not one of its arguments, so a
    # plain .get('summarizer') yields None for a schema-conforming reply.
    # Fall back to the full arguments object in that case.
    classifier_value = arguments_dict.get('summarizer', arguments_dict)

    print("This is the classifier value", classifier_value)

    return classifier_value



Expand All @@ -12,4 +69,4 @@ def classify_retrieval():

# classify documents according to type of document
def classify_call():
    # Stub: no classification logic is implemented here yet, so calling this
    # does nothing and returns None.
    pass
    pass
17 changes: 4 additions & 13 deletions level_4/cognitive_architecture/database/graph_database/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,23 +485,14 @@ def create_document_node_cypher(self, document_summary: dict, user_id: str) -> s

def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str, document_title: str):
# NOTE(review): this span appears to be a diff view in which removed and added
# lines are interleaved without +/- markers: a parameterized query executed via
# self.query(...) sits alongside an f-string query that is returned as text.
# Confirm which variant is live before relying on the return value.
# NOTE(review): the f-string variant splices user_id, document_title and
# vectordb_namespace into the Cypher text inside single quotes; a value
# containing a quote would alter the query. Presumably callers pass trusted
# values — TODO confirm, or escape/parameterize them.
# Generate the Cypher query
cypher_query = '''
MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {title: $document_title})
SET document.vectordbNamespace = $vectordb_namespace
cypher_query = f'''
MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{title: '{document_title}'}})
SET document.vectordbNamespace = '{vectordb_namespace}'
RETURN document
'''

# Parameters for the query
parameters = {
'user_id': user_id,
'vectordb_namespace': vectordb_namespace,
'document_title': document_title
}

# Execute the query with the provided parameters
result = self.query(cypher_query, parameters)

return result
return cypher_query



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ class Operation(Base):
user_id = Column(String, ForeignKey('users.id'), index=True) # Link to User
operation_type = Column(String, nullable=True)
operation_status = Column(String, nullable=True)
operation_params = Column(String, nullable=True)
number_of_files = Column(Integer, nullable=True)
test_set_id = Column(String, ForeignKey('test_sets.id'), index=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, onupdate=datetime.utcnow)
Expand Down
53 changes: 51 additions & 2 deletions level_4/cognitive_architecture/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import random
import string
import uuid

from graphviz import Digraph
Expand Down Expand Up @@ -32,7 +35,44 @@ def __init__(self, source, target, label, color):
# dot.render("knowledge_graph.gv", view=True)
#
#

def get_document_names(doc_input):
    """Return the document names described by ``doc_input``.

    Args:
        doc_input: One of
            - a list, which is returned unchanged;
            - a path to a folder, whose regular files (not sub-folders) are
              listed by name;
            - a path to an existing file, reduced to its base name;
            - any other string, treated as a document name.

    Returns:
        list: The document names, or an empty list when ``doc_input`` is not
        a list, string, bytes or path-like object.

    Example usage:
        - Folder path: get_document_names(".data")
        - Single document file path: get_document_names(".data/example.pdf")
        - Document name provided as a string: get_document_names("example.docx")
    """
    if isinstance(doc_input, list):
        return doc_input

    # os.path.isdir()/isfile() raise TypeError for values such as None, so
    # reject anything that is not path-like before touching the filesystem.
    if not isinstance(doc_input, (str, bytes, os.PathLike)):
        return []

    if os.path.isdir(doc_input):
        # Keep only regular files; sub-folders are skipped.
        return [
            entry
            for entry in os.listdir(doc_input)
            if os.path.isfile(os.path.join(doc_input, entry))
        ]

    if os.path.isfile(doc_input):
        return [os.path.basename(doc_input)]

    if isinstance(doc_input, str):
        # Not an existing path: treat the string itself as a document name.
        return [doc_input]

    # bytes / path-like value that does not exist on disk.
    return []

def format_dict(d):
# Initialize an empty list to store formatted items
Expand Down Expand Up @@ -76,4 +116,13 @@ def create_edge_variable_mapping(edges):
# Construct a unique identifier for the edge
variable_name = f"edge{edge['source']}to{edge['target']}".lower()
mapping[(edge['source'], edge['target'])] = variable_name
return mapping
return mapping



def generate_letter_uuid(length=8):
    """Return ``length`` randomly chosen uppercase ASCII letters as one string."""
    alphabet = string.ascii_uppercase  # "A".."Z"
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)


Loading

0 comments on commit 6914c86

Please sign in to comment.