Build the docker and push
Vasilije1990 committed Dec 16, 2023
1 parent 7d0ee16 commit 05aaee6
Showing 5 changed files with 148 additions and 96 deletions.
7 changes: 6 additions & 1 deletion level_4/api.py
@@ -115,10 +115,15 @@ async def user_query_to_graph_db(payload: Payload):

@app.post("/document-to-graph-db")
async def document_to_graph_db(payload: Payload):
logging.info("Adding documents to graph db")
try:
decoded_payload = payload.payload
if 'settings' in decoded_payload and decoded_payload['settings'] is not None:
settings_for_loader = decoded_payload['settings']
else:
settings_for_loader = None
async with session_scope(session=AsyncSessionLocal()) as session:
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =decoded_payload['settings'])
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =settings_for_loader)
return result

except Exception as e:
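A side note on the new guard in document_to_graph_db: a minimal sketch, assuming decoded_payload is a plain dict as the handler treats it, of how the same behaviour could be expressed with dict.get. The helper name and example payloads here are hypothetical, not part of the commit.

# Hypothetical helper (not part of the commit) showing the same guard with dict.get
def extract_loader_settings(decoded_payload: dict):
    # .get() returns None when 'settings' is missing and passes the stored value
    # through otherwise (including an explicit None), matching the if/else above
    return decoded_payload.get('settings')

# Illustrative payloads only
assert extract_loader_settings({'user_id': 'u-1'}) is None
assert extract_loader_settings({'user_id': 'u-1', 'settings': {'chunk_strategy': 'VANILLA'}}) == {'chunk_strategy': 'VANILLA'}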
36 changes: 36 additions & 0 deletions level_4/bin/dockerize
@@ -0,0 +1,36 @@
set -euo pipefail

AWS_REGION=${region:-eu-west-1}
AWS_DEPLOYMENT_ACCOUNT=${account:-463722570299}
AWS_REPOSITORY=${repo:-"${AWS_DEPLOYMENT_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com"}

STAGE=${stage:-"dev"}
SHA_SHORT="$(git rev-parse --short HEAD)"
CUR_DATE="$(date +%Y%m%d%H%M%S)"
VERSION="$STAGE-$CUR_DATE-$SHA_SHORT"
IMAGE_NAME=${image_name:-promethai-${STAGE}-promethai-backend}

REPO_NAME="${AWS_REPOSITORY}/${IMAGE_NAME}"
FULL_IMAGE_NAME="${REPO_NAME}:${VERSION}"
APP_DIR=${app_dir:-"."}

PUBLISH=${publish:-false}

echo "Building docker image ${FULL_IMAGE_NAME} located in dir ${app_dir}"

pushd "${APP_DIR}" &&
docker buildx build --platform linux/amd64 \
--build-arg STAGE=${STAGE} \
-t "${FULL_IMAGE_NAME}" . &&
echo "${VERSION}" >/tmp/.DOCKER_IMAGE_VERSION &&
echo "Successfully built docker image ${FULL_IMAGE_NAME}"

if [ "${PUBLISH}" = true ]; then
echo "Pushing docker image ${FULL_IMAGE_NAME} to ECR repository to AWS account ${AWS_DEPLOYMENT_ACCOUNT}"
if [ "${PUBLISH}" = true ]; then
echo "logging in"
aws ecr get-login-password --region "${AWS_REGION}" | docker login --username AWS --password-stdin "${AWS_REPOSITORY}"
fi
docker push "${FULL_IMAGE_NAME}" &&
echo "Successfully pushed docker image ${FULL_IMAGE_NAME} to ECR repository"
fi
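A hedged usage note on bin/dockerize, assuming it is invoked from level_4 (the directory that holds the Dockerfile, since APP_DIR defaults to "."): every setting comes from the lowercase environment variables the script reads with defaults (region, account, repo, stage, image_name, app_dir, publish). An invocation like "publish=true stage=prod bash bin/dockerize" would build the linux/amd64 image, log in to ECR, and push it, while a bare "bash bin/dockerize" only builds and tags the image locally and writes the version string to /tmp/.DOCKER_IMAGE_VERSION.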
10 changes: 7 additions & 3 deletions level_4/cognitive_architecture/database/vectordb/vectordb.py
@@ -151,7 +151,7 @@ async def add_memories(self, observation, loader_settings=None, params=None, nam
# Update Weaviate memories here
if namespace is None:
namespace = self.namespace
retriever = self.init_weaviate(embeddings=embeddings,namespace = namespace, retriever_type="single_document_context")
retriever = self.init_weaviate(embeddings=OpenAIEmbeddings(),namespace = namespace, retriever_type="single_document_context")
if loader_settings:
# Assuming _document_loader returns a list of documents
documents = await _document_loader(observation, loader_settings)
@@ -167,15 +167,19 @@ async def add_memories(self, observation, loader_settings=None, params=None, nam
Document(metadata=params, page_content=doc.page_content)])
else:
chunk_count = 0
documents = await _document_loader(observation, loader_settings)
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
documents = [chunk_data(chunk_strategy="VANILLA", source_data=observation, chunk_size=50,
chunk_overlap=20)]
for doc in documents[0]:
chunk_count += 1
params['chunk_order'] = chunk_count
# document_to_load = self._stuct(observation, params, metadata_schema_class)

logging.info("Loading document with defautl loader settings %s", str(doc))

# logging.info("Loading document with defautl loader settings %s", str(document_to_load))
retriever.add_documents([
Document(metadata=params, page_content=doc)])
Document(metadata=params, page_content=doc.page_content)])

async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs):
"""
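To make the new fallback branch easier to follow, here is a minimal, hedged sketch of what add_memories now does when no loader_settings are passed. The helper name is hypothetical, the langchain Document import is an assumption about the class vectordb.py already uses, and unlike the committed branch the sketch copies params per chunk so each Document keeps its own chunk_order.

# Hedged sketch of the new fallback path (helper name is hypothetical)
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
from langchain.schema import Document  # assumed to be the Document class vectordb.py already uses

def chunk_observation_for_vanilla_load(observation: str, params: dict) -> list:
    """Chunk a raw observation the way the fallback branch above does, one Document per chunk."""
    chunks = chunk_data(chunk_strategy="VANILLA", source_data=observation,
                        chunk_size=50, chunk_overlap=20)
    documents = []
    for chunk_order, chunk in enumerate(chunks, start=1):
        # copy params so every Document records its own chunk_order instead of sharing one dict
        documents.append(Document(metadata={**params, "chunk_order": chunk_order},
                                  page_content=chunk.page_content))
    return documents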
50 changes: 27 additions & 23 deletions level_4/cognitive_architecture/utils.py
@@ -4,6 +4,7 @@
import uuid

from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager


@@ -194,34 +195,37 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st
Example Usage:
"""
try:
result = await session.execute(
select(Operation)
.join(Operation.docs) # Explicit join with docs table
.join(Operation.memories) # Explicit join with memories table
.options(
contains_eager(Operation.docs), # Informs ORM of the join for docs
contains_eager(Operation.memories) # Informs ORM of the join for memories
)
.where(
(Operation.user_id == user_id) & # Filter by user_id
(Operation.docs.graph_summary == False) # Filter by user_id
)
.order_by(Operation.created_at.desc()) # Order by creation date
# try:
result = await session.execute(
select(Operation)
.join(Operation.docs) # Explicit join with docs table
.join(Operation.memories) # Explicit join with memories table
.options(
contains_eager(Operation.docs), # Informs ORM of the join for docs
contains_eager(Operation.memories) # Informs ORM of the join for memories
)
.where(
(Operation.user_id == user_id) & # Filter by user_id
or_(
DocsModel.graph_summary == False, # Condition 1: graph_summary is False
DocsModel.graph_summary == None # Condition 3: graph_summary is None
) # Filter by user_id
)
.order_by(Operation.created_at.desc()) # Order by creation date
)

operations = result.unique().scalars().all()
operations = result.unique().scalars().all()

# Extract memory names and document names and IDs
memory_names = [memory.memory_name for op in operations for memory in op.memories]
docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs]
# Extract memory names and document names and IDs
memory_names = [memory.memory_name for op in operations for memory in op.memories]
docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs]

return memory_names, docs
return memory_names, docs

except Exception as e:
# Handle the exception as needed
print(f"An error occurred: {e}")
return None
# except Exception as e:
# # Handle the exception as needed
# print(f"An error occurred: {e}")
# return None
async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
"""
Asynchronously retrieves memory names associated with a specific document ID.
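To spell out the filter that replaced the old Operation.docs.graph_summary comparison: the reworked query keeps operations whose documents have graph_summary set to False or still NULL. A minimal, hedged sketch of that predicate in isolation, where Operation and DocsModel are the repository's models, user_id is a placeholder value, and is_() is SQLAlchemy's explicit IS test, equivalent to the == comparisons used in the committed code:

from sqlalchemy import or_, select

# Operation and DocsModel are the ORM models used above; user_id is a placeholder
stmt = (
    select(Operation)
    .join(Operation.docs)
    .where(
        Operation.user_id == user_id,
        or_(
            DocsModel.graph_summary.is_(False),  # explicitly marked as not yet summarized
            DocsModel.graph_summary.is_(None),   # never set on older rows
        ),
    )
    .order_by(Operation.created_at.desc())
)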
141 changes: 72 additions & 69 deletions level_4/main.py
@@ -100,6 +100,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
DocsModel(
id=str(uuid.uuid4()),
operation_id=job_id,
graph_summary= False,
doc_name=doc
)
)
@@ -139,7 +140,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
observation=content, params=params, loader_settings=loader_settings)

await update_entity(session, Operation, job_id, "SUCCESS")
return result, namespace_id
# return result, namespace_id


async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str):
@@ -175,74 +176,76 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu

async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, loader_settings:dict=None, stupid_local_testing_flag=False): #clean this up Vasilije, don't be sloppy
""""""
try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
logging.info("Memory names are", memory_names)
logging.info("Docs are", docs)
for doc, memory_name in zip(docs, memory_names):
doc_name, doc_id = doc
if stupid_local_testing_flag:
classification = [{
"DocumentCategory": "Literature",
"Title": "Bartleby, the Scrivener",
"Summary": "The document is a narrative about an enigmatic copyist named Bartleby who works in a law office. Despite initially being a diligent employee, Bartleby begins to refuse tasks with the phrase 'I would prefer not to' and eventually stops working altogether. His passive resistance and mysterious behavior confound the narrator, who is also his employer. Bartleby's refusal to leave the office leads to various complications, and he is eventually taken to the Tombs as a vagrant. The story ends with Bartleby's death and the revelation that he may have previously worked in the Dead Letter Office, which adds a layer of poignancy to his character.",
"d_id": "2a5c571f-bad6-4649-a4ac-36e4bb4f34cd"
},
{
"DocumentCategory": "Science",
"Title": "The Mysterious World of Quantum Mechanics",
"Summary": "This article delves into the fundamentals of quantum mechanics, exploring its paradoxical nature where particles can exist in multiple states simultaneously. It discusses key experiments and theories that have shaped our understanding of the quantum world, such as the double-slit experiment, Schrödinger's cat, and quantum entanglement. The piece also touches upon the implications of quantum mechanics for future technology, including quantum computing and cryptography.",
"d_id": "f4e2c3b1-4567-8910-11a2-b3c4d5e6f7g8"
},
{
"DocumentCategory": "History",
"Title": "The Rise and Fall of the Roman Empire",
"Summary": "This essay provides an overview of the Roman Empire's history, from its foundation to its eventual decline. It examines the political, social, and economic factors that contributed to the empire's expansion and success, as well as those that led to its downfall. Key events and figures such as Julius Caesar, the Punic Wars, and the transition from republic to empire are discussed. The essay concludes with an analysis of the empire's lasting impact on Western civilization.",
"d_id": "8h7g6f5e-4d3c-2b1a-09e8-d7c6b5a4f3e2"
},
{
"DocumentCategory": "Technology",
"Title": "The Future of Artificial Intelligence",
"Summary": "This report explores the current state and future prospects of artificial intelligence (AI). It covers the evolution of AI from simple algorithms to advanced neural networks capable of deep learning. The document discusses various applications of AI in industries such as healthcare, finance, and transportation, as well as ethical considerations and potential risks associated with AI development. Predictions for future advancements and their societal impact are also presented.",
"d_id": "3c2b1a09-d8e7-f6g5-h4i3-j1k2l3m4n5o6"
},
{
"DocumentCategory": "Economics",
"Title": "Global Economic Trends and Predictions",
"Summary": "This analysis examines major trends in the global economy, including the rise of emerging markets, the impact of technology on job markets, and shifts in international trade. It delves into the economic effects of recent global events, such as pandemics and geopolitical conflicts, and discusses how these might shape future economic policies and practices. The document provides predictions for economic growth, inflation rates, and currency fluctuations in the coming years.",
"d_id": "7k6j5h4g-3f2e-1d0c-b8a9-m7n6o5p4q3r2"
}
]
for classification in classification:

neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)

# select doc from the store
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
else:
try:
classification_content = fetch_document_vectordb_namespace(session, user_id, memory_name)
except:
classification_content = "None"

classification = await classify_documents(doc_name, document_id =doc_id, content=classification_content)

logging.info("Classification is", str(classification))
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)

# select doc from the store
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
document_id=doc_id)
await update_entity(session, DocsModel, doc_id, True)
except:
pass


# try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
logging.info("Memory names are", memory_names)
logging.info("Docs are", docs)
for doc, memory_name in zip(docs, memory_names):
doc_name, doc_id = doc
# if stupid_local_testing_flag:
# classification = [{
# "DocumentCategory": "Literature",
# "Title": "Bartleby, the Scrivener",
# "Summary": "The document is a narrative about an enigmatic copyist named Bartleby who works in a law office. Despite initially being a diligent employee, Bartleby begins to refuse tasks with the phrase 'I would prefer not to' and eventually stops working altogether. His passive resistance and mysterious behavior confound the narrator, who is also his employer. Bartleby's refusal to leave the office leads to various complications, and he is eventually taken to the Tombs as a vagrant. The story ends with Bartleby's death and the revelation that he may have previously worked in the Dead Letter Office, which adds a layer of poignancy to his character.",
# "d_id": "2a5c571f-bad6-4649-a4ac-36e4bb4f34cd"
# },
# {
# "DocumentCategory": "Science",
# "Title": "The Mysterious World of Quantum Mechanics",
# "Summary": "This article delves into the fundamentals of quantum mechanics, exploring its paradoxical nature where particles can exist in multiple states simultaneously. It discusses key experiments and theories that have shaped our understanding of the quantum world, such as the double-slit experiment, Schrödinger's cat, and quantum entanglement. The piece also touches upon the implications of quantum mechanics for future technology, including quantum computing and cryptography.",
# "d_id": "f4e2c3b1-4567-8910-11a2-b3c4d5e6f7g8"
# },
# {
# "DocumentCategory": "History",
# "Title": "The Rise and Fall of the Roman Empire",
# "Summary": "This essay provides an overview of the Roman Empire's history, from its foundation to its eventual decline. It examines the political, social, and economic factors that contributed to the empire's expansion and success, as well as those that led to its downfall. Key events and figures such as Julius Caesar, the Punic Wars, and the transition from republic to empire are discussed. The essay concludes with an analysis of the empire's lasting impact on Western civilization.",
# "d_id": "8h7g6f5e-4d3c-2b1a-09e8-d7c6b5a4f3e2"
# },
# {
# "DocumentCategory": "Technology",
# "Title": "The Future of Artificial Intelligence",
# "Summary": "This report explores the current state and future prospects of artificial intelligence (AI). It covers the evolution of AI from simple algorithms to advanced neural networks capable of deep learning. The document discusses various applications of AI in industries such as healthcare, finance, and transportation, as well as ethical considerations and potential risks associated with AI development. Predictions for future advancements and their societal impact are also presented.",
# "d_id": "3c2b1a09-d8e7-f6g5-h4i3-j1k2l3m4n5o6"
# },
# {
# "DocumentCategory": "Economics",
# "Title": "Global Economic Trends and Predictions",
# "Summary": "This analysis examines major trends in the global economy, including the rise of emerging markets, the impact of technology on job markets, and shifts in international trade. It delves into the economic effects of recent global events, such as pandemics and geopolitical conflicts, and discusses how these might shape future economic policies and practices. The document provides predictions for economic growth, inflation rates, and currency fluctuations in the coming years.",
# "d_id": "7k6j5h4g-3f2e-1d0c-b8a9-m7n6o5p4q3r2"
# }
# ]
# for classification in classification:
#
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
# password=config.graph_database_password)
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
# neo4j_graph_db.query(rs, classification)
#
# # select doc from the store
# neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
# else:
try:
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_name)
except:
classification_content = "None"
#
# classification = await classify_documents(doc_name, document_id =doc_id, content=classification_content)
#
# logging.info("Classification is", str(classification))
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
# password=config.graph_database_password)
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
# neo4j_graph_db.query(rs, classification)
#
# # select doc from the store
# neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
# document_id=doc_id)
await update_entity(session, DocsModel, doc_id, True)
# except:
# pass

class ResponseString(BaseModel):
response: str = Field(..., default_factory=list)
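Read together, the Python-side changes form one pipeline: load_documents_to_vectorstore now stores each DocsModel row with graph_summary=False, get_unsumarized_vector_db_namespace (with the new or_ filter) returns exactly the documents whose graph_summary is still False or NULL, and each iteration of add_documents_to_graph_db ends with update_entity(session, DocsModel, doc_id, True), which presumably flips that flag once the document has been processed (most of the Neo4j node-creation calls in that loop are commented out in this commit).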
