From 0b6f9b0dca19abbd2a2407c2a4eeea6867404aed Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 8 Nov 2023 20:18:12 +0100 Subject: [PATCH] Added dynamic graph creation --- level_4/CONTRIBUTING.md | 61 +++ level_4/Dockerfile | 8 +- level_4/LICENSE | 201 +++++++++ level_4/__init__.py | 0 level_4/cognitive_architecture/__init__.py | 0 .../database/__init__.py | 0 .../database/database.py | 80 ++++ .../database/database_crud.py | 26 ++ level_4/cognitive_architecture/entrypoint.sh | 20 + .../cognitive_architecture/fetch_secret.py | 75 +++ .../cognitive_architecture/models/__init__.py | 0 level_4/cognitive_architecture/models/docs.py | 19 + .../cognitive_architecture/models/memory.py | 26 ++ .../models/metadatas.py | 26 ++ .../models/operation.py | 31 ++ .../cognitive_architecture/models/sessions.py | 25 + .../models/testoutput.py | 51 +++ .../cognitive_architecture/models/testset.py | 27 ++ level_4/cognitive_architecture/models/user.py | 29 ++ .../cognitive_architecture/openai_tools.py | 140 ++++++ .../scripts/__init__.py | 0 .../scripts/create_database.py | 67 +++ .../vectordb/__init__.py | 0 .../vectordb/basevectordb.py | 297 ++++++++++++ .../vectordb/chunkers/__init__.py | 0 .../vectordb/chunkers/chunkers.py | 88 ++++ .../vectordb/loaders/__init__.py | 0 .../vectordb/loaders/loaders.py | 81 ++++ .../vectordb/vectordb.py | 341 ++++++++++++++ .../vectorstore_manager.py | 426 ++++++++++++++++++ level_4/main.py | 20 +- 31 files changed, 2155 insertions(+), 10 deletions(-) create mode 100644 level_4/CONTRIBUTING.md create mode 100644 level_4/LICENSE create mode 100644 level_4/__init__.py create mode 100644 level_4/cognitive_architecture/__init__.py create mode 100644 level_4/cognitive_architecture/database/__init__.py create mode 100644 level_4/cognitive_architecture/database/database.py create mode 100644 level_4/cognitive_architecture/database/database_crud.py create mode 100755 level_4/cognitive_architecture/entrypoint.sh create mode 100644 level_4/cognitive_architecture/fetch_secret.py create mode 100644 level_4/cognitive_architecture/models/__init__.py create mode 100644 level_4/cognitive_architecture/models/docs.py create mode 100644 level_4/cognitive_architecture/models/memory.py create mode 100644 level_4/cognitive_architecture/models/metadatas.py create mode 100644 level_4/cognitive_architecture/models/operation.py create mode 100644 level_4/cognitive_architecture/models/sessions.py create mode 100644 level_4/cognitive_architecture/models/testoutput.py create mode 100644 level_4/cognitive_architecture/models/testset.py create mode 100644 level_4/cognitive_architecture/models/user.py create mode 100644 level_4/cognitive_architecture/openai_tools.py create mode 100644 level_4/cognitive_architecture/scripts/__init__.py create mode 100644 level_4/cognitive_architecture/scripts/create_database.py create mode 100644 level_4/cognitive_architecture/vectordb/__init__.py create mode 100644 level_4/cognitive_architecture/vectordb/basevectordb.py create mode 100644 level_4/cognitive_architecture/vectordb/chunkers/__init__.py create mode 100644 level_4/cognitive_architecture/vectordb/chunkers/chunkers.py create mode 100644 level_4/cognitive_architecture/vectordb/loaders/__init__.py create mode 100644 level_4/cognitive_architecture/vectordb/loaders/loaders.py create mode 100644 level_4/cognitive_architecture/vectordb/vectordb.py create mode 100644 level_4/cognitive_architecture/vectorstore_manager.py diff --git a/level_4/CONTRIBUTING.md 
b/level_4/CONTRIBUTING.md
new file mode 100644
index 000000000..96f41f48e
--- /dev/null
+++ b/level_4/CONTRIBUTING.md
@@ -0,0 +1,61 @@
+# 🚀 How to Contribute to PromethAI Memory
+
+Thank you for investing time in contributing to our project! Here's a guide to get you started.
+
+## 1. 🚀 Getting Started
+
+### 🍴 Fork the Repository
+
+To start your journey, you'll need your very own copy of PromethAI Memory. Think of it as your own innovation lab. 🧪
+
+1. Navigate to the PromethAI Memory repository on GitHub.
+2. In the upper-right corner, click the 'Fork' button.
+
+### 🚀 Clone the Repository
+
+Next, let's bring your newly forked repository to your local machine.
+
+```shell
+git clone https://github.com/your-username/PromethAI-Memory.git
+```
+
+## 2. 🛠️ Making Changes
+
+### 🌟 Create a Branch
+
+Get ready to channel your creativity. Begin by creating a new branch for your feature. 🧞‍♂️
+
+```shell
+git checkout -b feature/your-feature-name
+```
+
+### ✍️ Make Your Changes
+
+Now's your chance to shine! Dive in and make your contributions. 🌠
+
+## 3. 🚀 Submitting Changes
+
+### 🚀 Create a Pull Request
+
+You're on the verge of completion! It's time to showcase your hard work. 🌍
+
+1. Go to PromethAI Memory on GitHub.
+2. Hit the "New Pull Request" button.
+3. Select the base branch (`main`) and the compare branch (the one with your feature).
+4. Craft a compelling title and provide a detailed description of your contributions. 🎩
+
+## 4. 🔍 Review and Approval
+
+The project maintainers will review your work, possibly suggest improvements, or request further details. Once you receive approval, your contributions will become part of PromethAI Memory!
+
+## 5. 📜 Code of Conduct
+
+Please adhere to the project's Code of Conduct throughout your participation.
+
+## 6. 📫 Contact
+
+If you need assistance or simply wish to connect, we're here for you. Contact us by filing an issue on the GitHub repository or by messaging us on our Discord server.
+
+Thanks for helping to evolve PromethAI Memory!
diff --git a/level_4/Dockerfile b/level_4/Dockerfile
index feb677fe3..14df5ee91 100644
--- a/level_4/Dockerfile
+++ b/level_4/Dockerfile
@@ -42,10 +42,10 @@ RUN apt-get update -q && \
 #RUN playwright install-deps
 WORKDIR /app
-COPY . /app
-COPY scripts/ /app
-COPY entrypoint.sh /app/entrypoint.sh
-COPY scripts/create_database.py /app/create_database.py
+COPY cognitive_architecture/ /app
+COPY cognitive_architecture/scripts/ /app
+COPY cognitive_architecture/entrypoint.sh /app/entrypoint.sh
+COPY cognitive_architecture/scripts/create_database.py /app/create_database.py
 RUN chmod +x /app/entrypoint.sh
 ENTRYPOINT ["/app/entrypoint.sh"]
\ No newline at end of file
diff --git a/level_4/LICENSE b/level_4/LICENSE
new file mode 100644
index 000000000..261eeb9e9
--- /dev/null
+++ b/level_4/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
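The diffs that follow add the new `cognitive_architecture` package (async Postgres layer, SQLAlchemy models, and a vector-store abstraction). As a rough orientation, a minimal usage sketch of the async database helpers introduced below might look like this; the import path, the environment setup, and the `register_user` wrapper are illustrative assumptions, not part of the patch:

```python
# Sketch only: assumes the POSTGRES_* variables read by database.py are set
# and that the package is importable as `cognitive_architecture`.
import asyncio

from cognitive_architecture.database.database import safe_db_operation
from cognitive_architecture.database.database_crud import add_entity
from cognitive_architecture.models.user import User


async def register_user(user_id: str) -> str:
    # safe_db_operation opens an AsyncSession and retries on dropped connections;
    # add_entity commits (or rolls back) through session_scope.
    return await safe_db_operation(add_entity, User(id=user_id))


if __name__ == "__main__":
    print(asyncio.run(register_user("user-123")))
```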
diff --git a/level_4/__init__.py b/level_4/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/__init__.py b/level_4/cognitive_architecture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/database/__init__.py b/level_4/cognitive_architecture/database/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/database/database.py b/level_4/cognitive_architecture/database/database.py new file mode 100644 index 000000000..b100f5c8c --- /dev/null +++ b/level_4/cognitive_architecture/database/database.py @@ -0,0 +1,80 @@ +import os +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession +from sqlalchemy.orm import declarative_base, sessionmaker +from contextlib import asynccontextmanager +from sqlalchemy.exc import OperationalError +import asyncio +import sys +from dotenv import load_dotenv + +load_dotenv() + + +# this is needed to import classes from other modules +script_dir = os.path.dirname(os.path.abspath(__file__)) +# Get the parent directory of your script and add it to sys.path +parent_dir = os.path.dirname(script_dir) +sys.path.append(parent_dir) + + +# in seconds +MAX_RETRIES = 3 +RETRY_DELAY = 5 + +username = os.getenv('POSTGRES_USER') +password = os.getenv('POSTGRES_PASSWORD') +database_name = os.getenv('POSTGRES_DB') +import os + +environment = os.environ.get("ENVIRONMENT") + +if environment == "local": + host= os.getenv('POSTGRES_HOST') + +elif environment == "docker": + host= os.getenv('POSTGRES_HOST_DOCKER') +else: + host= os.getenv('POSTGRES_HOST_DOCKER') + + + +# Use the asyncpg driver for async operation +SQLALCHEMY_DATABASE_URL = f"postgresql+asyncpg://{username}:{password}@{host}:5432/{database_name}" + + +engine = create_async_engine( + SQLALCHEMY_DATABASE_URL, + pool_recycle=3600, + echo=True # Enable logging for tutorial purposes +) +# Use AsyncSession for the session +AsyncSessionLocal = sessionmaker( + bind=engine, + class_=AsyncSession, + expire_on_commit=False, +) + +Base = declarative_base() + +# Use asynccontextmanager to define an async context manager +@asynccontextmanager +async def get_db(): + db = AsyncSessionLocal() + try: + yield db + finally: + await db.close() + +# Use async/await syntax for the async function +async def safe_db_operation(db_op, *args, **kwargs): + for attempt in range(MAX_RETRIES): + async with get_db() as db: + try: + # Ensure your db_op is also async + return await db_op(db, *args, **kwargs) + except OperationalError as e: + await db.rollback() + if "server closed the connection unexpectedly" in str(e) and attempt < MAX_RETRIES - 1: + await asyncio.sleep(RETRY_DELAY) + else: + raise \ No newline at end of file diff --git a/level_4/cognitive_architecture/database/database_crud.py b/level_4/cognitive_architecture/database/database_crud.py new file mode 100644 index 000000000..65924bfae --- /dev/null +++ b/level_4/cognitive_architecture/database/database_crud.py @@ -0,0 +1,26 @@ + +from contextlib import asynccontextmanager +import logging + +logger = logging.getLogger(__name__) + +@asynccontextmanager +async def session_scope(session): + """Provide a transactional scope around a series of operations.""" + + # session = AsyncSessionLocal() + try: + yield session + await session.commit() + except Exception as e: + await session.rollback() + logger.error(f"Session rollback due to: {str(e)}") + raise + finally: + await session.close() + + +async def add_entity(session, entity): 
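+    """Persist a single ORM entity inside a transactional scope; session_scope commits on success and rolls back on error."""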
+ async with session_scope(session) as s: # Use your async session_scope + s.add(entity) # No need to commit; session_scope takes care of it + return "Successfully added entity" \ No newline at end of file diff --git a/level_4/cognitive_architecture/entrypoint.sh b/level_4/cognitive_architecture/entrypoint.sh new file mode 100755 index 000000000..4cf551d01 --- /dev/null +++ b/level_4/cognitive_architecture/entrypoint.sh @@ -0,0 +1,20 @@ +#!/bin/bash +export ENVIRONMENT +# Run Python scripts with error handling +echo "Running fetch_secret.py" +python fetch_secret.py +if [ $? -ne 0 ]; then + echo "Error: fetch_secret.py failed" + exit 1 +fi + +echo "Running create_database.py" +python create_database.py +if [ $? -ne 0 ]; then + echo "Error: create_database.py failed" + exit 1 +fi + +# Start Gunicorn +echo "Starting Gunicorn" +gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app diff --git a/level_4/cognitive_architecture/fetch_secret.py b/level_4/cognitive_architecture/fetch_secret.py new file mode 100644 index 000000000..6c422d1af --- /dev/null +++ b/level_4/cognitive_architecture/fetch_secret.py @@ -0,0 +1,75 @@ +import os +from dotenv import load_dotenv +from api import start_api_server + +# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true" +import boto3 + +environment = os.getenv("AWS_ENV", "dev") + + + +def fetch_secret(secret_name, region_name, env_file_path): + session = boto3.session.Session() + client = session.client(service_name="secretsmanager", region_name=region_name) + + try: + response = client.get_secret_value(SecretId=secret_name) + except Exception as e: + print(f"Error retrieving secret: {e}") + return None + + if "SecretString" in response: + secret = response["SecretString"] + else: + secret = response["SecretBinary"] + + with open(env_file_path, "w") as env_file: + env_file.write(secret) + + if os.path.exists(env_file_path): + print(f"The .env file is located at: {os.path.abspath(env_file_path)}") + load_dotenv() + PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") + + print("LEN OF PINECONE_API_KEY", len(PINECONE_API_KEY)) + else: + print("The .env file was not found.") + return "Success in loading env files" + + +env_file = ".env" +if os.path.exists(env_file): + # Load default environment variables (.env) + load_dotenv() + print("Talk to the AI!") + + +else: + secrets = fetch_secret( + f"promethai-{environment}-backend-secretso-promethaijs-dotenv", + "eu-west-1", + ".env", + ) + if secrets: + print(secrets) + load_dotenv() + + +# Check if "dev" is present in the task ARN +if "dev" in environment: + # Fetch the secret + secrets = fetch_secret( + f"promethai-dev-backend-secretso-promethaijs-dotenv", + "eu-west-1", + ".env", + ) + load_dotenv() +elif "prd" in environment: + # Fetch the secret + secrets = fetch_secret( + f"promethai-prd-backend-secretso-promethaijs-dotenv", + "eu-west-1", + ".env", + ) + load_dotenv() diff --git a/level_4/cognitive_architecture/models/__init__.py b/level_4/cognitive_architecture/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/models/docs.py b/level_4/cognitive_architecture/models/docs.py new file mode 100644 index 000000000..38166956b --- /dev/null +++ b/level_4/cognitive_architecture/models/docs.py @@ -0,0 +1,19 @@ + +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys 
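+# Add this file's directory to sys.path before importing the shared declarative Base below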
+sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base +class DocsModel(Base): + __tablename__ = 'docs' + + id = Column(String, primary_key=True) + operation_id = Column(String, ForeignKey('operations.id'), index=True) + doc_name = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + + operations = relationship("Operation", back_populates="docs") \ No newline at end of file diff --git a/level_4/cognitive_architecture/models/memory.py b/level_4/cognitive_architecture/models/memory.py new file mode 100644 index 000000000..e19ecddc4 --- /dev/null +++ b/level_4/cognitive_architecture/models/memory.py @@ -0,0 +1,26 @@ +# memory.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + +class MemoryModel(Base): + __tablename__ = 'memories' + + id = Column(String, primary_key=True) + user_id = Column(String, ForeignKey('users.id'), index=True) + operation_id = Column(String, ForeignKey('operations.id'), index=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + methods_list = Column(String , nullable=True) + attributes_list = Column(String, nullable=True) + + user = relationship("User", back_populates="memories") + operation = relationship("Operation", back_populates="memories") + metadatas = relationship("MetaDatas", back_populates="memory", cascade="all, delete-orphan") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/metadatas.py b/level_4/cognitive_architecture/models/metadatas.py new file mode 100644 index 000000000..7d2716cba --- /dev/null +++ b/level_4/cognitive_architecture/models/metadatas.py @@ -0,0 +1,26 @@ +# metadata.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class MetaDatas(Base): + __tablename__ = 'metadatas' + + id = Column(String, primary_key=True) + user_id = Column(String, ForeignKey('users.id'), index=True) + version = Column(String, nullable=False) + contract_metadata = Column(String, nullable=False) + memory_id = Column(String, ForeignKey('memories.id'), index=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + user = relationship("User", back_populates="metadatas") + memory = relationship("MemoryModel", back_populates="metadatas") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/operation.py b/level_4/cognitive_architecture/models/operation.py new file mode 100644 index 000000000..1d06657d9 --- /dev/null +++ b/level_4/cognitive_architecture/models/operation.py @@ -0,0 +1,31 @@ +# operation.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class Operation(Base): + __tablename__ = 'operations' + + id = Column(String, primary_key=True) + user_id = Column(String, 
ForeignKey('users.id'), index=True) # Link to User + operation_type = Column(String, nullable=True) + operation_status = Column(String, nullable=True) + operation_params = Column(String, nullable=True) + number_of_files = Column(Integer, nullable=True) + test_set_id = Column(String, ForeignKey('test_sets.id'), index=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + memories = relationship("MemoryModel", back_populates="operation") + + # Relationships + user = relationship("User", back_populates="operations") + test_set = relationship("TestSet", back_populates="operations") + docs = relationship("DocsModel", back_populates="operations") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/sessions.py b/level_4/cognitive_architecture/models/sessions.py new file mode 100644 index 000000000..41110d52d --- /dev/null +++ b/level_4/cognitive_architecture/models/sessions.py @@ -0,0 +1,25 @@ +# session.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class Session(Base): + __tablename__ = 'sessions' + + id = Column(String, primary_key=True) + user_id = Column(String, ForeignKey('users.id'), index=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + # Corrected relationship name + user = relationship("User", back_populates="sessions") + + # operations = relationship("Operation", back_populates="session", cascade="all, delete-orphan") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/testoutput.py b/level_4/cognitive_architecture/models/testoutput.py new file mode 100644 index 000000000..4731a6e46 --- /dev/null +++ b/level_4/cognitive_architecture/models/testoutput.py @@ -0,0 +1,51 @@ +# test_output.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +from datetime import datetime +from sqlalchemy import Column, String, DateTime, ForeignKey, JSON +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class TestOutput(Base): + """ + Represents the output result of a specific test set. 
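+    Holds the query, context, expected vs. actual output, score, and metric name for one evaluation run, plus a JSON `test_results` blob.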
+ """ + __tablename__ = 'test_outputs' + + set_id = Column(String, primary_key=True) + id = Column(String, nullable=True) + user_id = Column(String, ForeignKey('users.id'), index=True) # Added user_id field + test_set_id = Column(String, ForeignKey('test_sets.id'), index=True) + operation_id = Column(String, ForeignKey('operations.id'), index=True) + test_params= Column(String, nullable=True) + test_result = Column(String, nullable=True) + test_score = Column(String, nullable=True) + test_metric_name = Column(String, nullable=True) + test_query = Column(String, nullable=True) + test_output = Column(String, nullable=True) + test_expected_output = Column(String, nullable=True) + test_context = Column(String, nullable=True) + number_of_memories = Column(String, nullable=True) + + test_results = Column(JSON, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + # Relationships + user = relationship("User", back_populates="test_outputs") # Added relationship with User + test_set = relationship("TestSet", back_populates="test_outputs") + operation = relationship("Operation", backref="test_outputs") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/testset.py b/level_4/cognitive_architecture/models/testset.py new file mode 100644 index 000000000..b9f17a192 --- /dev/null +++ b/level_4/cognitive_architecture/models/testset.py @@ -0,0 +1,27 @@ +# test_set.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class TestSet(Base): + __tablename__ = 'test_sets' + + id = Column(String, primary_key=True) + user_id = Column(String, ForeignKey('users.id'), index=True) # Ensure uniqueness + + content = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + user = relationship("User", back_populates="test_sets") + operations = relationship("Operation", back_populates="test_set") + + test_outputs = relationship("TestOutput", back_populates="test_set", cascade="all, delete-orphan") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/models/user.py b/level_4/cognitive_architecture/models/user.py new file mode 100644 index 000000000..d9514c3cf --- /dev/null +++ b/level_4/cognitive_architecture/models/user.py @@ -0,0 +1,29 @@ +# user.py +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey +from sqlalchemy.orm import relationship +from sqlalchemy.ext.declarative import declarative_base +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from database.database import Base + + +class User(Base): + __tablename__ = 'users' + + id = Column(String, primary_key=True, index=True) + session_id = Column(String, nullable=True, unique=True) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, onupdate=datetime.utcnow) + + # Relationships + memories = relationship("MemoryModel", back_populates="user", cascade="all, delete-orphan") + operations = relationship("Operation", back_populates="user", cascade="all, delete-orphan") + sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan") + test_sets = relationship("TestSet", 
back_populates="user", cascade="all, delete-orphan") + test_outputs = relationship("TestOutput", back_populates="user", cascade="all, delete-orphan") + metadatas = relationship("MetaDatas", back_populates="user") + + def __repr__(self): + return f"" diff --git a/level_4/cognitive_architecture/openai_tools.py b/level_4/cognitive_architecture/openai_tools.py new file mode 100644 index 000000000..3f180edfc --- /dev/null +++ b/level_4/cognitive_architecture/openai_tools.py @@ -0,0 +1,140 @@ +import asyncio +import random +import os +import time + + + +HOST = os.getenv("OPENAI_API_BASE") +HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion + +import openai + +if HOST is not None: + openai.api_base = HOST + + +def retry_with_exponential_backoff( + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 20, + errors: tuple = (openai.error.RateLimitError,), +): + """Retry a function with exponential backoff.""" + + def wrapper(*args, **kwargs): + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + return func(*args, **kwargs) + + # Retry on specified errors + except errors as e: + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + time.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + +@retry_with_exponential_backoff +def completions_with_backoff(**kwargs): + # Local model + return openai.ChatCompletion.create(**kwargs) + + +def aretry_with_exponential_backoff( + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 20, + errors: tuple = (openai.error.RateLimitError,), +): + """Retry a function with exponential backoff.""" + + async def wrapper(*args, **kwargs): + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + return await func(*args, **kwargs) + + # Retry on specified errors + except errors as e: + print(f"acreate (backoff): caught error: {e}") + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + await asyncio.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + +@aretry_with_exponential_backoff +async def acompletions_with_backoff(**kwargs): + return await openai.ChatCompletion.acreate(**kwargs) + + +@aretry_with_exponential_backoff +async def acreate_embedding_with_backoff(**kwargs): + """Wrapper around Embedding.acreate w/ backoff""" + return await openai.Embedding.acreate(**kwargs) + + +async def async_get_embedding_with_backoff(text, model="text-embedding-ada-002"): + """To get text embeddings, import/call this function + It specifies defaults + handles rate-limiting + is async""" + text = text.replace("\n", " ") + response = await acreate_embedding_with_backoff(input=[text], 
model=model) + embedding = response["data"][0]["embedding"] + return embedding + + +@retry_with_exponential_backoff +def create_embedding_with_backoff(**kwargs): + return openai.Embedding.create(**kwargs) + + +def get_embedding_with_backoff(text, model="text-embedding-ada-002"): + text = text.replace("\n", " ") + response = create_embedding_with_backoff(input=[text], model=model) + embedding = response["data"][0]["embedding"] + return embedding + + + diff --git a/level_4/cognitive_architecture/scripts/__init__.py b/level_4/cognitive_architecture/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/scripts/create_database.py b/level_4/cognitive_architecture/scripts/create_database.py new file mode 100644 index 000000000..f6715741b --- /dev/null +++ b/level_4/cognitive_architecture/scripts/create_database.py @@ -0,0 +1,67 @@ +import sys +import os + +# this is needed to import classes from other modules +script_dir = os.path.dirname(os.path.abspath(__file__)) +# Get the parent directory of your script and add it to sys.path +parent_dir = os.path.dirname(script_dir) +sys.path.append(parent_dir) + +from database.database import Base, engine +import models.memory +import models.metadatas +import models.operation +import models.sessions +import models.testoutput +import models.testset +import models.user +import models.docs +from sqlalchemy import create_engine, text +import psycopg2 +from dotenv import load_dotenv +load_dotenv() + + +def create_admin_engine(username, password, host, database_name): + admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}" + return create_engine(admin_url) + + +def database_exists(username, password, host, db_name): + engine = create_admin_engine(username, password, host, db_name) + connection = engine.connect() + query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'") + result = connection.execute(query).fetchone() + connection.close() + engine.dispose() + return result is not None + + +def create_database(username, password, host, db_name): + engine = create_admin_engine(username, password, host, db_name) + connection = engine.raw_connection() + connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) + cursor = connection.cursor() + cursor.execute(f"CREATE DATABASE {db_name}") + cursor.close() + connection.close() + engine.dispose() + + +def create_tables(engine): + Base.metadata.create_all(bind=engine) + +if __name__ == "__main__": + username = os.getenv('POSTGRES_USER') + password = os.getenv('POSTGRES_PASSWORD') + database_name = os.getenv('POSTGRES_DB') + host = os.getenv('POSTGRES_HOST') + + engine = create_admin_engine(username, password, host, database_name) + + if not database_exists(username, password, host, database_name): + print(f"Database {database_name} does not exist. 
Creating...") + create_database(username, password, host, database_name) + print(f"Database {database_name} created successfully.") + + create_tables(engine) \ No newline at end of file diff --git a/level_4/cognitive_architecture/vectordb/__init__.py b/level_4/cognitive_architecture/vectordb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/vectordb/basevectordb.py b/level_4/cognitive_architecture/vectordb/basevectordb.py new file mode 100644 index 000000000..81f4f7618 --- /dev/null +++ b/level_4/cognitive_architecture/vectordb/basevectordb.py @@ -0,0 +1,297 @@ + +import logging +from io import BytesIO +import os, sys +# Add the parent directory to sys.path +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB +import sqlalchemy as sa +logging.basicConfig(level=logging.INFO) +import marvin +import requests +from dotenv import load_dotenv +from langchain.document_loaders import PyPDFLoader +from langchain.retrievers import WeaviateHybridSearchRetriever +from weaviate.gql.get import HybridFusion +from models.sessions import Session +from models.testset import TestSet +from models.testoutput import TestOutput +from models.metadatas import MetaDatas +from models.operation import Operation +from sqlalchemy.orm import sessionmaker +from database.database import engine +load_dotenv() +from typing import Optional +import time +import tracemalloc + +tracemalloc.start() + +from datetime import datetime +from langchain.embeddings.openai import OpenAIEmbeddings + +from langchain.schema import Document +import uuid +import weaviate +from marshmallow import Schema, fields +import json + + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY") + +class VectorDBFactory: + def __init__(self): + self.db_map = { + "pinecone": PineconeVectorDB, + "weaviate": WeaviateVectorDB, + # Add more database types and their corresponding classes here + } + + def create_vector_db( + self, + user_id: str, + index_name: str, + memory_id: str, + db_type: str = "weaviate", + namespace: str = None, + embeddings=None, + ): + if db_type in self.db_map: + return self.db_map[db_type]( + user_id, + index_name, + memory_id, + namespace, + embeddings + ) + + raise ValueError(f"Unsupported database type: {db_type}") + +class BaseMemory: + def __init__( + self, + user_id: str, + memory_id: Optional[str], + index_name: Optional[str], + db_type: str, + namespace: str, + embeddings: Optional[None], + ): + self.user_id = user_id + self.memory_id = memory_id + self.index_name = index_name + self.namespace = namespace + self.embeddings = embeddings + self.db_type = db_type + factory = VectorDBFactory() + self.vector_db = factory.create_vector_db( + self.user_id, + self.index_name, + self.memory_id, + db_type=self.db_type, + namespace=self.namespace, + embeddings=self.embeddings + ) + + def init_client(self, embeddings, namespace: str): + return self.vector_db.init_client(embeddings, namespace) + + +# class VectorDBFactory: +# def create_vector_db( +# self, +# user_id: str, +# index_name: str, +# memory_id: str, +# db_type: str = "pinecone", +# namespace: str = None, +# embeddings = None, +# ): +# db_map = {"pinecone": PineconeVectorDB, "weaviate": WeaviateVectorDB} +# +# if db_type in db_map: +# return db_map[db_type]( +# user_id, +# index_name, +# memory_id, +# namespace, +# embeddings +# ) +# +# raise ValueError(f"Unsupported database type: {db_type}") 
+# +# class BaseMemory: +# def __init__( +# self, +# user_id: str, +# memory_id: Optional[str], +# index_name: Optional[str], +# db_type: str, +# namespace: str, +# embeddings: Optional[None], +# ): +# self.user_id = user_id +# self.memory_id = memory_id +# self.index_name = index_name +# self.namespace = namespace +# self.embeddings = embeddings +# self.db_type = db_type +# factory = VectorDBFactory() +# self.vector_db = factory.create_vector_db( +# self.user_id, +# self.index_name, +# self.memory_id, +# db_type=self.db_type, +# namespace=self.namespace, +# embeddings=self.embeddings +# ) +# +# def init_client(self, embeddings, namespace: str): +# +# return self.vector_db.init_weaviate_client(embeddings, namespace) + + def create_field(self, field_type, **kwargs): + field_mapping = { + "Str": fields.Str, + "Int": fields.Int, + "Float": fields.Float, + "Bool": fields.Bool, + + } + return field_mapping[field_type](**kwargs) + + def create_dynamic_schema(self, params): + """Create a dynamic schema based on provided parameters.""" + + dynamic_fields = {field_name: fields.Str() for field_name in params.keys()} + # Create a Schema instance with the dynamic fields + dynamic_schema_instance = Schema.from_dict(dynamic_fields)() + return dynamic_schema_instance + + + async def get_version_from_db(self, user_id, memory_id): + # Logic to retrieve the version from the database. + + Session = sessionmaker(bind=engine) + session = Session() + try: + # Querying both fields: contract_metadata and created_at + result = ( + session.query(MetaDatas.contract_metadata, MetaDatas.created_at) + .filter_by(user_id=user_id) # using parameter, not self.user_id + .order_by(MetaDatas.created_at.desc()) + .first() + ) + + if result: + + version_in_db, created_at = result + logging.info(f"version_in_db: {version_in_db}") + from ast import literal_eval + version_in_db= literal_eval(version_in_db) + version_in_db = version_in_db.get("version") + return [version_in_db, created_at] + else: + return None + + finally: + session.close() + + async def update_metadata(self, user_id, memory_id, version_in_params, params): + version_from_db = await self.get_version_from_db(user_id, memory_id) + Session = sessionmaker(bind=engine) + session = Session() + + # If there is no metadata, insert it. + if version_from_db is None: + + session.add(MetaDatas(id = str(uuid.uuid4()), user_id=self.user_id, version = str(int(time.time())) ,memory_id=self.memory_id, contract_metadata=params)) + session.commit() + return params + + # If params version is higher, update the metadata. + elif version_in_params > version_from_db[0]: + session.add(MetaDatas(id = str(uuid.uuid4()), user_id=self.user_id, memory_id=self.memory_id, contract_metadata=params)) + session.commit() + return params + else: + return params + + + async def add_memories( + self, + observation: Optional[str] = None, + loader_settings: dict = None, + params: Optional[dict] = None, + namespace: Optional[str] = None, + custom_fields: Optional[str] = None, + embeddings: Optional[str] = None, + + ): + from ast import literal_eval + class DynamicSchema(Schema): + pass + + default_version = 'current_timestamp' + version_in_params = params.get("version", default_version) + + # Check and update metadata version in DB. 
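+        # Build a marshmallow schema dynamically from the incoming params so arbitrary metadata fields can be validated before they are written to the vector store.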
+ schema_fields = params + + def create_field(field_type, **kwargs): + field_mapping = { + "Str": fields.Str, + "Int": fields.Int, + "Float": fields.Float, + "Bool": fields.Bool, + } + return field_mapping[field_type](**kwargs) + + # Dynamic Schema Creation + params['user_id'] = self.user_id + + + schema_instance = self.create_dynamic_schema(params) # Always creating Str field, adjust as needed + + logging.info(f"params : {params}") + + # Schema Validation + schema_instance = schema_instance + print("Schema fields: ", [field for field in schema_instance._declared_fields]) + loaded_params = schema_instance.load(params) + + return await self.vector_db.add_memories( + observation=observation, loader_settings=loader_settings, + params=loaded_params, namespace=namespace, metadata_schema_class = schema_instance, embeddings=embeddings + ) + # Add other db_type conditions if necessary + + async def fetch_memories( + self, + observation: str, + search_type: Optional[str] = None, + params: Optional[str] = None, + namespace: Optional[str] = None, + n_of_observations: Optional[int] = 2, + ): + logging.info(namespace) + logging.info("The search type is %", search_type) + logging.info(params) + logging.info(observation) + + return await self.vector_db.fetch_memories( + observation=observation, search_type= search_type, params=params, + namespace=namespace, + n_of_observations=n_of_observations + ) + + async def delete_memories(self, namespace:str, params: Optional[str] = None): + return await self.vector_db.delete_memories(namespace,params) + + + async def count_memories(self, namespace:str, params: Optional[str] = None): + return await self.vector_db.count_memories(namespace,params) + + + diff --git a/level_4/cognitive_architecture/vectordb/chunkers/__init__.py b/level_4/cognitive_architecture/vectordb/chunkers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py b/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py new file mode 100644 index 000000000..225b72e87 --- /dev/null +++ b/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py @@ -0,0 +1,88 @@ +from langchain.document_loaders import PyPDFLoader +import sys, os +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from shared.chunk_strategy import ChunkStrategy +import re +def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None): + + if chunk_strategy == ChunkStrategy.VANILLA: + chunked_data = vanilla_chunker(source_data, chunk_size, chunk_overlap) + + elif chunk_strategy == ChunkStrategy.PARAGRAPH: + chunked_data = chunk_data_by_paragraph(source_data,chunk_size, chunk_overlap) + + elif chunk_strategy == ChunkStrategy.SENTENCE: + chunked_data = chunk_by_sentence(source_data, chunk_size, chunk_overlap) + elif chunk_strategy == ChunkStrategy.EXACT: + chunked_data = chunk_data_exact(source_data, chunk_size, chunk_overlap) + else: + chunked_data = vanilla_chunker(source_data, chunk_size, chunk_overlap) + return chunked_data + + +def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20): + # adapt this for different chunking strategies + from langchain.text_splitter import RecursiveCharacterTextSplitter + text_splitter = RecursiveCharacterTextSplitter( + # Set a really small chunk size, just to show. 
+ chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + length_function=len + ) + try: + pages = text_splitter.create_documents([source_data]) + except: + pages = text_splitter.create_documents(source_data.content) + # pages = source_data.load_and_split() + return pages +def chunk_data_exact(data_chunks, chunk_size, chunk_overlap): + data = "".join(data_chunks) + chunks = [] + for i in range(0, len(data), chunk_size - chunk_overlap): + chunks.append(data[i:i + chunk_size]) + return chunks + + +def chunk_by_sentence(data_chunks, chunk_size, overlap): + # Split by periods, question marks, exclamation marks, and ellipses + data = "".join(data_chunks) + + # The regular expression is used to find series of charaters that end with one the following chaacters (. ! ? ...) + sentence_endings = r'(?<=[.!?โ€ฆ]) +' + sentences = re.split(sentence_endings, data) + + sentence_chunks = [] + for sentence in sentences: + if len(sentence) > chunk_size: + chunks = chunk_data_exact([sentence], chunk_size, overlap) + sentence_chunks.extend(chunks) + else: + sentence_chunks.append(sentence) + return sentence_chunks + + +def chunk_data_by_paragraph(data_chunks, chunk_size, overlap, bound=0.75): + data = "".join(data_chunks) + total_length = len(data) + chunks = [] + check_bound = int(bound * chunk_size) + start_idx = 0 + + while start_idx < total_length: + # Set the end index to the minimum of start_idx + default_chunk_size or total_length + end_idx = min(start_idx + chunk_size, total_length) + + # Find the next paragraph index within the current chunk and bound + next_paragraph_index = data.find('\n\n', start_idx + check_bound, end_idx) + + # If a next paragraph index is found within the current chunk + if next_paragraph_index != -1: + # Update end_idx to include the paragraph delimiter + end_idx = next_paragraph_index + 2 + + chunks.append(data[start_idx:end_idx + overlap]) + + # Update start_idx to be the current end_idx + start_idx = end_idx + + return chunks \ No newline at end of file diff --git a/level_4/cognitive_architecture/vectordb/loaders/__init__.py b/level_4/cognitive_architecture/vectordb/loaders/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/level_4/cognitive_architecture/vectordb/loaders/loaders.py b/level_4/cognitive_architecture/vectordb/loaders/loaders.py new file mode 100644 index 000000000..ce8a13c73 --- /dev/null +++ b/level_4/cognitive_architecture/vectordb/loaders/loaders.py @@ -0,0 +1,81 @@ +from io import BytesIO +import fitz +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +from vectordb.chunkers.chunkers import chunk_data +from llama_hub.file.base import SimpleDirectoryReader +from langchain.document_loaders import UnstructuredURLLoader +from langchain.document_loaders import DirectoryLoader +import logging +import os +from langchain.document_loaders import TextLoader +import requests +async def _document_loader( observation: str, loader_settings: dict): + + document_format = loader_settings.get("format", "text") + loader_strategy = loader_settings.get("strategy", "VANILLA") + chunk_size = loader_settings.get("chunk_size", 500) + chunk_overlap = loader_settings.get("chunk_overlap", 20) + import logging + import os + + logging.info("LOADER SETTINGS %s", loader_settings) + + list_of_docs = loader_settings["path"] + chunked_doc = [] + + if loader_settings.get("source") == "URL": + for file in list_of_docs: + if document_format == "PDF": + pdf_response = requests.get(file) + pdf_stream = BytesIO(pdf_response.content) + with 
fitz.open(stream=pdf_stream, filetype='pdf') as doc: + file_content = "" + for page in doc: + file_content += page.get_text() + pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size, + chunk_overlap=chunk_overlap) + + chunked_doc.append(pages) + + elif document_format == "TEXT": + loader = UnstructuredURLLoader(urls=file) + file_content = loader.load() + pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size, + chunk_overlap=chunk_overlap) + chunked_doc.append(pages) + + elif loader_settings.get("source") == "DEVICE": + + current_directory = os.getcwd() + logging.info("Current Directory: %s", current_directory) + + loader = DirectoryLoader(".data", recursive=True) + if document_format == "PDF": + # loader = SimpleDirectoryReader(".data", recursive=True, exclude_hidden=True) + documents = loader.load() + pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size, + chunk_overlap=chunk_overlap) + logging.info("Documents: %s", documents) + # pages = documents.load_and_split() + chunked_doc.append(pages) + + + elif document_format == "TEXT": + documents = loader.load() + pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size, + chunk_overlap=chunk_overlap) + logging.info("Documents: %s", documents) + # pages = documents.load_and_split() + chunked_doc.append(pages) + + else: + raise ValueError(f"Error: ") + return chunked_doc + + + + + diff --git a/level_4/cognitive_architecture/vectordb/vectordb.py b/level_4/cognitive_architecture/vectordb/vectordb.py new file mode 100644 index 000000000..0cc66066b --- /dev/null +++ b/level_4/cognitive_architecture/vectordb/vectordb.py @@ -0,0 +1,341 @@ + +# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client +import logging + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from marshmallow import Schema, fields +from loaders.loaders import _document_loader +# Add the parent directory to sys.path + + +logging.basicConfig(level=logging.INFO) +from langchain.retrievers import WeaviateHybridSearchRetriever, ParentDocumentRetriever +from weaviate.gql.get import HybridFusion +import tracemalloc +tracemalloc.start() +import os +from langchain.embeddings.openai import OpenAIEmbeddings +from dotenv import load_dotenv +from langchain.schema import Document +import weaviate + +load_dotenv() + + +LTM_MEMORY_ID_DEFAULT = "00000" +ST_MEMORY_ID_DEFAULT = "0000" +BUFFER_ID_DEFAULT = "0000" +class VectorDB: + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + + def __init__( + self, + user_id: str, + index_name: str, + memory_id: str, + namespace: str = None, + embeddings = None, + ): + self.user_id = user_id + self.index_name = index_name + self.namespace = namespace + self.memory_id = memory_id + self.embeddings = embeddings + +class PineconeVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_pinecone(self.index_name) + + def init_pinecone(self, index_name): + # Pinecone initialization logic + pass + +import langchain.embeddings +class WeaviateVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_weaviate(embeddings= self.embeddings, namespace = self.namespace) + + def init_weaviate(self, embeddings=OpenAIEmbeddings(), namespace=None,retriever_type="",): + # Weaviate initialization logic + auth_config = 
weaviate.auth.AuthApiKey( + api_key=os.environ.get("WEAVIATE_API_KEY") + ) + client = weaviate.Client( + url=os.environ.get("WEAVIATE_URL"), + auth_client_secret=auth_config, + additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")}, + ) + + if retriever_type == "single_document_context": + retriever = WeaviateHybridSearchRetriever( + client=client, + index_name=namespace, + text_key="text", + attributes=[], + embedding=embeddings, + create_schema_if_missing=True, + ) + return retriever + elif retriever_type == "multi_document_context": + retriever = WeaviateHybridSearchRetriever( + client=client, + index_name=namespace, + text_key="text", + attributes=[], + embedding=embeddings, + create_schema_if_missing=True, + ) + return retriever + else : + return client + # child_splitter = RecursiveCharacterTextSplitter(chunk_size=400) + # store = InMemoryStore() + # retriever = ParentDocumentRetriever( + # vectorstore=vectorstore, + # docstore=store, + # child_splitter=child_splitter, + # ) + from marshmallow import Schema, fields + + def create_document_structure(observation, params, metadata_schema_class=None): + """ + Create and validate a document structure with optional custom fields. + + :param observation: Content of the document. + :param params: Metadata information. + :param metadata_schema_class: Custom metadata schema class (optional). + :return: A list containing the validated document data. + """ + document_data = { + "metadata": params, + "page_content": observation + } + + def get_document_schema(): + class DynamicDocumentSchema(Schema): + metadata = fields.Nested(metadata_schema_class, required=True) + page_content = fields.Str(required=True) + + return DynamicDocumentSchema + + # Validate and deserialize, defaulting to "1.0" if not provided + CurrentDocumentSchema = get_document_schema() + loaded_document = CurrentDocumentSchema().load(document_data) + return [loaded_document] + + def _stuct(self, observation, params, metadata_schema_class =None): + """Utility function to create the document structure with optional custom fields.""" + + + # Construct document data + document_data = { + "metadata": params, + "page_content": observation + } + def get_document_schema(): + class DynamicDocumentSchema(Schema): + metadata = fields.Nested(metadata_schema_class, required=True) + page_content = fields.Str(required=True) + + return DynamicDocumentSchema + # Validate and deserialize # Default to "1.0" if not provided + CurrentDocumentSchema = get_document_schema() + loaded_document = CurrentDocumentSchema().load(document_data) + return [loaded_document] + async def add_memories(self, observation, loader_settings=None, params=None, namespace=None, metadata_schema_class=None, embeddings = 'hybrid'): + # Update Weaviate memories here + if namespace is None: + namespace = self.namespace + retriever = self.init_weaviate(embeddings=embeddings,namespace = namespace, retriever_type="single_document_context") + if loader_settings: + # Assuming _document_loader returns a list of documents + documents = await _document_loader(observation, loader_settings) + logging.info("here are the docs %s", str(documents)) + for doc in documents[0]: + document_to_load = self._stuct(doc.page_content, params, metadata_schema_class) + + logging.info("Loading document with provided loader settings %s", str(document_to_load)) + retriever.add_documents([ + Document(metadata=document_to_load[0]['metadata'], page_content=document_to_load[0]['page_content'])]) + else: + document_to_load = 
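The document-validation helpers above wrap a caller-supplied metadata schema class in a marshmallow document schema. A small, self-contained sketch of that pattern, using a hypothetical `ExampleMetadataSchema` (the real schema class is whatever the caller passes as `metadata_schema_class`):

```python
from marshmallow import Schema, fields

class ExampleMetadataSchema(Schema):
    # Hypothetical metadata fields, for illustration only.
    user_id = fields.Str(required=True)
    version = fields.Str(required=True)

class DocumentSchema(Schema):
    metadata = fields.Nested(ExampleMetadataSchema, required=True)
    page_content = fields.Str(required=True)

validated = DocumentSchema().load({
    "metadata": {"user_id": "676", "version": "1.0"},
    "page_content": "some observation text",
})
print(validated)  # validated dict, ready to wrap in a langchain Document
```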
self._stuct(observation, params, metadata_schema_class) + + logging.info("Loading document with defautl loader settings %s", str(document_to_load)) + retriever.add_documents([ + Document(metadata=document_to_load[0]['metadata'], page_content=document_to_load[0]['page_content'])]) + + async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs): + """ + Fetch documents from weaviate. + + Parameters: + - observation (str): User query. + - namespace (str, optional): Type of memory accessed. + - search_type (str, optional): Type of search ('text', 'hybrid', 'bm25', 'generate', 'generate_grouped'). Defaults to 'hybrid'. + - **kwargs: Additional parameters for flexibility. + + Returns: + List of documents matching the query or an empty list in case of error. + + Example: + fetch_memories(query="some query", search_type='text', additional_param='value') + """ + client = self.init_weaviate(namespace =self.namespace) + if search_type is None: + search_type = 'hybrid' + logging.info("The search type is 2 %", search_type) + + if not namespace: + namespace = self.namespace + + logging.info("Query on namespace %s", namespace) + + params_user_id = { + "path": ["user_id"], + "operator": "Like", + "valueText": self.user_id, + } + + def list_objects_of_class(class_name, schema): + return [ + prop["name"] + for class_obj in schema["classes"] + if class_obj["class"] == class_name + for prop in class_obj["properties"] + ] + + base_query = client.query.get( + namespace, list(list_objects_of_class(namespace, client.schema.get())) + ).with_additional( + ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance'] + ).with_where(params_user_id).with_limit(10) + + n_of_observations = kwargs.get('n_of_observations', 2) + + # try: + if search_type == 'text': + query_output = ( + base_query + .with_near_text({"concepts": [observation]}) + .with_autocut(n_of_observations) + .do() + ) + elif search_type == 'hybrid': + query_output = ( + base_query + .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE) + .with_autocut(n_of_observations) + .do() + ) + elif search_type == 'bm25': + query_output = ( + base_query + .with_bm25(query=observation) + .with_autocut(n_of_observations) + .do() + ) + elif search_type == 'generate': + generate_prompt = kwargs.get('generate_prompt', "") + query_output = ( + base_query + .with_generate(single_prompt=observation) + .with_near_text({"concepts": [observation]}) + .with_autocut(n_of_observations) + .do() + ) + elif search_type == 'generate_grouped': + generate_prompt = kwargs.get('generate_prompt', "") + query_output = ( + base_query + .with_generate(grouped_task=observation) + .with_near_text({"concepts": [observation]}) + .with_autocut(n_of_observations) + .do() + ) + else: + logging.error(f"Invalid search_type: {search_type}") + return [] + # except Exception as e: + # logging.error(f"Error executing query: {str(e)}") + # return [] + + return query_output + + async def delete_memories(self, namespace:str, params: dict = None): + if namespace is None: + namespace = self.namespace + client = self.init_weaviate(namespace = self.namespace) + if params: + where_filter = { + "path": ["id"], + "operator": "Equal", + "valueText": params.get("id", None), + } + return client.batch.delete_objects( + class_name=self.namespace, + # Same `where` filter as in the GraphQL API + where=where_filter, + ) + else: + # Delete all objects + print("HERE IS THE USER ID", self.user_id) + return client.batch.delete_objects( + 
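A hedged usage sketch for `fetch_memories` above (the keyword argument is `observation`, not `query`). It assumes `WEAVIATE_URL`, `WEAVIATE_API_KEY`, and `OPENAI_API_KEY` are set in the environment; the constructor values are illustrative.

```python
import asyncio

async def demo_fetch():
    db = WeaviateVectorDB(
        user_id="676",
        index_name="main",           # illustrative
        memory_id="00000",
        namespace="SEMANTICMEMORY",
    )
    hits = await db.fetch_memories(
        observation="what happens to Buck?",
        search_type="hybrid",        # or 'text', 'bm25', 'generate', 'generate_grouped'
        n_of_observations=2,
    )
    print(hits)

# asyncio.run(demo_fetch())
```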
class_name=namespace, + where={ + "path": ["version"], + "operator": "Equal", + "valueText": "1.0", + }, + ) + + + async def count_memories(self, namespace: str = None, params: dict = None) -> int: + """ + Count memories in a Weaviate database. + + Args: + namespace (str, optional): The Weaviate namespace to count memories in. If not provided, uses the default namespace. + + Returns: + int: The number of memories in the specified namespace. + """ + if namespace is None: + namespace = self.namespace + + client = self.init_weaviate(namespace =namespace) + + try: + object_count = client.query.aggregate(namespace).with_meta_count().do() + return object_count + except Exception as e: + logging.info(f"Error counting memories: {str(e)}") + # Handle the error or log it + return 0 + + def update_memories(self, observation, namespace: str, params: dict = None): + client = self.init_weaviate(namespace = self.namespace) + + client.data_object.update( + data_object={ + # "text": observation, + "user_id": str(self.user_id), + "version": params.get("version", None) or "", + "agreement_id": params.get("agreement_id", None) or "", + "privacy_policy": params.get("privacy_policy", None) or "", + "terms_of_service": params.get("terms_of_service", None) or "", + "format": params.get("format", None) or "", + "schema_version": params.get("schema_version", None) or "", + "checksum": params.get("checksum", None) or "", + "owner": params.get("owner", None) or "", + "license": params.get("license", None) or "", + "validity_start": params.get("validity_start", None) or "", + "validity_end": params.get("validity_end", None) or "" + # **source_metadata, + }, + class_name="Test", + uuid=params.get("id", None), + consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM + ) + return diff --git a/level_4/cognitive_architecture/vectorstore_manager.py b/level_4/cognitive_architecture/vectorstore_manager.py new file mode 100644 index 000000000..77afe16ba --- /dev/null +++ b/level_4/cognitive_architecture/vectorstore_manager.py @@ -0,0 +1,426 @@ +import logging + +from sqlalchemy.future import select + +logging.basicConfig(level=logging.INFO) +import marvin +from dotenv import load_dotenv +from level_4.cognitive_architecture.models import User +from level_4.cognitive_architecture.models.memory import MemoryModel + +load_dotenv() +import ast +import tracemalloc +from level_4.cognitive_architecture.database.database_crud import add_entity + +tracemalloc.start() + +import os +from dotenv import load_dotenv +import uuid + +load_dotenv() + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY") +from vectordb.basevectordb import BaseMemory + + +class DynamicBaseMemory(BaseMemory): + def __init__( + self, + name: str, + user_id: str, + memory_id: str, + index_name: str, + db_type: str, + namespace: str, + embeddings=None, + ): + super().__init__(user_id, memory_id, index_name, db_type, namespace, embeddings) + self.name = name + self.attributes = set() + self.methods = set() + self.inheritance = None + self.associations = [] + + async def add_method(self, method_name): + """ + Add a method to the memory class. + + Args: + - method_name (str): The name of the method to be added. + + Returns: + None + """ + self.methods.add(method_name) + + async def add_attribute(self, attribute_name): + """ + Add an attribute to the memory class. + + Args: + - attribute_name (str): The name of the attribute to be added. 
+ + Returns: + None + """ + self.attributes.add(attribute_name) + + async def get_attribute(self, attribute_name): + """ + Check if the attribute is in the memory class. + + Args: + - attribute_name (str): The name of the attribute to be checked. + + Returns: + bool: True if attribute exists, False otherwise. + """ + return attribute_name in self.attributes + + async def add_association(self, associated_memory): + """ + Add an association to another memory class. + + Args: + - associated_memory (MemoryClass): The memory class to be associated with. + + Returns: + None + """ + if associated_memory not in self.associations: + self.associations.append(associated_memory) + # Optionally, establish a bidirectional association + associated_memory.associations.append(self) + + +class Attribute: + def __init__(self, name): + """ + Initialize the Attribute class. + + Args: + - name (str): The name of the attribute. + + Attributes: + - name (str): Stores the name of the attribute. + """ + self.name = name + + +class Method: + def __init__(self, name): + """ + Initialize the Method class. + + Args: + - name (str): The name of the method. + + Attributes: + - name (str): Stores the name of the method. + """ + self.name = name + + +class Memory: + def __init__( + self, + user_id: str = "676", + session=None, + index_name: str = None, + knowledge_source: str = None, + knowledge_type: str = None, + db_type: str = "weaviate", + namespace: str = None, + memory_id: str = None, + ) -> None: + self.load_environment_variables() + self.memory_id = memory_id + self.user_id = user_id + self.session = session + self.index_name = index_name + self.db_type = db_type + self.knowledge_source = knowledge_source + self.knowledge_type = knowledge_type + self.long_term_memory = None + self.short_term_memory = None + self.namespace = namespace + self.memory_instances = [] + # inspect and fix this + self.memory_class = DynamicBaseMemory( + "Memory", user_id, str(self.memory_id), index_name, db_type, namespace + ) + + def load_environment_variables(self) -> None: + load_dotenv() + self.OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) + self.OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + + @classmethod + async def create_memory(cls, user_id: str, session, **kwargs): + """ + Class method that acts as a factory method for creating Memory instances. + It performs necessary DB checks or updates before instance creation. + """ + existing_user = await cls.check_existing_user(user_id, session) + + if existing_user: + # Handle existing user scenario... + memory_id = await cls.check_existing_memory(user_id, session) + logging.info( + f"Existing user {user_id} found in the DB. Memory ID: {memory_id}" + ) + else: + # Handle new user scenario... + memory_id = await cls.handle_new_user(user_id, session) + logging.info( + f"New user {user_id} created in the DB. Memory ID: {memory_id}" + ) + + return cls(user_id=user_id, session=session, memory_id=memory_id, **kwargs) + + async def list_memory_classes(self): + """ + Lists all available memory classes in the memory instance. 
+ """ + # Use a list comprehension to filter attributes that end with '_class' + return [attr for attr in dir(self) if attr.endswith("_class")] + + @staticmethod + async def check_existing_user(user_id: str, session): + """Check if a user exists in the DB and return it.""" + result = await session.execute(select(User).where(User.id == user_id)) + return result.scalar_one_or_none() + + @staticmethod + async def check_existing_memory(user_id: str, session): + """Check if a user memory exists in the DB and return it.""" + result = await session.execute( + select(MemoryModel.id).where(MemoryModel.user_id == user_id) + ) + return result.scalar_one_or_none() + + @staticmethod + async def handle_new_user(user_id: str, session): + """Handle new user creation in the DB and return the new memory ID.""" + + # handle these better in terms of retry and error handling + memory_id = str(uuid.uuid4()) + new_user = User(id=user_id) + await add_entity(session, new_user) + + memory = MemoryModel( + id=memory_id, + user_id=user_id, + methods_list=str(["Memory", "SemanticMemory", "EpisodicMemory"]), + attributes_list=str( + [ + "user_id", + "index_name", + "db_type", + "knowledge_source", + "knowledge_type", + "memory_id", + "long_term_memory", + "short_term_memory", + "namespace", + ] + ), + ) + await add_entity(session, memory) + return memory_id + + async def add_memory_instance(self, memory_class_name: str): + """Add a new memory instance to the memory_instances list.""" + instance = DynamicBaseMemory( + memory_class_name, + self.user_id, + self.memory_id, + self.index_name, + self.db_type, + self.namespace, + ) + print("The following instance was defined", instance) + self.memory_instances.append(instance) + + async def query_method(self): + methods_list = await self.session.execute( + select(MemoryModel.methods_list).where(MemoryModel.id == self.memory_id) + ) + methods_list = methods_list.scalar_one_or_none() + return methods_list + + async def manage_memory_attributes(self, existing_user): + """Manage memory attributes based on the user existence.""" + if existing_user: + print(f"ID before query: {self.memory_id}, type: {type(self.memory_id)}") + + # attributes_list = await self.session.query(MemoryModel.attributes_list).filter_by(id=self.memory_id[0]).scalar() + attributes_list = await self.query_method() + logging.info(f"Attributes list: {attributes_list}") + if attributes_list is not None: + attributes_list = ast.literal_eval(attributes_list) + await self.handle_attributes(attributes_list) + else: + logging.warning("attributes_list is None!") + else: + attributes_list = [ + "user_id", + "index_name", + "db_type", + "knowledge_source", + "knowledge_type", + "memory_id", + "long_term_memory", + "short_term_memory", + "namespace", + ] + await self.handle_attributes(attributes_list) + + async def handle_attributes(self, attributes_list): + """Handle attributes for existing memory instances.""" + for attr in attributes_list: + await self.memory_class.add_attribute(attr) + + async def manage_memory_methods(self, existing_user): + """ + Manage memory methods based on the user existence. 
+ """ + if existing_user: + # Fetch existing methods from the database + # methods_list = await self.session.query(MemoryModel.methods_list).filter_by(id=self.memory_id).scalar() + + methods_list = await self.session.execute( + select(MemoryModel.methods_list).where( + MemoryModel.id == self.memory_id[0] + ) + ) + methods_list = methods_list.scalar_one_or_none() + methods_list = ast.literal_eval(methods_list) + else: + # Define default methods for a new user + methods_list = [ + "async_create_long_term_memory", + "async_init", + "add_memories", + "fetch_memories", + "delete_memories", + "async_create_short_term_memory", + "_create_buffer_context", + "_get_task_list", + "_run_main_buffer", + "_available_operations", + "_provide_feedback", + ] + # Apply methods to memory instances + for class_instance in self.memory_instances: + for method in methods_list: + class_instance.add_method(method) + + async def dynamic_method_call( + self, dynamic_base_memory_instance, method_name: str, *args, **kwargs + ): + if method_name in dynamic_base_memory_instance.methods: + method = getattr(dynamic_base_memory_instance, method_name, None) + if method: + return await method(*args, **kwargs) + raise AttributeError( + f"{dynamic_base_memory_instance.name} object has no attribute {method_name}" + ) + + async def add_dynamic_memory_class(self, class_name: str, namespace: str): + logging.info("Here is the memory id %s", self.memory_id[0]) + new_memory_class = DynamicBaseMemory( + class_name, + self.user_id, + self.memory_id[0], + self.index_name, + self.db_type, + namespace, + ) + setattr(self, f"{class_name.lower()}_class", new_memory_class) + return new_memory_class + + async def add_attribute_to_class(self, class_instance, attribute_name: str): + # add this to database for a particular user and load under memory id + await class_instance.add_attribute(attribute_name) + + async def add_method_to_class(self, class_instance, method_name: str): + # add this to database for a particular user and load under memory id + await class_instance.add_method(method_name) + + +async def main(): + # if you want to run the script as a standalone script, do so with the examples below + # memory = Memory(user_id="TestUser") + # await memory.async_init() + params = { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31", + } + loader_settings = { + "format": "PDF", + "source": "URL", + "path": "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf", + } + # memory_instance = Memory(namespace='SEMANTICMEMORY') + # sss = await memory_instance.dynamic_method_call(memory_instance.semantic_memory_class, 'fetch_memories', observation='some_observation') + + from level_4.cognitive_architecture.database.database_crud import session_scope + from level_4.cognitive_architecture.database.database import AsyncSessionLocal + + async with session_scope(AsyncSessionLocal()) as session: + memory = await Memory.create_memory("676", session, namespace="SEMANTICMEMORY") + + # Adding a memory instance + await memory.add_memory_instance("ExampleMemory") + + # Managing memory attributes + existing_user = await Memory.check_existing_user("676", session) + print("here is the existing user", existing_user) + await memory.manage_memory_attributes(existing_user) + # 
aeehuvyq_semanticememory_class + + await memory.add_dynamic_memory_class("semanticmemory", "SEMANTICMEMORY") + await memory.add_method_to_class(memory.semanticmemory_class, "add_memories") + await memory.add_method_to_class(memory.semanticmemory_class, "fetch_memories") + # sss = await memory.dynamic_method_call(memory.semanticmemory_class, 'add_memories', + # observation='some_observation', params=params, loader_settings=loader_settings) + + susu = await memory.dynamic_method_call( + memory.semanticmemory_class, + "fetch_memories", + observation="some_observation", + ) + print(susu) + + # Adding a dynamic memory class + # dynamic_memory = memory.add_dynamic_memory_class("DynamicMemory", "ExampleNamespace") + + # memory_instance = Memory(namespace='PROCEDURALMEMORY', session=session) + # procedural_memory_class = memory_instance.add_dynamic_memory_class('ProceduralMemory', 'PROCEDURALMEMORY') + # memory_instance.add_method_to_class(procedural_memory_class, 'add_memories') + # + + # print(sss) + # load_jack_london = await memory._add_semantic_memory(observation = "bla", loader_settings=loader_settings, params=params) + # print(load_jack_london) + + modulator = {"relevance": 0.1, "frequency": 0.1} + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) diff --git a/level_4/main.py b/level_4/main.py index a4e9e6a4d..6701dde32 100644 --- a/level_4/main.py +++ b/level_4/main.py @@ -2,6 +2,7 @@ import typer import os +import uuid # import marvin # from pydantic_settings import BaseSettings from langchain.chains import GraphCypherQAChain @@ -37,6 +38,9 @@ def create_config_dir(): from dotenv import load_dotenv import uuid +from graphviz import Digraph + + load_dotenv() OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") @@ -185,9 +189,6 @@ def execute_cypher_query(query: str): -from graphviz import Digraph - - class Node: def __init__(self, id, description, color): self.id = id @@ -215,9 +216,6 @@ def visualize_knowledge_graph(kg: KnowledgeGraph): dot.render("knowledge_graph.gv", view=True) -import uuid - - def create_base_queries_from_user( user_id: str): # Create the user and memory components if they don't exist user_memory_cypher = f""" @@ -370,6 +368,16 @@ def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_ # Translate the KnowledgeGraph into Cypher queries + + # Make document summary in Semantic Memory + # Document summary links to a Namespace in Vector Store + # Categorize document types in Semantic Memory + # Make a spine classifier that retrieves the relevant document namespaces from Vector Store + # + # Connect document summary to chunks in Weaviate vector store + + + # print(cypher_query) # # # # # Execute the Cypher queries to create the graph in Neo4j
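The trailing comments in main.py sketch a plan to link document summaries in the knowledge graph to the vector-store namespaces that hold their chunks. A hedged Cypher sketch of that link, written in the same f-string style as the existing queries; the node labels and properties are illustrative assumptions, not part of the patch, and `execute_cypher_query` is the helper already defined in main.py.

```python
def link_summary_to_namespace(user_id: str, doc_id: str, namespace: str) -> str:
    # Hypothetical labels/properties; adjust to the real graph schema.
    return f"""
    MERGE (u:User {{userId: '{user_id}'}})
    MERGE (d:DocumentSummary {{docId: '{doc_id}'}})
    MERGE (n:VectorNamespace {{name: '{namespace}'}})
    MERGE (u)-[:HAS_DOCUMENT]->(d)
    MERGE (d)-[:CHUNKS_STORED_IN]->(n)
    """

# execute_cypher_query(link_summary_to_namespace("676", "call_of_the_wild", "SEMANTICMEMORY"))
```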