From 9ade04ea78a5146df98e8ddee806f00dffe3b38f Mon Sep 17 00:00:00 2001 From: Umberto Griffo <1609440+umbertogriffo@users.noreply.github.com> Date: Tue, 10 Dec 2024 08:27:31 +0000 Subject: [PATCH] fix: fixed phi-3.5 and dropped phi-3.1 --- README.md | 3 +-- chatbot/bot/client/lama_cpp_client.py | 2 +- chatbot/bot/model/{model.py => base_model.py} | 0 .../{model_settings.py => model_registry.py} | 4 +--- chatbot/bot/model/settings/llama.py | 2 +- chatbot/bot/model/settings/openchat.py | 2 +- chatbot/bot/model/settings/phi.py | 19 +++++-------------- chatbot/bot/model/settings/stablelm_zephyr.py | 2 +- chatbot/bot/model/settings/starling.py | 2 +- chatbot/chatbot_app.py | 2 +- chatbot/rag_chatbot_app.py | 2 +- 11 files changed, 14 insertions(+), 26 deletions(-) rename chatbot/bot/model/{model.py => base_model.py} (100%) rename chatbot/bot/model/{model_settings.py => model_registry.py} (90%) diff --git a/README.md b/README.md index d75dba6..194f3d8 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,7 @@ format. | `openchat-3.6` - OpenChat 3.6 | ✅ | 8B | [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) | | `openchat-3.5` - OpenChat 3.5 | ✅ | 7B | [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) | | `starling` Starling Beta | ✅ | 7B | Is trained from `Openchat-3.5-0106`. It's recommended if you prefer more verbosity over OpenChat - [link](https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF) | | | -| `phi-3.5` Phi-3.5 Mini 128K Instruct | ✅ | 3.8B | Set `max-new-tokens` up to `1024`. Not recommended for RAG. [link](https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF) | -| `phi-3.1` Phi-3.1 Mini 128K Instruct | ✅ | 3.8B | Set `max-new-tokens` up to `1024`. Not recommended for RAG. [link](https://huggingface.co/bartowski/Phi-3.1-mini-128k-instruct-GGUF) | +| `phi-3.5` Phi-3.5 Mini 128K Instruct | ✅ | 3.8B | [link](https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF) | | `stablelm-zephyr` StableLM Zephyr OpenOrca | ✅ | 3B | [link](https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF) | ## Supported Response Synthesis strategies diff --git a/chatbot/bot/client/lama_cpp_client.py b/chatbot/bot/client/lama_cpp_client.py index 486c7dc..814c59a 100644 --- a/chatbot/bot/client/lama_cpp_client.py +++ b/chatbot/bot/client/lama_cpp_client.py @@ -18,7 +18,7 @@ generate_qa_prompt, generate_refined_ctx_prompt, ) -from bot.model.model import ModelSettings +from bot.model.base_model import ModelSettings class LamaCppClient: diff --git a/chatbot/bot/model/model.py b/chatbot/bot/model/base_model.py similarity index 100% rename from chatbot/bot/model/model.py rename to chatbot/bot/model/base_model.py diff --git a/chatbot/bot/model/model_settings.py b/chatbot/bot/model/model_registry.py similarity index 90% rename from chatbot/bot/model/model_settings.py rename to chatbot/bot/model/model_registry.py index 0b19390..16be89b 100644 --- a/chatbot/bot/model/model_settings.py +++ b/chatbot/bot/model/model_registry.py @@ -2,7 +2,7 @@ from bot.model.settings.llama import Llama31Settings, Llama32Settings from bot.model.settings.openchat import OpenChat35Settings, OpenChat36Settings -from bot.model.settings.phi import Phi31Settings, Phi35Settings +from bot.model.settings.phi import Phi35Settings from bot.model.settings.stablelm_zephyr import StableLMZephyrSettings from bot.model.settings.starling import StarlingSettings @@ -15,7 +15,6 @@ class ModelType(Enum): OPENCHAT_3_5 = "openchat-3.5" OPENCHAT_3_6 = "openchat-3.6" STARLING = "starling" - PHI_3_1 = "phi-3.1" PHI_3_5 = "phi-3.5" LLAMA_3_1 = "llama-3.1" LLAMA_3_2 = "llama-3.2" @@ -26,7 +25,6 @@ class ModelType(Enum): ModelType.OPENCHAT_3_5.value: OpenChat35Settings, ModelType.OPENCHAT_3_6.value: OpenChat36Settings, ModelType.STARLING.value: StarlingSettings, - ModelType.PHI_3_1.value: Phi31Settings, ModelType.PHI_3_5.value: Phi35Settings, ModelType.LLAMA_3_1.value: Llama31Settings, ModelType.LLAMA_3_2.value: Llama32Settings, diff --git a/chatbot/bot/model/settings/llama.py b/chatbot/bot/model/settings/llama.py index 3781cc7..53c25a2 100644 --- a/chatbot/bot/model/settings/llama.py +++ b/chatbot/bot/model/settings/llama.py @@ -1,4 +1,4 @@ -from bot.model.model import ModelSettings +from bot.model.base_model import ModelSettings class Llama31Settings(ModelSettings): diff --git a/chatbot/bot/model/settings/openchat.py b/chatbot/bot/model/settings/openchat.py index fde2a76..cfb9cdd 100644 --- a/chatbot/bot/model/settings/openchat.py +++ b/chatbot/bot/model/settings/openchat.py @@ -1,4 +1,4 @@ -from bot.model.model import ModelSettings +from bot.model.base_model import ModelSettings class OpenChat35Settings(ModelSettings): diff --git a/chatbot/bot/model/settings/phi.py b/chatbot/bot/model/settings/phi.py index 6668d02..44bf922 100644 --- a/chatbot/bot/model/settings/phi.py +++ b/chatbot/bot/model/settings/phi.py @@ -1,20 +1,11 @@ -from bot.model.model import ModelSettings - - -class Phi31Settings(ModelSettings): - url = "https://huggingface.co/bartowski/Phi-3.1-mini-128k-instruct-GGUF/resolve/main/Phi-3.1-mini-128k-instruct-Q5_K_M.gguf" - file_name = "Phi-3.1-mini-128k-instruct-Q5_K_M.gguf" - config = { - "n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources - "n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance - "n_gpu_layers": 33, # The number of layers to offload to GPU, if you have GPU acceleration available - } - config_answer = {"temperature": 0.7, "stop": []} +from bot.model.base_model import ModelSettings class Phi35Settings(ModelSettings): - url = "https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q5_K_M.gguf" - file_name = "Phi-3.5-mini-instruct-Q5_K_M.gguf.gguf" + url = ( + "https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q5_K_M.gguf" + ) + file_name = "Phi-3.5-mini-instruct.Q5_K_M.gguf" config = { "n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources "n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance diff --git a/chatbot/bot/model/settings/stablelm_zephyr.py b/chatbot/bot/model/settings/stablelm_zephyr.py index dea74cc..772d758 100644 --- a/chatbot/bot/model/settings/stablelm_zephyr.py +++ b/chatbot/bot/model/settings/stablelm_zephyr.py @@ -1,4 +1,4 @@ -from bot.model.model import ModelSettings +from bot.model.base_model import ModelSettings class StableLMZephyrSettings(ModelSettings): diff --git a/chatbot/bot/model/settings/starling.py b/chatbot/bot/model/settings/starling.py index 39d23ae..340324e 100644 --- a/chatbot/bot/model/settings/starling.py +++ b/chatbot/bot/model/settings/starling.py @@ -1,4 +1,4 @@ -from bot.model.model import ModelSettings +from bot.model.base_model import ModelSettings class StarlingSettings(ModelSettings): diff --git a/chatbot/chatbot_app.py b/chatbot/chatbot_app.py index 4dc7106..e3e494f 100644 --- a/chatbot/chatbot_app.py +++ b/chatbot/chatbot_app.py @@ -6,7 +6,7 @@ import streamlit as st from bot.client.lama_cpp_client import LamaCppClient from bot.conversation.conversation_retrieval import ConversationRetrieval -from bot.model.model_settings import get_model_settings, get_models +from bot.model.model_registry import get_model_settings, get_models from helpers.log import get_logger logger = get_logger(__name__) diff --git a/chatbot/rag_chatbot_app.py b/chatbot/rag_chatbot_app.py index e000c56..8997b15 100644 --- a/chatbot/rag_chatbot_app.py +++ b/chatbot/rag_chatbot_app.py @@ -13,7 +13,7 @@ ) from bot.memory.embedder import Embedder from bot.memory.vector_database.chroma import Chroma -from bot.model.model_settings import get_model_settings, get_models +from bot.model.model_registry import get_model_settings, get_models from helpers.log import get_logger from helpers.prettier import prettify_source