From 93dd31fc0fc459c951fe188682eafc5a23451278 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 13 Feb 2024 16:07:33 -0800 Subject: [PATCH 1/5] Increase stalebot timeout --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 2de6d955a3..d0d0616eae 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -14,7 +14,7 @@ jobs: with: stale-issue-message: "" close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment." - days-before-issue-stale: 42 + days-before-issue-stale: 60 days-before-issue-close: 0 stale-issue-label: "stale" days-before-pr-stale: -1 From 3a9ce3cfa607e0e69850fa35e0c55b9e3f36a8a3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:06:32 -0800 Subject: [PATCH 2/5] Update stalebot message --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index d0d0616eae..fee541962d 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/stale@v5 with: stale-issue-message: "" - close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment." + close-issue-message: "This issue has been closed due to inactivity for 2 months. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment." days-before-issue-stale: 60 days-before-issue-close: 0 stale-issue-label: "stale" From d47182d9d186af5a5c72cc6ce1554a6eb1c040e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 14 Feb 2024 00:28:51 -0300 Subject: [PATCH 3/5] llamacpp_HF: do not use oobabooga/llama-tokenizer (#5499) --- modules/models.py | 23 +++++++---------------- modules/ui_model_menu.py | 2 +- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/modules/models.py b/modules/models.py index 038669f3c8..d8f1a9f815 100644 --- a/modules/models.py +++ b/modules/models.py @@ -254,26 +254,17 @@ def llamacpp_loader(model_name): def llamacpp_HF_loader(model_name): from modules.llamacpp_hf import LlamacppHF - for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]: - path = Path(f'{shared.args.model_dir}/{fname}') - if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']): - logger.info(f'Using tokenizer from: \"{path}\"') - break + path = Path(f'{shared.args.model_dir}/{model_name}') + + # Check if a HF tokenizer is available for the model + if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']): + logger.info(f'Using tokenizer from: \"{path}\"') else: - logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.") + logger.error("Could not load the model because a tokenizer in Transformers format was not found.") return None, None - if shared.args.no_use_fast: - logger.info('Loading the tokenizer with use_fast=False.') - - tokenizer = AutoTokenizer.from_pretrained( - path, - trust_remote_code=shared.args.trust_remote_code, - use_fast=not shared.args.no_use_fast - ) - model = LlamacppHF.from_pretrained(model_name) - return model, tokenizer + return model def ctransformers_loader(model_name): diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 2367909773..09d4276c95 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -143,7 +143,7 @@ def create_ui(): shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.') shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.') shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.") - shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.') ++ shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.") with gr.Column(): with gr.Row(): From b16958575f506b591688c5bcaa7db52c948c6cc8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:48:32 -0800 Subject: [PATCH 4/5] Minor bug fix --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 09d4276c95..387915b1a1 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -143,7 +143,7 @@ def create_ui(): shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.') shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.') shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.") -+ shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.") + shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.") with gr.Column(): with gr.Row(): From 04d8bdf929603439032e6d4b08d7e7b9c358c003 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 14 Feb 2024 06:31:20 -0800 Subject: [PATCH 5/5] Fix ExLlamaV2 requirement on Windows --- requirements.txt | 6 +++--- requirements_noavx2.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1b09063433..3a16e1ef0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,11 +50,11 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 386fbe9e62..4d9caf3604 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -50,11 +50,11 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"