From 33c4ce07209676d1bf94cc448896e5ed35a43c18 Mon Sep 17 00:00:00 2001 From: DominikKowalczyk <43239883+DominikKowalczyk@users.noreply.github.com> Date: Thu, 15 Feb 2024 03:28:26 +0100 Subject: [PATCH 01/15] Bump gradio to 4.19 (#5419) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- css/main.css | 5 +++++ extensions/gallery/script.py | 2 +- extensions/whisper_stt/script.py | 2 +- modules/block_requests.py | 3 ++- modules/gradio_hijack.py | 9 +++++++++ modules/ui_chat.py | 32 ++++++++++++++++---------------- modules/ui_default.py | 6 +++--- modules/ui_model_menu.py | 2 +- modules/ui_notebook.py | 6 +++--- modules/ui_session.py | 4 ++-- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- server.py | 20 +++++++++++--------- 20 files changed, 63 insertions(+), 46 deletions(-) create mode 100644 modules/gradio_hijack.py diff --git a/css/main.css b/css/main.css index a73d34e0b4..3a951cf881 100644 --- a/css/main.css +++ b/css/main.css @@ -89,6 +89,11 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { flex-wrap: nowrap; } +gradio-app > :first-child { + padding-left: var(--size-4) !important; + padding-right: var(--size-4) !important; +} + .header_bar { background-color: #f7f7f7; box-shadow: 0 2px 3px rgba(22 22 22 / 35%); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 1cb7f27f83..1bb8068a5b 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -119,7 +119,7 @@ def ui(): samples_per_page=settings["gallery-items_per_page"] ) - filter_box.change(lambda: None, None, None, _js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( + filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( filter_cards, filter_box, gallery).then( lambda x: gr.update(elem_classes='highlighted-border' if x != '' else ''), filter_box, filter_box, show_progress=False) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index cdc55687b3..efa58ce97a 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -64,7 +64,7 @@ def ui(): audio.change( auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( - None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}") + None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None) whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None) diff --git a/modules/block_requests.py b/modules/block_requests.py index 38f1a17fe2..8a72217cc5 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -40,8 +40,9 @@ def my_open(*args, **kwargs): with original_open(*args, **kwargs) as f: file_contents = f.read() - file_contents = file_contents.replace(b'\t\t', b'') + file_contents = file_contents.replace(b'\t\t', b'') file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1') + return io.BytesIO(file_contents) else: return original_open(*args, **kwargs) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py new file mode 100644 index 0000000000..026f3d6c2f --- /dev/null +++ b/modules/gradio_hijack.py @@ -0,0 +1,9 @@ +import gradio as gr + + +def Box(*args, **kwargs): + return gr.Blocks(*args, **kwargs) + + +if not hasattr(gr, 'Box'): + gr.Box = Box diff --git a/modules/ui_chat.py b/modules/ui_chat.py index a1b1af97e9..5502e99b8b 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -171,7 +171,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -179,28 +179,28 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -278,7 +278,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( @@ -286,7 +286,7 @@ def create_event_handlers(): chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( - lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + lambda: None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') shared.gradio['mode'].change( lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( @@ -322,15 +322,15 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') + None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -344,28 +344,28 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') - shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index 7db6f0d93a..1f9625516c 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -67,21 +67,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 387915b1a1..f44fd6d600 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -74,7 +74,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) - with gr.Box(): + with gr.Blocks(): with gr.Row(): with gr.Column(): with gr.Blocks(): diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 6bd5c919f7..a7c62baf5e 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,14 +67,14 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) @@ -83,7 +83,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) diff --git a/modules/ui_session.py b/modules/ui_session.py index 989046eae8..08929c3386 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -32,10 +32,10 @@ def create_ui(): # Reset interface event shared.gradio['reset_interface'].click( set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( - lambda: None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...

\'; setTimeout(function(){location.reload()},2500); return []}') + lambda: None, None, None, js='() => {document.body.innerHTML=\'

Reloading...

\'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click( - lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( + lambda: None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) shared.gradio['save_settings'].click( diff --git a/requirements.txt b/requirements.txt index 3a16e1ef0d..6048db701e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd.txt b/requirements_amd.txt index 21cb90d623..595ef45303 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 349acc315e..002079fc77 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 94996ec195..80f00c42a0 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 3aa9391be5..97af44c070 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 16aa167633..9acf7fbf15 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 1fa23bcfaa..c99fbcfe2c 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 4d9caf3604..cbf5c0a329 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 5bf2cc4c4f..432b8effaf 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/server.py b/server.py index 681fe4e7a1..15963b6efa 100644 --- a/server.py +++ b/server.py @@ -18,6 +18,7 @@ warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict') with RequestBlocker(): + from modules import gradio_hijack import gradio as gr import matplotlib @@ -145,21 +146,21 @@ def create_interface(): ui_model_menu.create_event_handlers() # Interface launch events - if shared.settings['dark_theme']: - shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") - - shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") - shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') - shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) - shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) + shared.gradio['interface'].load( + lambda: None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}").then( + lambda: None, None, None, js=f"() => {{{js}}}").then( + lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}').then( + partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_block() # Extensions block # Launch the interface - shared.gradio['interface'].queue(concurrency_count=64) + shared.gradio['interface'].queue() with OpenMonkeyPatch(): shared.gradio['interface'].launch( + max_threads=64, prevent_thread_lock=True, share=shared.args.share, server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), @@ -168,7 +169,8 @@ def create_interface(): auth=auth or None, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, - ssl_certfile=shared.args.ssl_certfile + ssl_certfile=shared.args.ssl_certfile, + allowed_paths=["."] ) From 7123ac3f773baa120d644e7b8ab10027758d1813 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 14 Feb 2024 23:34:30 -0300 Subject: [PATCH 02/15] Remove "Maximum UI updates/second" parameter (#5507) --- modules/shared.py | 1 - modules/text_generation.py | 14 +++----------- modules/ui.py | 1 - modules/ui_parameters.py | 1 - settings-template.yaml | 1 - 5 files changed, 3 insertions(+), 15 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 2861d69019..31894cb490 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,7 +46,6 @@ 'truncation_length_min': 0, 'truncation_length_max': 200000, 'max_tokens_second': 0, - 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index c62b9b01c5..dc8e33e683 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -80,19 +80,16 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) + if is_stream: cur_time = time.time() - # Maximum number of tokens/second + # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: @@ -100,13 +97,8 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = time.time() yield reply - - # Limit updates to avoid lag in the Gradio UI - # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time - yield reply + yield reply if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 06498f69bf..5d7b838e52 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -112,7 +112,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'auto_max_new_tokens', 'max_tokens_second', - 'max_updates_second', 'prompt_lookup_num_tokens', 'seed', 'temperature', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 078590dc11..1a4ea965e9 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -72,7 +72,6 @@ def create_ui(default_preset): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') diff --git a/settings-template.yaml b/settings-template.yaml index 871011168a..095f25ec18 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -15,7 +15,6 @@ truncation_length: 2048 truncation_length_min: 0 truncation_length_max: 200000 max_tokens_second: 0 -max_updates_second: 0 prompt_lookup_num_tokens: 0 custom_stopping_strings: '' custom_token_bans: '' From 549f106879868f4152e92169ac62515b8a996d02 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 14 Feb 2024 21:57:48 -0800 Subject: [PATCH 03/15] Bump ExLlamaV2 to v0.0.13.2 --- requirements.txt | 10 +++++----- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_noavx2.txt | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6048db701e..6d9d9305dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,11 +50,11 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 595ef45303..5baf5df099 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -38,8 +38,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 002079fc77..de0697fc22 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -36,8 +36,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 80f00c42a0..b8bc229bb4 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -34,4 +34,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 97af44c070..f3b2915683 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index cbf5c0a329..da1513e734 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -50,11 +50,11 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From ea0e1feee7cf593814508f6e18f65344ecb31a35 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 14 Feb 2024 21:58:24 -0800 Subject: [PATCH 04/15] Bump llama-cpp-python to 0.2.43 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 12 ++++++------ requirements_apple_silicon.txt | 16 ++++++++-------- requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 56 insertions(+), 56 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6d9d9305dd..9e79c67108 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,22 +28,22 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 5baf5df099..4bbb24b527 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -28,14 +28,14 @@ bitsandbytes==0.38.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.43+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.43+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index de0697fc22..d26025fe80 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -28,10 +28,10 @@ bitsandbytes==0.38.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index b8bc229bb4..01162475f8 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -28,10 +28,10 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index f3b2915683..fc875bea18 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -28,12 +28,12 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 9acf7fbf15..f8b3292f17 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -28,7 +28,7 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index c99fbcfe2c..045560883f 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -28,7 +28,7 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index da1513e734..ea108aaa33 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -28,22 +28,22 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 080f7132c00d90785763c84199020d4d57b9ad88 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 15 Feb 2024 20:40:23 -0300 Subject: [PATCH 05/15] Revert gradio to 3.50.2 (#5513) --- css/main.css | 5 ----- extensions/gallery/script.py | 2 +- extensions/whisper_stt/script.py | 2 +- modules/block_requests.py | 3 +-- modules/gradio_hijack.py | 9 --------- modules/shared.py | 1 + modules/text_generation.py | 14 +++++++++++--- modules/ui.py | 1 + modules/ui_chat.py | 32 ++++++++++++++++---------------- modules/ui_default.py | 6 +++--- modules/ui_model_menu.py | 2 +- modules/ui_notebook.py | 6 +++--- modules/ui_parameters.py | 1 + modules/ui_session.py | 4 ++-- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- server.py | 20 +++++++++----------- settings-template.yaml | 1 + 25 files changed, 61 insertions(+), 66 deletions(-) delete mode 100644 modules/gradio_hijack.py diff --git a/css/main.css b/css/main.css index 3a951cf881..a73d34e0b4 100644 --- a/css/main.css +++ b/css/main.css @@ -89,11 +89,6 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { flex-wrap: nowrap; } -gradio-app > :first-child { - padding-left: var(--size-4) !important; - padding-right: var(--size-4) !important; -} - .header_bar { background-color: #f7f7f7; box-shadow: 0 2px 3px rgba(22 22 22 / 35%); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 1bb8068a5b..1cb7f27f83 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -119,7 +119,7 @@ def ui(): samples_per_page=settings["gallery-items_per_page"] ) - filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( + filter_box.change(lambda: None, None, None, _js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( filter_cards, filter_box, gallery).then( lambda x: gr.update(elem_classes='highlighted-border' if x != '' else ''), filter_box, filter_box, show_progress=False) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index efa58ce97a..cdc55687b3 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -64,7 +64,7 @@ def ui(): audio.change( auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( - None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") + None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}") whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None) whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None) diff --git a/modules/block_requests.py b/modules/block_requests.py index 8a72217cc5..38f1a17fe2 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -40,9 +40,8 @@ def my_open(*args, **kwargs): with original_open(*args, **kwargs) as f: file_contents = f.read() - file_contents = file_contents.replace(b'\t\t', b'') + file_contents = file_contents.replace(b'\t\t', b'') file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1') - return io.BytesIO(file_contents) else: return original_open(*args, **kwargs) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py deleted file mode 100644 index 026f3d6c2f..0000000000 --- a/modules/gradio_hijack.py +++ /dev/null @@ -1,9 +0,0 @@ -import gradio as gr - - -def Box(*args, **kwargs): - return gr.Blocks(*args, **kwargs) - - -if not hasattr(gr, 'Box'): - gr.Box = Box diff --git a/modules/shared.py b/modules/shared.py index 31894cb490..2861d69019 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,6 +46,7 @@ 'truncation_length_min': 0, 'truncation_length_max': 200000, 'max_tokens_second': 0, + 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index dc8e33e683..c62b9b01c5 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -80,16 +80,19 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap state = copy.deepcopy(state) state['stream'] = True + min_update_interval = 0 + if state.get('max_updates_second', 0) > 0: + min_update_interval = 1 / state['max_updates_second'] + # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) - if is_stream: cur_time = time.time() - # Limit number of tokens/second to make text readable in real time + # Maximum number of tokens/second if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: @@ -97,8 +100,13 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = time.time() yield reply + + # Limit updates to avoid lag in the Gradio UI + # API updates are not limited else: - yield reply + if cur_time - last_update > min_update_interval: + last_update = cur_time + yield reply if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 5d7b838e52..06498f69bf 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -112,6 +112,7 @@ def list_interface_input_elements(): 'max_new_tokens', 'auto_max_new_tokens', 'max_tokens_second', + 'max_updates_second', 'prompt_lookup_num_tokens', 'seed', 'temperature', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 5502e99b8b..a1b1af97e9 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -171,7 +171,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -179,28 +179,28 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -278,7 +278,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( @@ -286,7 +286,7 @@ def create_event_handlers(): chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( - lambda: None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') shared.gradio['mode'].change( lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( @@ -322,15 +322,15 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') + None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -344,28 +344,28 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') - shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index 1f9625516c..7db6f0d93a 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -67,21 +67,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index f44fd6d600..387915b1a1 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -74,7 +74,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) - with gr.Blocks(): + with gr.Box(): with gr.Row(): with gr.Column(): with gr.Blocks(): diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index a7c62baf5e..6bd5c919f7 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,14 +67,14 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) @@ -83,7 +83,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 1a4ea965e9..078590dc11 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -72,6 +72,7 @@ def create_ui(default_preset): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') + shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') diff --git a/modules/ui_session.py b/modules/ui_session.py index 08929c3386..989046eae8 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -32,10 +32,10 @@ def create_ui(): # Reset interface event shared.gradio['reset_interface'].click( set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( - lambda: None, None, None, js='() => {document.body.innerHTML=\'

Reloading...

\'; setTimeout(function(){location.reload()},2500); return []}') + lambda: None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...

\'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click( - lambda: None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( + lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) shared.gradio['save_settings'].click( diff --git a/requirements.txt b/requirements.txt index 9e79c67108..f0c5fd6619 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd.txt b/requirements_amd.txt index 4bbb24b527..8e11d3c525 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index d26025fe80..89edbb0787 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 01162475f8..8b02c463ea 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index fc875bea18..31a51ece4c 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index f8b3292f17..e4b961a5bd 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 045560883f..5af6b9a8d7 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index ea108aaa33..325abfff07 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 432b8effaf..5bf2cc4c4f 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -gradio==4.19.* +gradio==3.50.* hqq==0.1.3 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/server.py b/server.py index 15963b6efa..681fe4e7a1 100644 --- a/server.py +++ b/server.py @@ -18,7 +18,6 @@ warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict') with RequestBlocker(): - from modules import gradio_hijack import gradio as gr import matplotlib @@ -146,21 +145,21 @@ def create_interface(): ui_model_menu.create_event_handlers() # Interface launch events - shared.gradio['interface'].load( - lambda: None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}").then( - lambda: None, None, None, js=f"() => {{{js}}}").then( - lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}').then( - partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) + if shared.settings['dark_theme']: + shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") + + shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") + shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) + shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_block() # Extensions block # Launch the interface - shared.gradio['interface'].queue() + shared.gradio['interface'].queue(concurrency_count=64) with OpenMonkeyPatch(): shared.gradio['interface'].launch( - max_threads=64, prevent_thread_lock=True, share=shared.args.share, server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), @@ -169,8 +168,7 @@ def create_interface(): auth=auth or None, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, - ssl_certfile=shared.args.ssl_certfile, - allowed_paths=["."] + ssl_certfile=shared.args.ssl_certfile ) diff --git a/settings-template.yaml b/settings-template.yaml index 095f25ec18..871011168a 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -15,6 +15,7 @@ truncation_length: 2048 truncation_length_min: 0 truncation_length_max: 200000 max_tokens_second: 0 +max_updates_second: 0 prompt_lookup_num_tokens: 0 custom_stopping_strings: '' custom_token_bans: '' From b2b74c83a606c6e2c4a23d536cae4adffca8034d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 15 Feb 2024 19:03:47 -0800 Subject: [PATCH 06/15] Fix Qwen1.5 in llamacpp_HF --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index d8f1a9f815..ea85da8426 100644 --- a/modules/models.py +++ b/modules/models.py @@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name): path = Path(f'{shared.args.model_dir}/{model_name}') # Check if a HF tokenizer is available for the model - if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']): + if all((path / file).exists() for file in ['tokenizer.json', 'tokenizer_config.json']): logger.info(f'Using tokenizer from: \"{path}\"') else: logger.error("Could not load the model because a tokenizer in Transformers format was not found.") From 44018c2f69d34f6d59c4d8acc9ddc55bddcd2eb2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:43:24 -0300 Subject: [PATCH 07/15] Add a "llamacpp_HF creator" menu (#5519) --- download-model.py | 10 ++++--- modules/ui_model_menu.py | 64 +++++++++++++++++++++++++++++++++------- modules/utils.py | 11 ++++++- 3 files changed, 69 insertions(+), 16 deletions(-) diff --git a/download-model.py b/download-model.py index 82e956d6e1..09bc9a86e0 100644 --- a/download-model.py +++ b/download-model.py @@ -156,9 +156,8 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp is_llamacpp = has_gguf and specific_file is not None return links, sha256, is_lora, is_llamacpp - def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None): - if base_folder is None: - base_folder = 'models' if not is_lora else 'loras' + def get_output_folder(self, model, branch, is_lora, is_llamacpp=False): + base_folder = 'models' if not is_lora else 'loras' # If the model is of type GGUF, save directly in the base_folder if is_llamacpp: @@ -303,7 +302,10 @@ def check_model_files(self, model, branch, links, sha256, output_folder): links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file) # Get the output folder - output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output) + if args.output: + output_folder = Path(args.output) + else: + output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp) if args.check: # Check previously downloaded files diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 387915b1a1..ca0de873e1 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -143,17 +143,27 @@ def create_ui(): shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.') shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.') shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.") - shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.") + shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.") with gr.Column(): with gr.Row(): shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu) - shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu) - shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu) - with gr.Row(): - shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu) - shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu) + with gr.Tab("Download"): + shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu) + shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu) + with gr.Row(): + shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu) + shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu) + + with gr.Tab("llamacpp_HF creator"): + with gr.Row(): + shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu) + ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu) + + shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1) + shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu) + gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.") with gr.Row(): shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') @@ -203,6 +213,7 @@ def create_event_handlers(): shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True) shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True) shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model')) + shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True) def load_model_wrapper(selected_model, loader, autoload=False): @@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras): def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False): try: - progress(0.0) downloader = importlib.import_module("download-model").ModelDownloader() + + progress(0.0) model, branch = downloader.sanitize_model_and_branch_names(repo_id, None) + yield ("Getting the download links from Hugging Face") links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file) if return_links: - yield '\n\n'.join([f"`{Path(link).name}`" for link in links]) + output = "```\n" + for link in links: + output += f"{Path(link).name}" + "\n" + + output += "```" + yield output return yield ("Getting the output folder") - base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir - output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder) + output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp) if check: progress(0.5) + yield ("Checking previously downloaded files") downloader.check_model_files(model, branch, links, sha256, output_folder) progress(1.0) else: yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`") downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp) - yield ("Done!") + + yield (f"Model successfully saved to `{output_folder}/`.") + except: + progress(1.0) + yield traceback.format_exc().replace('\n', '\n\n') + + +def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()): + try: + downloader = importlib.import_module("download-model").ModelDownloader() + + progress(0.0) + model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None) + + yield ("Getting the tokenizer files links from Hugging Face") + links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True) + output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF") + + yield (f"Downloading tokenizer to `{output_folder}`") + downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False) + + # Move the GGUF + (Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name) + + yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.") except: progress(1.0) yield traceback.format_exc().replace('\n', '\n\n') diff --git a/modules/utils.py b/modules/utils.py index de6d32dca6..be06ec3447 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -76,7 +76,16 @@ def get_available_models(): model_list = [] for item in list(Path(f'{shared.args.model_dir}/').glob('*')): if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name: - model_list.append(re.sub('.pth$', '', item.name)) + model_list.append(item.name) + + return ['None'] + sorted(model_list, key=natural_keys) + + +def get_available_ggufs(): + model_list = [] + for item in Path(f'{shared.args.model_dir}/').glob('*'): + if item.is_file() and item.name.lower().endswith(".gguf"): + model_list.append(item.name) return ['None'] + sorted(model_list, key=natural_keys) From f465b7b486987114905ed05a5a2f69cdee507eab Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:55:27 -0300 Subject: [PATCH 08/15] Downloader: start one session per file (#5520) --- download-model.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/download-model.py b/download-model.py index 09bc9a86e0..d7cf9273f2 100644 --- a/download-model.py +++ b/download-model.py @@ -26,13 +26,16 @@ class ModelDownloader: def __init__(self, max_retries=5): - self.session = requests.Session() - if max_retries: - self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries)) - self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries)) + self.max_retries = max_retries + + def get_session(self): + session = requests.Session() + if self.max_retries: + session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=self.max_retries)) + session.mount('https://huggingface.co', HTTPAdapter(max_retries=self.max_retries)) if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None: - self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS')) + session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS')) try: from huggingface_hub import get_token @@ -41,7 +44,9 @@ def __init__(self, max_retries=5): token = os.getenv("HF_TOKEN") if token is not None: - self.session.headers = {'authorization': f'Bearer {token}'} + session.headers = {'authorization': f'Bearer {token}'} + + return session def sanitize_model_and_branch_names(self, model, branch): if model[-1] == '/': @@ -65,6 +70,7 @@ def sanitize_model_and_branch_names(self, model, branch): return model, branch def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None): + session = self.get_session() page = f"/api/models/{model}/tree/{branch}" cursor = b"" @@ -78,7 +84,7 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp is_lora = False while True: url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "") - r = self.session.get(url, timeout=10) + r = session.get(url, timeout=10) r.raise_for_status() content = r.content @@ -171,6 +177,7 @@ def get_output_folder(self, model, branch, is_lora, is_llamacpp=False): return output_folder def get_single_file(self, url, output_folder, start_from_scratch=False): + session = self.get_session() filename = Path(url.rsplit('/', 1)[1]) output_path = output_folder / filename headers = {} @@ -178,7 +185,7 @@ def get_single_file(self, url, output_folder, start_from_scratch=False): if output_path.exists() and not start_from_scratch: # Check if the file has already been downloaded completely - r = self.session.get(url, stream=True, timeout=10) + r = session.get(url, stream=True, timeout=10) total_size = int(r.headers.get('content-length', 0)) if output_path.stat().st_size >= total_size: return @@ -187,7 +194,7 @@ def get_single_file(self, url, output_folder, start_from_scratch=False): headers = {'Range': f'bytes={output_path.stat().st_size}-'} mode = 'ab' - with self.session.get(url, stream=True, headers=headers, timeout=10) as r: + with session.get(url, stream=True, headers=headers, timeout=10) as r: r.raise_for_status() # Do not continue the download if the request was unsuccessful total_size = int(r.headers.get('content-length', 0)) block_size = 1024 * 1024 # 1MB From 0e1d8d5601ef8d5140c2097a91225326fac22cda Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 07:59:09 -0800 Subject: [PATCH 09/15] Instruction template: make "Send to default/notebook" work without a tokenizer --- modules/chat.py | 65 +++++++++++++++++++++++----------------------- modules/ui_chat.py | 2 +- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index bddc31326b..c431d2d092 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -166,53 +166,54 @@ def make_prompt(messages): prompt = remove_extra_bos(prompt) return prompt - # Handle truncation - max_length = get_max_prompt_length(state) prompt = make_prompt(messages) - encoded_length = get_encoded_length(prompt) - while len(messages) > 0 and encoded_length > max_length: + # Handle truncation + if shared.tokenizer is not None: + max_length = get_max_prompt_length(state) + encoded_length = get_encoded_length(prompt) + while len(messages) > 0 and encoded_length > max_length: + + # Remove old message, save system message + if len(messages) > 2 and messages[0]['role'] == 'system': + messages.pop(1) - # Remove old message, save system message - if len(messages) > 2 and messages[0]['role'] == 'system': - messages.pop(1) + # Remove old message when no system message is present + elif len(messages) > 1 and messages[0]['role'] != 'system': + messages.pop(0) - # Remove old message when no system message is present - elif len(messages) > 1 and messages[0]['role'] != 'system': - messages.pop(0) + # Resort to truncating the user input + else: - # Resort to truncating the user input - else: + user_message = messages[-1]['content'] - user_message = messages[-1]['content'] + # Bisect the truncation point + left, right = 0, len(user_message) - 1 - # Bisect the truncation point - left, right = 0, len(user_message) - 1 + while right - left > 1: + mid = (left + right) // 2 - while right - left > 1: - mid = (left + right) // 2 + messages[-1]['content'] = user_message[mid:] + prompt = make_prompt(messages) + encoded_length = get_encoded_length(prompt) - messages[-1]['content'] = user_message[mid:] + if encoded_length <= max_length: + right = mid + else: + left = mid + + messages[-1]['content'] = user_message[right:] prompt = make_prompt(messages) encoded_length = get_encoded_length(prompt) - - if encoded_length <= max_length: - right = mid + if encoded_length > max_length: + logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n") + raise ValueError else: - left = mid + logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.") + break - messages[-1]['content'] = user_message[right:] prompt = make_prompt(messages) encoded_length = get_encoded_length(prompt) - if encoded_length > max_length: - logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n") - raise ValueError - else: - logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.") - break - - prompt = make_prompt(messages) - encoded_length = get_encoded_length(prompt) if also_return_rows: return prompt, [message['content'] for message in messages] diff --git a/modules/ui_chat.py b/modules/ui_chat.py index a1b1af97e9..42e5cae2bf 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -109,7 +109,7 @@ def create_chat_settings_ui(): with gr.Row(): with gr.Column(): with gr.Row(): - shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', value='Select template to load...', elem_classes='slim-dropdown') + shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='Select template to load...', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu) shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button') shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) From 76d28eaa9e26ac3c8e6f9b06a1f7d25e75894f56 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 14:21:17 -0300 Subject: [PATCH 10/15] Add a menu for customizing the instruction template for the model (#5521) --- modules/chat.py | 3 ++ modules/models_settings.py | 61 +++++++++++++++++++++++++++----------- modules/shared.py | 23 ++++++++++---- modules/ui_chat.py | 2 +- modules/ui_model_menu.py | 14 +++++++++ modules/utils.py | 2 +- 6 files changed, 81 insertions(+), 24 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index c431d2d092..de7f19defc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -691,6 +691,9 @@ def load_character(character, name1, name2): def load_instruction_template(template): + if template == 'None': + return '' + for filepath in [Path(f'instruction-templates/{template}.yaml'), Path('instruction-templates/Alpaca.yaml')]: if filepath.exists(): break diff --git a/modules/models_settings.py b/modules/models_settings.py index 9acc7efa3b..b4473275c9 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -243,27 +243,54 @@ def save_model_settings(model, state): Save the settings for this model to models/config-user.yaml ''' if model == 'None': - yield ("Not saving the settings because no model is loaded.") + yield ("Not saving the settings because no model is selected in the menu.") return - with Path(f'{shared.args.model_dir}/config-user.yaml') as p: - if p.exists(): - user_config = yaml.safe_load(open(p, 'r').read()) - else: - user_config = {} + user_config = shared.load_user_config() + model_regex = model + '$' # For exact matches + if model_regex not in user_config: + user_config[model_regex] = {} + + for k in ui.list_model_elements(): + if k == 'loader' or k in loaders.loaders_and_params[state['loader']]: + user_config[model_regex][k] = state[k] - model_regex = model + '$' # For exact matches - if model_regex not in user_config: - user_config[model_regex] = {} + shared.user_config = user_config - for k in ui.list_model_elements(): - if k == 'loader' or k in loaders.loaders_and_params[state['loader']]: - user_config[model_regex][k] = state[k] + output = yaml.dump(user_config, sort_keys=False) + p = Path(f'{shared.args.model_dir}/config-user.yaml') + with open(p, 'w') as f: + f.write(output) - shared.user_config = user_config + yield (f"Settings for `{model}` saved to `{p}`.") - output = yaml.dump(user_config, sort_keys=False) - with open(p, 'w') as f: - f.write(output) - yield (f"Settings for `{model}` saved to `{p}`.") +def save_instruction_template(model, template): + ''' + Similar to the function above, but it saves only the instruction template. + ''' + if model == 'None': + yield ("Not saving the template because no model is selected in the menu.") + return + + user_config = shared.load_user_config() + model_regex = model + '$' # For exact matches + if model_regex not in user_config: + user_config[model_regex] = {} + + if template == 'None': + user_config[model_regex].pop('instruction_template', None) + else: + user_config[model_regex]['instruction_template'] = template + + shared.user_config = user_config + + output = yaml.dump(user_config, sort_keys=False) + p = Path(f'{shared.args.model_dir}/config-user.yaml') + with open(p, 'w') as f: + f.write(output) + + if template == 'None': + yield (f"Instruction template for `{model}` unset in `{p}`, as the value for template was `{template}`.") + else: + yield (f"Instruction template for `{model}` saved to `{p}` as `{template}`.") diff --git a/modules/shared.py b/modules/shared.py index 2861d69019..d8aef36715 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -279,6 +279,23 @@ def is_chat(): return True +def load_user_config(): + ''' + Loads custom model-specific settings + ''' + if Path(f'{args.model_dir}/config-user.yaml').exists(): + file_content = open(f'{args.model_dir}/config-user.yaml', 'r').read().strip() + + if file_content: + user_config = yaml.safe_load(file_content) + else: + user_config = {} + else: + user_config = {} + + return user_config + + args.loader = fix_loader_name(args.loader) # Activate the multimodal extension @@ -297,11 +314,7 @@ def is_chat(): model_config = {} # Load custom model-specific settings -with Path(f'{args.model_dir}/config-user.yaml') as p: - if p.exists(): - user_config = yaml.safe_load(open(p, 'r').read()) - else: - user_config = {} +user_config = load_user_config() model_config = OrderedDict(model_config) user_config = OrderedDict(user_config) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 42e5cae2bf..7576628d5b 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -109,7 +109,7 @@ def create_chat_settings_ui(): with gr.Row(): with gr.Column(): with gr.Row(): - shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='Select template to load...', elem_classes='slim-dropdown') + shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='None', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu) shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button') shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index ca0de873e1..94b0193761 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -17,6 +17,7 @@ from modules.models_settings import ( apply_model_settings_to_state, get_model_metadata, + save_instruction_template, save_model_settings, update_model_parameters ) @@ -165,6 +166,14 @@ def create_ui(): shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu) gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.") + with gr.Tab("Customize instruction template"): + with gr.Row(): + shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu) + + shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu) + gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenver the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.") + with gr.Row(): shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') @@ -214,6 +223,7 @@ def create_event_handlers(): shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True) shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model')) shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True) + shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True) def load_model_wrapper(selected_model, loader, autoload=False): @@ -320,3 +330,7 @@ def update_truncation_length(current_length, state): return state['n_ctx'] return current_length + + +def save_model_template(model, template): + pass diff --git a/modules/utils.py b/modules/utils.py index be06ec3447..4b65736b6b 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -114,7 +114,7 @@ def get_available_instruction_templates(): if os.path.exists(path): paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml')) - return ['Select template to load...'] + sorted(set((k.stem for k in paths)), key=natural_keys) + return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys) def get_available_extensions(): From 4039999be5c4d1197491035d07e2de9ac5943994 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:29:26 -0800 Subject: [PATCH 11/15] Autodetect llamacpp_HF loader when tokenizer exists --- modules/models.py | 2 +- modules/models_settings.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/models.py b/modules/models.py index ea85da8426..605680630c 100644 --- a/modules/models.py +++ b/modules/models.py @@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name): path = Path(f'{shared.args.model_dir}/{model_name}') # Check if a HF tokenizer is available for the model - if all((path / file).exists() for file in ['tokenizer.json', 'tokenizer_config.json']): + if all((path / file).exists() for file in ['tokenizer_config.json']): logger.info(f'Using tokenizer from: \"{path}\"') else: logger.error("Could not load the model because a tokenizer in Transformers format was not found.") diff --git a/modules/models_settings.py b/modules/models_settings.py index b4473275c9..659bc35d4a 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -153,6 +153,8 @@ def infer_loader(model_name, model_settings): loader = 'ExLlamav2_HF' elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()): loader = 'AutoAWQ' + elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): + loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: loader = 'llama.cpp' elif re.match(r'.*\.gguf', model_name.lower()): @@ -225,7 +227,7 @@ def apply_model_settings_to_state(model, state): loader = model_settings.pop('loader') # If the user is using an alternative loader for the same model type, let them keep using it - if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['llamacpp_HF', 'ctransformers']): + if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['ctransformers']): state['loader'] = loader for k in model_settings: From a6730f88f7c88324fe08fd0ca8f01102259b56b2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:26:10 -0300 Subject: [PATCH 12/15] Add --autosplit flag for ExLlamaV2 (#5524) --- modules/exllamav2.py | 21 ++++++++++++--------- modules/exllamav2_hf.py | 20 ++++++++++++-------- modules/loaders.py | 2 ++ modules/shared.py | 1 + modules/ui.py | 1 + modules/ui_model_menu.py | 1 + 6 files changed, 29 insertions(+), 17 deletions(-) diff --git a/modules/exllamav2.py b/modules/exllamav2.py index 551ed4988c..239c203162 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -51,18 +51,21 @@ def from_pretrained(self, path_to_model): model = ExLlamaV2(config) - split = None - if shared.args.gpu_split: - split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] - - model.load(split) - - tokenizer = ExLlamaV2Tokenizer(config) if shared.args.cache_8bit: - cache = ExLlamaV2Cache_8bit(model) + cache = ExLlamaV2Cache_8bit(model, lazy=True) else: - cache = ExLlamaV2Cache(model) + cache = ExLlamaV2Cache(model, lazy=True) + if shared.args.autosplit: + model.load_autosplit(cache) + else: + split = None + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + + model.load(split) + + tokenizer = ExLlamaV2Tokenizer(config) generator = ExLlamaV2StreamingGenerator(model, cache, tokenizer) result = self() diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index 944c39dd29..e5b35a44f3 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -37,18 +37,22 @@ def __init__(self, config: ExLlamaV2Config): super().__init__(PretrainedConfig()) self.ex_config = config self.ex_model = ExLlamaV2(config) - split = None - if shared.args.gpu_split: - split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] - - self.ex_model.load(split) - self.generation_config = GenerationConfig() self.loras = None + self.generation_config = GenerationConfig() if shared.args.cache_8bit: - self.ex_cache = ExLlamaV2Cache_8bit(self.ex_model) + self.ex_cache = ExLlamaV2Cache_8bit(self.ex_model, lazy=True) else: - self.ex_cache = ExLlamaV2Cache(self.ex_model) + self.ex_cache = ExLlamaV2Cache(self.ex_model, lazy=True) + + if shared.args.autosplit: + self.ex_model.load_autosplit(self.ex_cache) + else: + split = None + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + + self.ex_model.load(split) self.past_seq = None if shared.args.cfg_cache: diff --git a/modules/loaders.py b/modules/loaders.py index 26b7c5e283..08a7f22937 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -78,6 +78,7 @@ 'no_flash_attn', 'num_experts_per_token', 'cache_8bit', + 'autosplit', 'alpha_value', 'compress_pos_emb', 'trust_remote_code', @@ -89,6 +90,7 @@ 'no_flash_attn', 'num_experts_per_token', 'cache_8bit', + 'autosplit', 'alpha_value', 'compress_pos_emb', 'exllamav2_info', diff --git a/modules/shared.py b/modules/shared.py index d8aef36715..7bef04bf49 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -134,6 +134,7 @@ # ExLlamaV2 group = parser.add_argument_group('ExLlamaV2') group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.') +group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.') group.add_argument('--max_seq_len', type=int, default=2048, help='Maximum sequence length.') group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.') group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.') diff --git a/modules/ui.py b/modules/ui.py index 06498f69bf..bb5a33394f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -76,6 +76,7 @@ def list_model_elements(): 'no_flash_attn', 'num_experts_per_token', 'cache_8bit', + 'autosplit', 'threads', 'threads_batch', 'n_batch', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 94b0193761..14bc7cafb4 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -132,6 +132,7 @@ def create_ui(): shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk) shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16) shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') + shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.') shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn, info='Force flash-attention to not be used.') shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.') shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.') From c375c753d6152da9ad2c57060c4646214ce2be73 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:47:39 -0800 Subject: [PATCH 13/15] Bump bitsandbytes to 0.42 (Linux only) --- requirements.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index f0c5fd6619..bdbdf8199e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 8b02c463ea..4030bf9af5 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # Mac wheels diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 31a51ece4c..8cea6d6268 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # Mac wheels diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e4b961a5bd..283e2b19c6 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 5af6b9a8d7..ba2d3d1033 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 325abfff07..5025fe9c19 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -24,7 +24,7 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 5bf2cc4c4f..2b6fa38c56 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -24,5 +24,5 @@ tqdm wandb # bitsandbytes -bitsandbytes==0.41.1; platform_system != "Windows" +bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" From fa1019e8fed7d94ff9f01c4baf6902a289b1bcfc Mon Sep 17 00:00:00 2001 From: fschuh Date: Fri, 16 Feb 2024 19:40:51 -0500 Subject: [PATCH 14/15] Removed extra spaces from Mistral instruction template that were causing Mistral to misbehave (#5517) --- instruction-templates/Mistral.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instruction-templates/Mistral.yaml b/instruction-templates/Mistral.yaml index 113c238b5e..02e5b20da1 100644 --- a/instruction-templates/Mistral.yaml +++ b/instruction-templates/Mistral.yaml @@ -4,7 +4,7 @@ instruction_template: |- {{- message['content'] -}} {%- else -%} {%- if message['role'] == 'user' -%} - {{-' [INST] ' + message['content'].rstrip() + ' [/INST] '-}} + {{-'[INST] ' + message['content'].rstrip() + ' [/INST]'-}} {%- else -%} {{-'' + message['content'] + '' -}} {%- endif -%} From af0bbf5b1373bfa0830cf43d040cbfabd5beda54 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 17 Feb 2024 09:01:04 -0800 Subject: [PATCH 15/15] Lint --- modules/ui_model_menu.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 14bc7cafb4..ac6a8a8f5b 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -173,7 +173,7 @@ def create_ui(): ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu) shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu) - gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenver the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.") + gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.") with gr.Row(): shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') @@ -331,7 +331,3 @@ def update_truncation_length(current_length, state): return state['n_ctx'] return current_length - - -def save_model_template(model, template): - pass