From 987b3df6bab593778db2dd62e09b8b76015a488f Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 29 Mar 2024 22:43:39 -0400 Subject: [PATCH 01/10] Add translation feature and dependencies --- .github/scripts/Package-Windows.ps1 | 7 +- CMakeLists.txt | 7 +- cmake/BuildCTranslate2.cmake | 104 ++++++++++++++ cmake/BuildSentencepiece.cmake | 61 +++++++++ data/locale/en-US.ini | 3 + src/transcription-filter-data.h | 8 ++ src/transcription-filter.cpp | 68 +++++++++ src/translation/language_codes.h | 205 ++++++++++++++++++++++++++++ src/translation/translation.cpp | 87 ++++++++++++ src/translation/translation.h | 27 ++++ 10 files changed, 575 insertions(+), 2 deletions(-) create mode 100644 cmake/BuildCTranslate2.cmake create mode 100644 cmake/BuildSentencepiece.cmake create mode 100644 src/translation/language_codes.h create mode 100644 src/translation/translation.cpp create mode 100644 src/translation/translation.h diff --git a/.github/scripts/Package-Windows.ps1 b/.github/scripts/Package-Windows.ps1 index a09f54a..3d1a07c 100644 --- a/.github/scripts/Package-Windows.ps1 +++ b/.github/scripts/Package-Windows.ps1 @@ -49,7 +49,12 @@ function Package { $BuildSpec = Get-Content -Path ${BuildSpecFile} -Raw | ConvertFrom-Json $ProductName = $BuildSpec.name $ProductVersion = $BuildSpec.version - $CudaName = "cuda${Cublas}" + # Check if $cublas is cpu or cuda + if ( $Cublas -eq 'cpu' ) { + $CudaName = 'cpu' + } else { + $CudaName = "cuda${Cublas}" + } $OutputName = "${ProductName}-${ProductVersion}-windows-${Target}-${CudaName}" diff --git a/CMakeLists.txt b/CMakeLists.txt index ac7339f..90473bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,10 @@ endif() include(cmake/BuildWhispercpp.cmake) target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Whispercpp) +include(cmake/BuildCTranslate2.cmake) +include(cmake/BuildSentencepiece.cmake) +target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ct2 sentencepiece) + target_sources( ${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c @@ -65,6 +69,7 @@ target_sources( src/whisper-utils/whisper-processing.cpp src/model-utils/model-downloader.cpp src/model-utils/model-downloader-ui.cpp - src/whisper-utils/whisper-utils.cpp) + src/whisper-utils/whisper-utils.cpp + src/translation/translation.cpp) set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name}) diff --git a/cmake/BuildCTranslate2.cmake b/cmake/BuildCTranslate2.cmake new file mode 100644 index 0000000..df48fdc --- /dev/null +++ b/cmake/BuildCTranslate2.cmake @@ -0,0 +1,104 @@ +# build the CTranslate2 library from source https://github.com/OpenNMT/CTranslate2.git + +include(ExternalProject) +include(FetchContent) + +if(APPLE) + + FetchContent_Declare( + ctranslate2_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/libctranslate2-macos-Release-1.1.1.tar.gz + URL_HASH SHA256=da04d88ecc1ea105f8ee672e4eab33af96e50c999c5cc8170e105e110392182b) + FetchContent_MakeAvailable(ctranslate2_fetch) + + add_library(ct2 INTERFACE) + target_link_libraries(ct2 INTERFACE "-framework Accelerate" ${ctranslate2_fetch_SOURCE_DIR}/lib/libctranslate2.a + ${ctranslate2_fetch_SOURCE_DIR}/lib/libcpu_features.a) + set_target_properties(ct2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ctranslate2_fetch_SOURCE_DIR}/include) + target_compile_options(ct2 INTERFACE -Wno-shorten-64-to-32) + +elseif(WIN32) + + # check CPU_OR_CUDA environment variable + if(NOT DEFINED ENV{CPU_OR_CUDA}) + message(FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to either CPU or CUDA") + endif() + + if($ENV{CPU_OR_CUDA} STREQUAL "cpu") + FetchContent_Declare( + ctranslate2_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cpu.zip + URL_HASH SHA256=30ff8b2499b8d3b5a6c4d6f7f8ddbc89e745ff06e0050b645e3b7c9b369451a3) + else() + # add compile definitions for CUDA + add_compile_definitions(POLYGLOT_WITH_CUDA) + add_compile_definitions(POLYGLOT_CUDA_VERSION=$ENV{CPU_OR_CUDA}) + + if($ENV{CPU_OR_CUDA} STREQUAL "12.2.0") + FetchContent_Declare( + ctranslate2_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cuda12.2.0.zip + URL_HASH SHA256=131724d510f9f2829970953a1bc9e4e8fb7b4cbc8218e32270dcfe6172a51558) + elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0") + FetchContent_Declare( + ctranslate2_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cuda11.8.0.zip + URL_HASH SHA256=a120bee82f821df35a4646add30ac18b5c23e4e16b56fa7ba338eeae336e0d81) + else() + message(FATAL_ERROR "Unsupported CUDA version: $ENV{CPU_OR_CUDA}") + endif() + endif() + + FetchContent_MakeAvailable(ctranslate2_fetch) + + add_library(ct2 INTERFACE) + target_link_libraries(ct2 INTERFACE ${ctranslate2_fetch_SOURCE_DIR}/lib/ctranslate2.lib) + set_target_properties(ct2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ctranslate2_fetch_SOURCE_DIR}/include) + target_compile_options(ct2 INTERFACE /wd4267 /wd4244 /wd4305 /wd4996 /wd4099) + + file(GLOB CT2_DLLS ${ctranslate2_fetch_SOURCE_DIR}/bin/*.dll) + install(FILES ${CT2_DLLS} DESTINATION "obs-plugins/64bit") +else() + set(CT2_VERSION "4.1.1") + set(CT2_URL "https://github.com/OpenNMT/CTranslate2.git") + set(CT2_OPENBLAS_CMAKE_ARGS -DWITH_OPENBLAS=OFF) + + set(CT2_CMAKE_PLATFORM_OPTIONS -DBUILD_SHARED_LIBS=OFF -DOPENMP_RUNTIME=NONE -DCMAKE_POSITION_INDEPENDENT_CODE=ON) + set(CT2_LIB_INSTALL_LOCATION lib/${CMAKE_SHARED_LIBRARY_PREFIX}ctranslate2${CMAKE_STATIC_LIBRARY_SUFFIX}) + + ExternalProject_Add( + ct2_build + GIT_REPOSITORY ${CT2_URL} + GIT_TAG v${CT2_VERSION} + GIT_PROGRESS 1 + BUILD_COMMAND ${CMAKE_COMMAND} --build --config ${CMAKE_BUILD_TYPE} + CMAKE_GENERATOR ${CMAKE_GENERATOR} + INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${CMAKE_BUILD_TYPE} + BUILD_BYPRODUCTS /${CT2_LIB_INSTALL_LOCATION} + CMAKE_ARGS -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} + -DCMAKE_INSTALL_PREFIX= + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DWITH_CUDA=OFF + -DWITH_MKL=OFF + -DWITH_TESTS=OFF + -DWITH_EXAMPLES=OFF + -DWITH_TFLITE=OFF + -DWITH_TRT=OFF + -DWITH_PYTHON=OFF + -DWITH_SERVER=OFF + -DWITH_COVERAGE=OFF + -DWITH_PROFILING=OFF + -DBUILD_CLI=OFF + ${CT2_OPENBLAS_CMAKE_ARGS} + ${CT2_CMAKE_PLATFORM_OPTIONS}) + ExternalProject_Get_Property(ct2_build INSTALL_DIR) + + add_library(ct2::ct2 STATIC IMPORTED GLOBAL) + add_dependencies(ct2::ct2 ct2_build) + set_target_properties(ct2::ct2 PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/${CT2_LIB_INSTALL_LOCATION}) + + add_library(ct2 INTERFACE) + target_link_libraries(ct2 INTERFACE ct2::ct2) + set_target_properties(ct2::ct2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include) + +endif() diff --git a/cmake/BuildSentencepiece.cmake b/cmake/BuildSentencepiece.cmake new file mode 100644 index 0000000..024283e --- /dev/null +++ b/cmake/BuildSentencepiece.cmake @@ -0,0 +1,61 @@ +# build sentencepiece from "https://github.com/google/sentencepiece.git" + +if(APPLE) + + include(FetchContent) + + FetchContent_Declare( + sentencepiece_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/libsentencepiece-macos-Release-1.1.1.tar.gz + URL_HASH SHA256=c911f1e84ea94925a8bc3fd3257185b2e18395075509c8659cc7003a979e0b32) + FetchContent_MakeAvailable(sentencepiece_fetch) + add_library(sentencepiece INTERFACE) + target_link_libraries(sentencepiece INTERFACE ${sentencepiece_fetch_SOURCE_DIR}/lib/libsentencepiece.a) + set_target_properties(sentencepiece PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + ${sentencepiece_fetch_SOURCE_DIR}/include) +elseif(WIN32) + + FetchContent_Declare( + sentencepiece_fetch + URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/sentencepiece-windows-0.2.0-Release.zip + URL_HASH SHA256=846699c7fa1e8918b71ed7f2bd5cd60e47e51105e1d84e3192919b4f0f10fdeb) + FetchContent_MakeAvailable(sentencepiece_fetch) + add_library(sentencepiece INTERFACE) + target_link_libraries(sentencepiece INTERFACE ${sentencepiece_fetch_SOURCE_DIR}/lib/sentencepiece.lib) + set_target_properties(sentencepiece PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + ${sentencepiece_fetch_SOURCE_DIR}/include) + +else() + + set(SP_URL + "https://github.com/google/sentencepiece.git" + CACHE STRING "URL of sentencepiece repository") + + set(SP_CMAKE_OPTIONS -DSPM_ENABLE_SHARED=OFF) + set(SENTENCEPIECE_INSTALL_LIB_LOCATION lib/${CMAKE_STATIC_LIBRARY_PREFIX}sentencepiece${CMAKE_STATIC_LIBRARY_SUFFIX}) + + include(ExternalProject) + + ExternalProject_Add( + sentencepiece_build + GIT_REPOSITORY ${SP_URL} + GIT_TAG v0.1.99 + BUILD_COMMAND ${CMAKE_COMMAND} --build --config ${CMAKE_BUILD_TYPE} + CMAKE_GENERATOR ${CMAKE_GENERATOR} + INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${CMAKE_BUILD_TYPE} + BUILD_BYPRODUCTS /${SENTENCEPIECE_INSTALL_LIB_LOCATION} + CMAKE_ARGS -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} -DCMAKE_INSTALL_PREFIX= + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} ${SP_CMAKE_OPTIONS}) + ExternalProject_Get_Property(sentencepiece_build INSTALL_DIR) + + add_library(libsentencepiece STATIC IMPORTED GLOBAL) + add_dependencies(libsentencepiece sentencepiece_build) + set_target_properties(libsentencepiece PROPERTIES IMPORTED_LOCATION + ${INSTALL_DIR}/${SENTENCEPIECE_INSTALL_LIB_LOCATION}) + + add_library(sentencepiece INTERFACE) + add_dependencies(sentencepiece libsentencepiece) + target_link_libraries(sentencepiece INTERFACE libsentencepiece) + target_include_directories(sentencepiece INTERFACE ${INSTALL_DIR}/include) + +endif() diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 31a2293..18b39a7 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -44,3 +44,6 @@ process_while_muted="Process speech while source is muted" rename_file_to_match_recording="Rename file to match recording" min_sub_duration="Min. sub duration (ms)" advanced_settings="Advanced Settings" +target_language="Target language" +source_language="Source language" +translate="Translate" diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index f370765..5dceb70 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -15,6 +15,8 @@ #include #include +#include "translation/translation.h" + #define MAX_PREPROC_CHANNELS 10 #define MT_ obs_module_text @@ -80,6 +82,9 @@ struct transcription_filter_data { bool save_only_while_recording = false; bool process_while_muted = false; bool rename_file_to_match_recording = false; + bool translate = false; + std::string source_lang; + std::string target_lang; // Text source to output the subtitles obs_weak_source_t *text_source; @@ -98,6 +103,9 @@ struct transcription_filter_data { std::mutex *whisper_ctx_mutex; std::condition_variable *wshiper_thread_cv; + // translation context + struct translation_context translation_ctx; + // ctor transcription_filter_data() { diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 42ccf5d..50b718b 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -8,6 +8,7 @@ #include "whisper-utils/whisper-processing.h" #include "whisper-utils/whisper-language.h" #include "whisper-utils/whisper-utils.h" +#include "translation/language_codes.h" #include #include @@ -274,6 +275,16 @@ void set_text_callback(struct transcription_filter_data *gf, std::string str_copy = result.text; #endif + if (gf->translate) { + std::string translated_text; + if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang, + translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) { + str_copy = translated_text; + } else { + obs_log(gf->log_level, "Failed to translate text"); + } + } + if (gf->caption_to_stream) { obs_output_t *streaming_output = obs_frontend_get_streaming_output(); if (streaming_output) { @@ -387,6 +398,26 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->min_sub_duration = (int)obs_data_get_int(s, "min_sub_duration"); gf->last_sub_render_time = 0; + bool new_translate = obs_data_get_bool(s, "translate"); + gf->source_lang = obs_data_get_string(s, "translate_source_language"); + gf->target_lang = obs_data_get_string(s, "translate_target_language"); + + if (new_translate != gf->translate) { + if (new_translate) { + if (build_translation_context(gf->translation_ctx, + "models/m2m100-418m.sp.model", + "models/m2m100-418m") != + OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) { + obs_log(gf->log_level, "Failed to initialize translation context"); + gf->translate = false; + } else { + gf->translate = true; + } + } else { + gf->translate = false; + } + } + obs_log(gf->log_level, "transcription_filter: update text source"); // update the text source const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); @@ -669,6 +700,9 @@ void transcription_filter_defaults(obs_data_t *s) obs_data_set_default_int(s, "step_size_msec", 1000); obs_data_set_default_int(s, "min_sub_duration", 3000); obs_data_set_default_bool(s, "advanced_settings", false); + obs_data_set_default_bool(s, "translate", false); + obs_data_set_default_string(s, "translate_target_language", "__es__"); + obs_data_set_default_string(s, "translate_source_language", "__en__"); // Whisper parameters obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH); @@ -730,6 +764,40 @@ obs_properties_t *transcription_filter_properties(void *data) return true; }); + // add translation option group + obs_properties_t *translation_group = obs_properties_create(); + obs_property_t *translation_group_prop = obs_properties_add_group( + ppts, "translate", MT_("translate"), OBS_GROUP_CHECKABLE, translation_group); + // add target language selection + obs_property_t *prop_tgt = obs_properties_add_list( + translation_group, "translate_target_language", MT_("target_language"), + OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + obs_property_t *prop_src = obs_properties_add_list( + translation_group, "translate_source_language", MT_("source_language"), + OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + + // Populate the dropdown with the language codes + for (const auto &language : language_codes) { + obs_property_list_add_string(prop_tgt, language.second.c_str(), + language.first.c_str()); + obs_property_list_add_string(prop_src, language.second.c_str(), + language.first.c_str()); + } + + // add callback to enable/disable translation group + obs_property_set_modified_callback(translation_group_prop, [](obs_properties_t *props, + obs_property_t *property, + obs_data_t *settings) { + UNUSED_PARAMETER(property); + // Show/Hide the translation group + const bool translate_enabled = obs_data_get_bool(settings, "translate"); + obs_property_set_visible(obs_properties_get(props, "translate_target_language"), + translate_enabled); + obs_property_set_visible(obs_properties_get(props, "translate_source_language"), + translate_enabled); + return true; + }); + obs_properties_add_bool(ppts, "process_while_muted", MT_("process_while_muted")); obs_property_t *subs_output = obs_properties_add_list(ppts, "subtitle_sources", MT_("subtitle_sources"), diff --git a/src/translation/language_codes.h b/src/translation/language_codes.h new file mode 100644 index 0000000..7922446 --- /dev/null +++ b/src/translation/language_codes.h @@ -0,0 +1,205 @@ +#pragma once + +#include +#include + +std::map language_codes = {{"__af__", "Afrikaans"}, + {"__am__", "Amharic"}, + {"__ar__", "Arabic"}, + {"__ast__", "Asturian"}, + {"__az__", "Azerbai"}, + {"__ba__", "Bashkir"}, + {"__be__", "Belarusian"}, + {"__bg__", "Bulgarian"}, + {"__bn__", "Bengali"}, + {"__br__", "Breton"}, + {"__bs__", "Bosnian"}, + {"__ca__", "Catalan"}, + {"__ceb__", "Cebuano"}, + {"__cs__", "Czech"}, + {"__cy__", "Welsh"}, + {"__da__", "Danish"}, + {"__de__", "German"}, + {"__el__", "Greek"}, + {"__en__", "English"}, + {"__es__", "Spanish"}, + {"__et__", "Estonian"}, + {"__fa__", "Persian"}, + {"__ff__", "Fulah"}, + {"__fi__", "Finnish"}, + {"__fr__", "French"}, + {"__fy__", "Frisian"}, + {"__ga__", "Irish"}, + {"__gd__", "Scottish Gaelic"}, + {"__gl__", "Galician"}, + {"__gu__", "Gujarati"}, + {"__ha__", "Hausa"}, + {"__he__", "Hebrew"}, + {"__hi__", "Hindi"}, + {"__hr__", "Croatian"}, + {"__ht__", "Haitian Creole"}, + {"__hu__", "Hungarian"}, + {"__hy__", "Armenian"}, + {"__id__", "Indonesian"}, + {"__ig__", "Igbo"}, + {"__ilo__", "Ilokano"}, + {"__is__", "Icelandic"}, + {"__it__", "Italian"}, + {"__ja__", "Japanese"}, + {"__jv__", "Javanese"}, + {"__ka__", "Georgian"}, + {"__kk__", "Kazakh"}, + {"__km__", "Khmer"}, + {"__kn__", "Kannada"}, + {"__ko__", "Korean"}, + {"__lb__", "Luxembourgish"}, + {"__lg__", "Ganda"}, + {"__ln__", "Lingala"}, + {"__lo__", "Lao"}, + {"__lt__", "Lithuanian"}, + {"__lv__", "Latvian"}, + {"__mg__", "Malagasy"}, + {"__mk__", "Macedonian"}, + {"__ml__", "Malayalam"}, + {"__mn__", "Mongolian"}, + {"__mr__", "Marathi"}, + {"__ms__", "Malay"}, + {"__my__", "Burmese"}, + {"__ne__", "Nepali"}, + {"__nl__", "Dutch"}, + {"__no__", "Norwegian"}, + {"__ns__", "Northern Sotho"}, + {"__oc__", "Occitan"}, + {"__or__", "Oriya"}, + {"__pa__", "Punjabi"}, + {"__pl__", "Polish"}, + {"__ps__", "Pashto"}, + {"__pt__", "Portuguese"}, + {"__ro__", "Romanian"}, + {"__ru__", "Russian"}, + {"__sd__", "Sindhi"}, + {"__si__", "Sinhala"}, + {"__sk__", "Slovak"}, + {"__sl__", "Slovenian"}, + {"__so__", "Somali"}, + {"__sq__", "Albanian"}, + {"__sr__", "Serbian"}, + {"__ss__", "Swati"}, + {"__su__", "Sundanese"}, + {"__sv__", "Swedish"}, + {"__sw__", "Swahili"}, + {"__ta__", "Tamil"}, + {"__th__", "Thai"}, + {"__tl__", "Tagalog"}, + {"__tn__", "Tswana"}, + {"__tr__", "Turkish"}, + {"__uk__", "Ukrainian"}, + {"__ur__", "Urdu"}, + {"__uz__", "Uzbek"}, + {"__vi__", "Vietnamese"}, + {"__wo__", "Wolof"}, + {"__xh__", "Xhosa"}, + {"__yi__", "Yiddish"}, + {"__yo__", "Yoruba"}, + {"__zh__", "Chinese"}, + {"__zu__", "Zulu"}}; + +std::map language_codes_reverse = {{"Afrikaans", "__af__"}, + {"Amharic", "__am__"}, + {"Arabic", "__ar__"}, + {"Asturian", "__ast__"}, + {"Azerbai", "__az__"}, + {"Bashkir", "__ba__"}, + {"Belarusian", "__be__"}, + {"Bengali", "__bn__"}, + {"Breton", "__br__"}, + {"Bosnian", "__bs__"}, + {"Catalan", "__ca__"}, + {"Cebuano", "__ceb__"}, + {"Czech", "__cs__"}, + {"Welsh", "__cy__"}, + {"Danish", "__da__"}, + {"German", "__de__"}, + {"Greek", "__el__"}, + {"English", "__en__"}, + {"Spanish", "__es__"}, + {"Estonian", "__et__"}, + {"Persian", "__fa__"}, + {"Fulah", "__ff__"}, + {"Finnish", "__fi__"}, + {"French", "__fr__"}, + {"Frisian", "__fy__"}, + {"Irish", "__ga__"}, + {"Scottish Gaelic", "__gd__"}, + {"Galician", "__gl__"}, + {"Gujarati", "__gu__"}, + {"Hausa", "__ha__"}, + {"Hebrew", "__he__"}, + {"Hindi", "__hi__"}, + {"Croatian", "__hr__"}, + {"Haitian Creole", "__ht__"}, + {"Hungarian", "__hu__"}, + {"Armenian", "__hy__"}, + {"Indonesian", "__id__"}, + {"Igbo", "__ig__"}, + {"Ilokano", "__ilo__"}, + {"Icelandic", "__is__"}, + {"Italian", "__it__"}, + {"Japanese", "__ja__"}, + {"Javanese", "__jv__"}, + {"Georgian", "__ka__"}, + {"Kazakh", "__kk__"}, + {"Khmer", "__km__"}, + {"Kannada", "__kn__"}, + {"Korean", "__ko__"}, + {"Luxembourgish", "__lb__"}, + {"Ganda", "__lg__"}, + {"Lingala", "__ln__"}, + {"Lao", "__lo__"}, + {"Lithuanian", "__lt__"}, + {"Latvian", "__lv__"}, + {"Malagasy", "__mg__"}, + {"Macedonian", "__mk__"}, + {"Malayalam", "__ml__"}, + {"Mongolian", "__mn__"}, + {"Marathi", "__mr__"}, + {"Malay", "__ms__"}, + {"Burmese", "__my__"}, + {"Nepali", "__ne__"}, + {"Dutch", "__nl__"}, + {"Norwegian", "__no__"}, + {"Northern Sotho", "__ns__"}, + {"Occitan", "__oc__"}, + {"Oriya", "__or__"}, + {"Punjabi", "__pa__"}, + {"Polish", "__pl__"}, + {"Pashto", "__ps__"}, + {"Portuguese", "__pt__"}, + {"Romanian", "__ro__"}, + {"Russian", "__ru__"}, + {"Sindhi", "__sd__"}, + {"Sinhala", "__si__"}, + {"Slovak", "__sk__"}, + {"Slovenian", "__sl__"}, + {"Somali", "__so__"}, + {"Albanian", "__sq__"}, + {"Serbian", "__sr__"}, + {"Swati", "__ss__"}, + {"Sundanese", "__su__"}, + {"Swedish", "__sv__"}, + {"Swahili", "__sw__"}, + {"Tamil", "__ta__"}, + {"Thai", "__th__"}, + {"Tagalog", "__tl__"}, + {"Tswana", "__tn__"}, + {"Turkish", "__tr__"}, + {"Ukrainian", "__uk__"}, + {"Urdu", "__ur__"}, + {"Uzbek", "__uz__"}, + {"Vietnamese", "__vi__"}, + {"Wolof", "__wo__"}, + {"Xhosa", "__xh__"}, + {"Yiddish", "__yi__"}, + {"Yoruba", "__yo__"}, + {"Chinese", "__zh__"}, + {"Zulu", "__zu__"}}; diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp new file mode 100644 index 0000000..1987f99 --- /dev/null +++ b/src/translation/translation.cpp @@ -0,0 +1,87 @@ +#include "translation.h" +#include "plugin-support.h" + +#include +#include +#include +#include + +int build_translation_context(struct translation_context &translation_ctx, + const std::string &local_spm_path, + const std::string &local_model_path) +{ + obs_log(LOG_INFO, "Building translation context..."); + try { + obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str()); + translation_ctx.processor.reset(new sentencepiece::SentencePieceProcessor()); + const auto status = translation_ctx.processor->Load(local_spm_path); + if (!status.ok()) { + obs_log(LOG_ERROR, "Failed to load SPM: %s", status.ToString().c_str()); + return OBS_POLYGLOT_TRANSLATION_INIT_FAIL; + } + + translation_ctx.tokenizer = [&translation_ctx](const std::string &text) { + std::vector tokens; + translation_ctx.processor->Encode(text, &tokens); + return tokens; + }; + translation_ctx.detokenizer = + [&translation_ctx](const std::vector &tokens) { + std::string text; + translation_ctx.processor->Decode(tokens, &text); + return std::regex_replace(text, std::regex(""), "UNK"); + }; + + obs_log(LOG_INFO, "Loading CT2 model from %s", local_model_path.c_str()); + +#ifdef POLYGLOT_WITH_CUDA + ctranslate2::Device device = ctranslate2::Device::CUDA; + obs_log(LOG_INFO, "Using CUDA"); +#else + ctranslate2::Device device = ctranslate2::Device::CPU; + obs_log(LOG_INFO, "Using CPU"); +#endif + + translation_ctx.translator.reset(new ctranslate2::Translator( + local_model_path, device, ctranslate2::ComputeType::AUTO)); + obs_log(LOG_INFO, "CT2 Model loaded"); + + translation_ctx.options.reset(new ctranslate2::TranslationOptions); + translation_ctx.options->beam_size = 1; + translation_ctx.options->max_decoding_length = 40; + translation_ctx.options->use_vmap = true; + translation_ctx.options->return_scores = false; + } catch (std::exception &e) { + obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what()); + return OBS_POLYGLOT_TRANSLATION_INIT_FAIL; + } + return OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS; +} + +int translate(struct translation_context &translation_ctx, const std::string &text, + const std::string &source_lang, const std::string &target_lang, std::string &result) +{ + try { + // get tokens + std::vector tokens = translation_ctx.tokenizer(text); + tokens.insert(tokens.begin(), ""); + tokens.insert(tokens.begin(), source_lang); + tokens.push_back(""); + + const std::vector> batch = {tokens}; + + const std::vector> target_prefix = {{target_lang}}; + const std::vector results = + translation_ctx.translator->translate_batch(batch, target_prefix, + *translation_ctx.options); + + // detokenize starting with the 2nd token + const auto &tokens_result = results[0].output(); + result = translation_ctx.detokenizer( + std::vector(tokens_result.begin() + 1, tokens_result.end())); + } catch (std::exception &e) { + obs_log(LOG_ERROR, "Error: %s", e.what()); + return OBS_POLYGLOT_TRANSLATION_FAIL; + } + return OBS_POLYGLOT_TRANSLATION_SUCCESS; +} diff --git a/src/translation/translation.h b/src/translation/translation.h new file mode 100644 index 0000000..9d21734 --- /dev/null +++ b/src/translation/translation.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include +#include + +struct translation_context { + std::unique_ptr processor; + std::unique_ptr translator; + std::unique_ptr options; + std::function(const std::string &)> tokenizer; + std::function &)> detokenizer; +}; + +int build_translation_context(struct translation_context &translation_ctx, + const std::string &local_spm_path, + const std::string &local_model_path); + +int translate(struct translation_context &translation_ctx, const std::string &text, + const std::string &source_lang, const std::string &target_lang, std::string &result); + +#define OBS_POLYGLOT_TRANSLATION_INIT_FAIL -1 +#define OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS 0 +#define OBS_POLYGLOT_TRANSLATION_SUCCESS 0 +#define OBS_POLYGLOT_TRANSLATION_FAIL -1 From b658125204a84e1a9b943e5f96e100693d3e1791 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 11:58:19 -0400 Subject: [PATCH 02/10] Add model-infos.cpp and translate_add_context to en-US.ini --- CMakeLists.txt | 1 + data/locale/ar-SA.ini | 50 +++++++ data/locale/de-DE.ini | 50 +++++++ data/locale/en-US.ini | 1 + data/locale/es-ES.ini | 50 +++++++ data/locale/fr-FR.ini | 50 +++++++ data/locale/hi-IN.ini | 50 +++++++ data/locale/ja-JP.ini | 50 +++++++ data/locale/ko-KR.ini | 50 +++++++ data/locale/pl-PL.ini | 50 +++++++ data/locale/{pt_BR.ini => pt-BR.ini} | 4 + data/locale/{ru_RU.ini => ru-RU.ini} | 4 + data/locale/zh-CN.ini | 50 +++++++ .../ggml-model-whisper-tiny.en.bin | Bin src/model-utils/model-downloader-types.h | 25 ++++ src/model-utils/model-downloader-ui.cpp | 115 +++++++++------- src/model-utils/model-downloader-ui.h | 6 +- src/model-utils/model-downloader.cpp | 89 +++++++++---- src/model-utils/model-downloader.h | 7 +- src/model-utils/model-infos.cpp | 122 +++++++++++++++++ src/transcription-filter-data.h | 7 +- src/transcription-filter.cpp | 124 ++++++++---------- src/translation/translation.cpp | 104 +++++++++++---- src/translation/translation.h | 10 +- src/whisper-utils/whisper-processing.cpp | 16 ++- src/whisper-utils/whisper-utils.cpp | 29 ++-- 26 files changed, 927 insertions(+), 187 deletions(-) create mode 100644 data/locale/ar-SA.ini create mode 100644 data/locale/de-DE.ini create mode 100644 data/locale/es-ES.ini create mode 100644 data/locale/fr-FR.ini create mode 100644 data/locale/hi-IN.ini create mode 100644 data/locale/ja-JP.ini create mode 100644 data/locale/ko-KR.ini create mode 100644 data/locale/pl-PL.ini rename data/locale/{pt_BR.ini => pt-BR.ini} (93%) rename data/locale/{ru_RU.ini => ru-RU.ini} (93%) create mode 100644 data/locale/zh-CN.ini rename data/models/{ => ggml-model-whisper-tiny-en}/ggml-model-whisper-tiny.en.bin (100%) create mode 100644 src/model-utils/model-infos.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 90473bf..fdd7cd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,7 @@ target_sources( src/whisper-utils/whisper-processing.cpp src/model-utils/model-downloader.cpp src/model-utils/model-downloader-ui.cpp + src/model-utils/model-infos.cpp src/whisper-utils/whisper-utils.cpp src/translation/translation.cpp) diff --git a/data/locale/ar-SA.ini b/data/locale/ar-SA.ini new file mode 100644 index 0000000..be12610 --- /dev/null +++ b/data/locale/ar-SA.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="إضافة LocalVocal" +transcription_filterAudioFilter="تصفية نسخ LocalVocal" +vad_enabled="تمكين VAD" +log_level="مستوى السجل الداخلي" +log_words="تسجيل الخروج إلى الوحدة الطرفية" +caption_to_stream="تدفق الترجمات" +step_by_step_processing="المعالجة خطوة بخطوة (⚠️ زيادة المعالجة)" +step_size_msec="حجم الخطوة (ملي ثانية)" +subtitle_sources="مصادر الترجمات" +none_no_output="بدون / بلا مخرجات" +text_file_output="مخرجات ملف نصي" +output_filename="اسم ملف الخروج" +whisper_model="نموذج Whisper" +external_model_file="ملف النموذج الخارجي" +whisper_parameters="الإعدادات المتقدمة" +language="اللغة" +whisper_sampling_method="طريقة عينة Whisper" +n_threads="عدد الخيوط" +n_max_text_ctx="الحد الأقصى لسياق النص" +translate="ترجمة" +no_context="بدون سياق" +single_segment="جزء واحد" +print_special="طباعة خاصة" +print_progress="طباعة التقدم" +print_realtime="طباعة الوقت الفعلي" +print_timestamps="طباعة الطوابع الزمنية" +token_timestamps="طوابع زمنية للرمز" +thold_pt="عتبة احتمال الرمز" +thold_ptsum="عتبة مجموع احتمال الرمز" +max_len="الحد الأقصى للطول بالأحرف" +split_on_word="التقسيم على الكلمة" +max_tokens="الحد الأقصى للرموز" +speed_up="تسريع" +initial_prompt="المطالبة الأولية" +suppress_blank="كبت الفراغ" +suppress_non_speech_tokens="كبت رموز غير الكلام" +temperature="درجة الحرارة" +max_initial_ts="الحد الأقصى للطوابع الزمنية الأولية" +length_penalty="عقوبة الطول" +save_srt="حفظ بصيغة SRT" +truncate_output_file="تقليص الملف عند جملة جديدة" +only_while_recording="كتابة الخروج فقط أثناء التسجيل" +process_while_muted="معالجة الكلام أثناء كتم المصدر" +rename_file_to_match_recording="إعادة تسمية الملف ليتطابق مع التسجيل" +min_sub_duration="الحد الأدنى لمدة العنوان الفرعي (ملي ثانية)" +advanced_settings="الإعدادات المتقدمة" +target_language="اللغة الهدف" +source_language="لغة المصدر" +translate="ترجمة" +translate_add_context="الترجمة مع السياق" diff --git a/data/locale/de-DE.ini b/data/locale/de-DE.ini new file mode 100644 index 0000000..57bb71a --- /dev/null +++ b/data/locale/de-DE.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="LocalVocal Plugin" +transcription_filterAudioFilter="LocalVocal Transkription" +vad_enabled="VAD Aktiviert" +log_level="Interne Protokollebene" +log_words="Protokollausgabe zur Konsole" +caption_to_stream="Stream-Untertitel" +step_by_step_processing="Schritt-für-Schritt-Verarbeitung (⚠️ erhöhte Verarbeitung)" +step_size_msec="Schrittgröße (ms)" +subtitle_sources="Untertitel Ausgabe" +none_no_output="Keine / Keine Ausgabe" +text_file_output="Textdatei Ausgabe" +output_filename="Ausgabedateiname" +whisper_model="Flüstermodell" +external_model_file="Externe Modelldatei" +whisper_parameters="Erweiterte Einstellungen" +language="Sprache" +whisper_sampling_method="Flüster Sampling Methode" +n_threads="Anzahl der Threads" +n_max_text_ctx="Max Textkontext" +translate="Übersetzen" +no_context="Kein Kontext" +single_segment="Einzelnes Segment" +print_special="Sonderdruck" +print_progress="Fortschritt drucken" +print_realtime="Echtzeit drucken" +print_timestamps="Zeitstempel drucken" +token_timestamps="Token Zeitstempel" +thold_pt="Token-Wahrscheinlichkeitsschwelle" +thold_ptsum="Token Summenwahrscheinlichkeitsschwelle" +max_len="Maximale Länge in Zeichen" +split_on_word="Auf Wort teilen" +max_tokens="Max Tokens" +speed_up="Beschleunigen" +initial_prompt="Erste Aufforderung" +suppress_blank="Leerzeichen unterdrücken" +suppress_non_speech_tokens="Nicht-Sprach-Token unterdrücken" +temperature="Temperatur" +max_initial_ts="Max Anfangszeitstempel" +length_penalty="Längenstrafe" +save_srt="Im SRT-Format speichern" +truncate_output_file="Datei bei neuem Satz kürzen" +only_while_recording="Ausgabe nur während der Aufnahme schreiben" +process_while_muted="Sprache verarbeiten, während die Quelle stummgeschaltet ist" +rename_file_to_match_recording="Datei umbenennen, um Aufnahme zu entsprechen" +min_sub_duration="Min. Untertiteldauer (ms)" +advanced_settings="Erweiterte Einstellungen" +target_language="Zielsprache" +source_language="Quellsprache" +translate="Übersetzen" +translate_add_context="Mit Kontext übersetzen" diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 18b39a7..fab4510 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -47,3 +47,4 @@ advanced_settings="Advanced Settings" target_language="Target language" source_language="Source language" translate="Translate" +translate_add_context="Translate with context" diff --git a/data/locale/es-ES.ini b/data/locale/es-ES.ini new file mode 100644 index 0000000..a9f0580 --- /dev/null +++ b/data/locale/es-ES.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="Plugin LocalVocal" +transcription_filterAudioFilter="Transcripción LocalVocal" +vad_enabled="VAD Habilitado" +log_level="Nivel de Registro Interno" +log_words="Registro de Salida a la Consola" +caption_to_stream="Subtítulos en Stream" +step_by_step_processing="Procesamiento paso a paso (⚠️ procesamiento aumentado)" +step_size_msec="Tamaño de paso (ms)" +subtitle_sources="Salida de Subtítulos" +none_no_output="Ninguno / Sin salida" +text_file_output="Salida de archivo de texto" +output_filename="Nombre del archivo de salida" +whisper_model="Modelo Whisper" +external_model_file="Archivo de modelo externo" +whisper_parameters="Configuraciones Avanzadas" +language="Idioma" +whisper_sampling_method="Método de Muestreo Whisper" +n_threads="Número de hilos" +n_max_text_ctx="Contexto de texto máximo" +translate="Traducir" +no_context="Sin contexto" +single_segment="Segmento único" +print_special="Imprimir especial" +print_progress="Imprimir progreso" +print_realtime="Imprimir en tiempo real" +print_timestamps="Imprimir marcas de tiempo" +token_timestamps="Marcas de tiempo de token" +thold_pt="Umbral de prob. de token" +thold_ptsum="Umbral de suma de prob. de token" +max_len="Longitud máxima en caracteres" +split_on_word="Dividir en palabra" +max_tokens="Tokens máximos" +speed_up="Acelerar" +initial_prompt="Indicación inicial" +suppress_blank="Suprimir en blanco" +suppress_non_speech_tokens="Suprimir tokens no verbales" +temperature="Temperatura" +max_initial_ts="Marcas de tiempo iniciales máximas" +length_penalty="Penalización de longitud" +save_srt="Guardar en formato SRT" +truncate_output_file="Truncar archivo en nueva oración" +only_while_recording="Escribir salida solo mientras se graba" +process_while_muted="Procesar el habla mientras la fuente está silenciada" +rename_file_to_match_recording="Renombrar archivo para que coincida con la grabación" +min_sub_duration="Duración mínima de sub (ms)" +advanced_settings="Configuraciones Avanzadas" +target_language="Idioma objetivo" +source_language="Idioma fuente" +translate="Traducir" +translate_add_context="Traducir con contexto" diff --git a/data/locale/fr-FR.ini b/data/locale/fr-FR.ini new file mode 100644 index 0000000..13a00d9 --- /dev/null +++ b/data/locale/fr-FR.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="Plugin LocalVocal" +transcription_filterAudioFilter="Transcription LocalVocal" +vad_enabled="VAD Activé" +log_level="Niveau de journalisation interne" +log_words="Journalisation de la sortie vers la console" +caption_to_stream="Sous-titres en streaming" +step_by_step_processing="Traitement étape par étape (⚠️ traitement accru)" +step_size_msec="Taille de l'étape (ms)" +subtitle_sources="Sortie des sous-titres" +none_no_output="Aucun / Pas de sortie" +text_file_output="Sortie de fichier texte" +output_filename="Nom du fichier de sortie" +whisper_model="Modèle Whisper" +external_model_file="Fichier de modèle externe" +whisper_parameters="Paramètres avancés" +language="Langue" +whisper_sampling_method="Méthode d'échantillonnage Whisper" +n_threads="Nombre de fils" +n_max_text_ctx="Contexte de texte max" +translate="Traduire" +no_context="Pas de contexte" +single_segment="Segment unique" +print_special="Imprimer spécial" +print_progress="Imprimer la progression" +print_realtime="Imprimer en temps réel" +print_timestamps="Imprimer les horodatages" +token_timestamps="Horodatages des jetons" +thold_pt="Seuil de prob. de jeton" +thold_ptsum="Seuil de somme de prob. de jeton" +max_len="Longueur max en caractères" +split_on_word="Diviser sur le mot" +max_tokens="Max jetons" +speed_up="Accélérer" +initial_prompt="Invite initiale" +suppress_blank="Supprimer le blanc" +suppress_non_speech_tokens="Supprimer les jetons non-parlés" +temperature="Température" +max_initial_ts="Max horodatages initiaux" +length_penalty="Pénalité de longueur" +save_srt="Enregistrer au format SRT" +truncate_output_file="Tronquer le fichier sur nouvelle phrase" +only_while_recording="Écrire la sortie uniquement pendant l'enregistrement" +process_while_muted="Traiter la parole pendant que la source est en sourdine" +rename_file_to_match_recording="Renommer le fichier pour correspondre à l'enregistrement" +min_sub_duration="Durée min. du sous-titre (ms)" +advanced_settings="Paramètres avancés" +target_language="Langue cible" +source_language="Langue source" +translate="Traduire" +translate_add_context="Traduire avec contexte" diff --git a/data/locale/hi-IN.ini b/data/locale/hi-IN.ini new file mode 100644 index 0000000..03d8c27 --- /dev/null +++ b/data/locale/hi-IN.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="लोकलवोकल प्लगइन" +transcription_filterAudioFilter="लोकलवोकल ट्रांसक्रिप्शन" +vad_enabled="VAD सक्षम" +log_level="आंतरिक लॉग स्तर" +log_words="कंसोल पर लॉग आउटपुट" +caption_to_stream="स्ट्रीम कैप्शन" +step_by_step_processing="चरण-दर-चरण प्रसंस्करण (⚠️ बढ़ी प्रसंस्करण)" +step_size_msec="चरण का आकार (ms)" +subtitle_sources="उपशीर्षक आउटपुट" +none_no_output="कोई नहीं / कोई आउटपुट नहीं" +text_file_output="टेक्स्ट फ़ाइल आउटपुट" +output_filename="आउटपुट फ़ाइलनाम" +whisper_model="व्हिस्पर मॉडल" +external_model_file="बाहरी मॉडल फ़ाइल" +whisper_parameters="उन्नत सेटिंग्स" +language="भाषा" +whisper_sampling_method="व्हिस्पर सैंपलिंग विधि" +n_threads="धागों की संख्या" +n_max_text_ctx="अधिकतम पाठ संदर्भ" +translate="अनुवाद करें" +no_context="कोई संदर्भ नहीं" +single_segment="एकल सेगमेंट" +print_special="विशेष मुद्रित करें" +print_progress="प्रगति मुद्रित करें" +print_realtime="रियलटाइम मुद्रित करें" +print_timestamps="टाइमस्टैंप मुद्रित करें" +token_timestamps="टोकन टाइमस्टैंप" +thold_pt="टोकन प्रॉब. थ्रेशोल्ड" +thold_ptsum="टोकन सम प्रॉब. थ्रेशोल्ड" +max_len="अधिकतम लंबाई इन अक्षरों में" +split_on_word="शब्द पर विभाजित करें" +max_tokens="अधिकतम टोकन" +speed_up="स्पीड अप" +initial_prompt="प्रारंभिक प्रॉम्प्ट" +suppress_blank="रिक्त संयंत्रित करें" +suppress_non_speech_tokens="गैर-भाषण टोकनों को दबाएं" +temperature="तापमान" +max_initial_ts="अधिकतम प्रारंभिक टाइमस्टैंप" +length_penalty="लंबाई दंड" +save_srt="SRT प्रारूप में सहेजें" +truncate_output_file="नई वाक्यांश पर फ़ाइल को छोटा करें" +only_while_recording="केवल रिकॉर्डिंग के दौरान आउटपुट लिखें" +process_while_muted="स्रोत म्यूट होने पर भी भाषण को प्रसंस्करण करें" +rename_file_to_match_recording="रिकॉर्डिंग से मेल खाने के लिए फ़ाइल का नाम बदलें" +min_sub_duration="न्यूनतम उपशीर्षक अवधि (ms)" +advanced_settings="उन्नत सेटिंग्स" +target_language="लक्ष्य भाषा" +source_language="स्रोत भाषा" +translate="अनुवाद करें" +translate_add_context="संदर्भ के साथ अनुवाद करें" diff --git a/data/locale/ja-JP.ini b/data/locale/ja-JP.ini new file mode 100644 index 0000000..d7fc6d3 --- /dev/null +++ b/data/locale/ja-JP.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="ローカルボーカルプラグイン" +transcription_filterAudioFilter="ローカルボーカルトランスクリプション" +vad_enabled="VAD有効" +log_level="内部ログレベル" +log_words="コンソールへのログ出力" +caption_to_stream="ストリームキャプション" +step_by_step_processing="ステップバイステップ処理(⚠️処理増加)" +step_size_msec="ステップサイズ(ms)" +subtitle_sources="字幕出力" +none_no_output="なし/出力なし" +text_file_output="テキストファイル出力" +output_filename="出力ファイル名" +whisper_model="ウィスパーモデル" +external_model_file="外部モデルファイル" +whisper_parameters="詳細設定" +language="言語" +whisper_sampling_method="ウィスパーサンプリング方法" +n_threads="スレッド数" +n_max_text_ctx="最大テキストコンテキスト" +translate="翻訳" +no_context="コンテキストなし" +single_segment="単一セグメント" +print_special="特別な印刷" +print_progress="進行状況を印刷" +print_realtime="リアルタイムで印刷" +print_timestamps="タイムスタンプを印刷" +token_timestamps="トークンタイムスタンプ" +thold_pt="トークン確率閾値" +thold_ptsum="トークン合計確率閾値" +max_len="最大長(文字)" +split_on_word="単語で分割" +max_tokens="最大トークン数" +speed_up="スピードアップ" +initial_prompt="初期プロンプト" +suppress_blank="空白を抑制" +suppress_non_speech_tokens="非音声トークンを抑制" +temperature="温度" +max_initial_ts="最大初期タイムスタンプ" +length_penalty="長さのペナルティ" +save_srt="SRT形式で保存" +truncate_output_file="新しい文でファイルを切り捨てる" +only_while_recording="録音中のみ出力を書き込む" +process_while_muted="ソースがミュート中も音声を処理する" +rename_file_to_match_recording="ファイル名を録音に合わせて変更" +min_sub_duration="最小サブ持続時間(ms)" +advanced_settings="詳細設定" +target_language="目標言語" +source_language="ソース言語" +translate="翻訳" +translate_add_context="コンテキスト付きで翻訳" diff --git a/data/locale/ko-KR.ini b/data/locale/ko-KR.ini new file mode 100644 index 0000000..12d714c --- /dev/null +++ b/data/locale/ko-KR.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="로컬보컬 플러그인" +transcription_filterAudioFilter="로컬보컬 전사" +vad_enabled="VAD 활성화" +log_level="내부 로그 레벨" +log_words="콘솔에 로그 출력" +caption_to_stream="스트림 캡션" +step_by_step_processing="단계별 처리 (⚠️ 처리 시간 증가)" +step_size_msec="단계 크기 (ms)" +subtitle_sources="자막 출력" +none_no_output="없음 / 출력 없음" +text_file_output="텍스트 파일 출력" +output_filename="출력 파일명" +whisper_model="속삭임 모델" +external_model_file="외부 모델 파일" +whisper_parameters="고급 설정" +language="언어" +whisper_sampling_method="속삭임 샘플링 방법" +n_threads="스레드 수" +n_max_text_ctx="최대 텍스트 컨텍스트" +translate="번역" +no_context="컨텍스트 없음" +single_segment="단일 세그먼트" +print_special="특수 출력" +print_progress="진행 상황 출력" +print_realtime="실시간 출력" +print_timestamps="타임스탬프 출력" +token_timestamps="토큰 타임스탬프" +thold_pt="토큰 확률 임계값" +thold_ptsum="토큰 합 확률 임계값" +max_len="최대 길이(문자)" +split_on_word="단어로 분할" +max_tokens="최대 토큰" +speed_up="속도 향상" +initial_prompt="초기 프롬프트" +suppress_blank="공백 제거" +suppress_non_speech_tokens="비음성 토큰 제거" +temperature="온도" +max_initial_ts="최대 초기 타임스탬프" +length_penalty="길이 패널티" +save_srt="SRT 형식으로 저장" +truncate_output_file="새 문장에서 파일 잘라내기" +only_while_recording="녹음 중에만 출력 작성" +process_while_muted="소스가 음소거 상태일 때 음성 처리" +rename_file_to_match_recording="녹음과 일치하도록 파일 이름 변경" +min_sub_duration="최소. 자막 지속 시간 (ms)" +advanced_settings="고급 설정" +target_language="대상 언어" +source_language="원본 언어" +translate="번역" +translate_add_context="컨텍스트와 함께 번역" diff --git a/data/locale/pl-PL.ini b/data/locale/pl-PL.ini new file mode 100644 index 0000000..d550b18 --- /dev/null +++ b/data/locale/pl-PL.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="Wtyczka LocalVocal" +transcription_filterAudioFilter="Transkrypcja LocalVocal" +vad_enabled="VAD Włączony" +log_level="Poziom logowania wewnętrznego" +log_words="Logowanie wyjścia do konsoli" +caption_to_stream="Podpisy strumienia" +step_by_step_processing="Przetwarzanie krok po kroku (⚠️ zwiększone przetwarzanie)" +step_size_msec="Rozmiar kroku (ms)" +subtitle_sources="Źródła napisów" +none_no_output="Brak / Brak wyjścia" +text_file_output="Wyjście pliku tekstowego" +output_filename="Nazwa pliku wyjściowego" +whisper_model="Model Whisper" +external_model_file="Zewnętrzny plik modelu" +whisper_parameters="Ustawienia zaawansowane" +language="Język" +whisper_sampling_method="Metoda próbkowania Whisper" +n_threads="Liczba wątków" +n_max_text_ctx="Maksymalny kontekst tekstu" +translate="Tłumacz" +no_context="Brak kontekstu" +single_segment="Pojedynczy segment" +print_special="Drukuj specjalne" +print_progress="Drukuj postęp" +print_realtime="Drukuj w czasie rzeczywistym" +print_timestamps="Drukuj znaczniki czasu" +token_timestamps="Znaczniki czasu tokenów" +thold_pt="Próg prawd. tokena" +thold_ptsum="Próg sumy prawd. tokena" +max_len="Maksymalna długość w znakach" +split_on_word="Podziel na słowo" +max_tokens="Maksymalna liczba tokenów" +speed_up="Przyspiesz" +initial_prompt="Początkowy monit" +suppress_blank="Tłumienie pustych" +suppress_non_speech_tokens="Tłumienie tokenów nie-mowy" +temperature="Temperatura" +max_initial_ts="Maksymalne początkowe znaczniki czasu" +length_penalty="Kara za długość" +save_srt="Zapisz w formacie SRT" +truncate_output_file="Skróć plik przy nowym zdaniu" +only_while_recording="Zapisuj wyjście tylko podczas nagrywania" +process_while_muted="Przetwarzaj mowę, gdy źródło jest wyciszone" +rename_file_to_match_recording="Zmień nazwę pliku, aby pasowała do nagrania" +min_sub_duration="Min. czas trwania napisów (ms)" +advanced_settings="Ustawienia zaawansowane" +target_language="Język docelowy" +source_language="Język źródłowy" +translate="Tłumacz" +translate_add_context="Tłumacz z kontekstem" diff --git a/data/locale/pt_BR.ini b/data/locale/pt-BR.ini similarity index 93% rename from data/locale/pt_BR.ini rename to data/locale/pt-BR.ini index f416835..6033e61 100644 --- a/data/locale/pt_BR.ini +++ b/data/locale/pt-BR.ini @@ -44,3 +44,7 @@ only_while_recording="Escreva durante a gravação" process_while_muted="Processar enquanto está silenciada" rename_file_to_match_recording="Renomear arquivo para corresponder à gravação" min_sub_duration="Duração mínima da legenda (msec)" +target_language="Língua alvo" +source_language="Língua de origem" +translate="Traduzir" +translate_add_context="Traduzir com contexto" diff --git a/data/locale/ru_RU.ini b/data/locale/ru-RU.ini similarity index 93% rename from data/locale/ru_RU.ini rename to data/locale/ru-RU.ini index 6d3ce3f..7918ad8 100644 --- a/data/locale/ru_RU.ini +++ b/data/locale/ru-RU.ini @@ -43,3 +43,7 @@ only_while_recording="Записывать вывод только во врем process_while_muted="Обрабатывать речь, пока источник отключен" rename_file_to_match_recording="Переименовать файл, чтобы соответствовать записи" min_sub_duration="Минимальная длительность субтитров (мс)" +target_language="Целевой язык" +source_language="Исходный язык" +translate="Перевести" +translate_add_context="Перевести с контекстом" diff --git a/data/locale/zh-CN.ini b/data/locale/zh-CN.ini new file mode 100644 index 0000000..2809ae7 --- /dev/null +++ b/data/locale/zh-CN.ini @@ -0,0 +1,50 @@ +LocalVocalPlugin="LocalVocal 插件" +transcription_filterAudioFilter="LocalVocal 转录" +vad_enabled="启用 VAD" +log_level="内部日志级别" +log_words="控制台日志输出" +caption_to_stream="流字幕" +step_by_step_processing="逐步处理(⚠️ 增加处理)" +step_size_msec="步长(毫秒)" +subtitle_sources="字幕输出" +none_no_output="无 / 无输出" +text_file_output="文本文件输出" +output_filename="输出文件名" +whisper_model="Whisper 模型" +external_model_file="外部模型文件" +whisper_parameters="高级设置" +language="语言" +whisper_sampling_method="Whisper 采样方法" +n_threads="线程数" +n_max_text_ctx="最大文本上下文" +translate="翻译" +no_context="无上下文" +single_segment="单一段落" +print_special="打印特殊" +print_progress="打印进度" +print_realtime="实时打印" +print_timestamps="打印时间戳" +token_timestamps="令牌时间戳" +thold_pt="令牌概率阈值" +thold_ptsum="令牌总概率阈值" +max_len="最大长度(字符)" +split_on_word="按单词分割" +max_tokens="最大令牌数" +speed_up="加速" +initial_prompt="初始提示" +suppress_blank="抑制空白" +suppress_non_speech_tokens="抑制非语音令牌" +temperature="温度" +max_initial_ts="最大初始时间戳" +length_penalty="长度惩罚" +save_srt="保存为 SRT 格式" +truncate_output_file="新句子时截断文件" +only_while_recording="仅在录制时写入输出" +process_while_muted="在源静音时处理语音" +rename_file_to_match_recording="将文件重命名以匹配录制" +min_sub_duration="最小字幕持续时间(毫秒)" +advanced_settings="高级设置" +target_language="目标语言" +source_language="源语言" +translate="翻译" +translate_add_context="带上下文翻译" diff --git a/data/models/ggml-model-whisper-tiny.en.bin b/data/models/ggml-model-whisper-tiny-en/ggml-model-whisper-tiny.en.bin similarity index 100% rename from data/models/ggml-model-whisper-tiny.en.bin rename to data/models/ggml-model-whisper-tiny-en/ggml-model-whisper-tiny.en.bin diff --git a/src/model-utils/model-downloader-types.h b/src/model-utils/model-downloader-types.h index 0ef81c3..3d24d96 100644 --- a/src/model-utils/model-downloader-types.h +++ b/src/model-utils/model-downloader-types.h @@ -1,3 +1,28 @@ +#ifndef MODEL_DOWNLOADER_TYPES_H +#define MODEL_DOWNLOADER_TYPES_H + +#include +#include +#include +#include typedef std::function download_finished_callback_t; + +struct ModelFileDownloadInfo { + std::string url; + std::string sha256; +}; + +enum ModelType { MODEL_TYPE_TRANSCRIPTION, MODEL_TYPE_TRANSLATION }; + +struct ModelInfo { + std::string friendly_name; + std::string local_folder_name; + ModelType type; + std::vector files; +}; + +extern std::map models_info; + +#endif /* MODEL_DOWNLOADER_TYPES_H */ diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp index e53d0ab..dfd8bd5 100644 --- a/src/model-utils/model-downloader-ui.cpp +++ b/src/model-utils/model-downloader-ui.cpp @@ -5,15 +5,13 @@ #include -const std::string MODEL_BASE_PATH = "https://ggml.ggerganov.com/"; - size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t written = fwrite(ptr, size, nmemb, stream); return written; } -ModelDownloader::ModelDownloader(const std::string &model_name, +ModelDownloader::ModelDownloader(const ModelInfo &model_info, download_finished_callback_t download_finished_callback_, QWidget *parent) : QDialog(parent), @@ -30,7 +28,7 @@ ModelDownloader::ModelDownloader(const std::string &model_name, // Add a label for the model name QLabel *model_name_label = new QLabel(this); - model_name_label->setText(QString::fromStdString(model_name)); + model_name_label->setText(QString::fromStdString(model_info.friendly_name)); model_name_label->setAlignment(Qt::AlignCenter); this->layout->addWidget(model_name_label); @@ -43,7 +41,7 @@ ModelDownloader::ModelDownloader(const std::string &model_name, this->layout->addWidget(this->progress_bar); this->download_thread = new QThread(); - this->download_worker = new ModelDownloadWorker(model_name); + this->download_worker = new ModelDownloadWorker(model_info); this->download_worker->moveToThread(this->download_thread); connect(this->download_thread, &QThread::started, this->download_worker, @@ -112,65 +110,92 @@ void ModelDownloader::show_error(const std::string &reason) this->download_finished_callback(1, ""); } -ModelDownloadWorker::ModelDownloadWorker(const std::string &model_name_) -{ - this->model_name = model_name_; +ModelDownloadWorker::ModelDownloadWorker(const ModelInfo &model_info_) : model_info(model_info_) {} + +std::string get_filename_from_url(const std::string& url) { + auto lastSlashPos = url.find_last_of("/"); + auto queryPos = url.find("?", lastSlashPos); + if (queryPos == std::string::npos) { + return url.substr(lastSlashPos + 1); + } else { + return url.substr(lastSlashPos + 1, queryPos - lastSlashPos - 1); + } } void ModelDownloadWorker::download_model() { - char *module_config_path = obs_module_get_config_path(obs_current_module(), "models"); + char *config_folder = obs_module_get_config_path(obs_current_module(), "models"); + const std::filesystem::path module_config_models_folder = + std::filesystem::absolute(config_folder); + bfree(config_folder); + // Check if the config folder exists - if (!std::filesystem::exists(module_config_path)) { - obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_path); + if (!std::filesystem::exists(module_config_models_folder)) { + obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_models_folder); // Create the config folder - if (!std::filesystem::create_directories(module_config_path)) { + if (!std::filesystem::create_directories(module_config_models_folder)) { obs_log(LOG_ERROR, "Failed to create config folder: %s", - module_config_path); + module_config_models_folder); emit download_error("Failed to create config folder."); return; } } - char *model_save_path_str = - obs_module_get_config_path(obs_current_module(), this->model_name.c_str()); - std::string model_save_path(model_save_path_str); - bfree(model_save_path_str); - obs_log(LOG_INFO, "Model save path: %s", model_save_path.c_str()); + const std::string model_local_config_path = + (module_config_models_folder / model_info.local_folder_name).string(); - // extract filename from path in this->modle_name - const std::string model_filename = - this->model_name.substr(this->model_name.find_last_of("/\\") + 1); + obs_log(LOG_INFO, "Model save path: %s", model_local_config_path.c_str()); - std::string model_url = MODEL_BASE_PATH + model_filename; - obs_log(LOG_INFO, "Model URL: %s", model_url.c_str()); + if (!std::filesystem::exists(model_local_config_path)) { + // model folder does not exist, create it + if (!std::filesystem::create_directories(model_local_config_path)) { + obs_log(LOG_ERROR, "Failed to create model folder: %s", + model_local_config_path.c_str()); + emit download_error("Failed to create model folder."); + return; + } + } CURL *curl = curl_easy_init(); if (curl) { - FILE *fp = fopen(model_save_path.c_str(), "wb"); - if (fp == nullptr) { - obs_log(LOG_ERROR, "Failed to open file %s.", model_save_path.c_str()); - emit download_error("Failed to open file."); - return; - } - curl_easy_setopt(curl, CURLOPT_URL, model_url.c_str()); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); - curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, - ModelDownloadWorker::progress_callback); - curl_easy_setopt(curl, CURLOPT_XFERINFODATA, this); - // Follow redirects - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - obs_log(LOG_ERROR, "Failed to download model %s.", - this->model_name.c_str()); - emit download_error("Failed to download model."); + for (auto &model_download_file : this->model_info.files) { + obs_log(LOG_INFO, "Model URL: %s", model_download_file.url.c_str()); + + const std::string model_filename = get_filename_from_url(model_download_file.url); + const std::string model_file_save_path = + (std::filesystem::path(model_local_config_path) / model_filename).string(); + if (std::filesystem::exists(model_file_save_path)) { + obs_log(LOG_INFO, "Model file already exists: %s", + model_file_save_path.c_str()); + continue; + } + + FILE *fp = fopen(model_file_save_path.c_str(), "wb"); + if (fp == nullptr) { + obs_log(LOG_ERROR, "Failed to open model file for writing %s.", + model_file_save_path.c_str()); + emit download_error("Failed to open file."); + return; + } + curl_easy_setopt(curl, CURLOPT_URL, model_download_file.url.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); + curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, + ModelDownloadWorker::progress_callback); + curl_easy_setopt(curl, CURLOPT_XFERINFODATA, this); + // Follow redirects + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + obs_log(LOG_ERROR, "Failed to download model file %s.", + model_filename.c_str()); + emit download_error("Failed to download model file."); + } + fclose(fp); } curl_easy_cleanup(curl); - fclose(fp); - emit download_finished(model_save_path); + emit download_finished(model_local_config_path); } else { obs_log(LOG_ERROR, "Failed to initialize curl."); emit download_error("Failed to initialize curl."); diff --git a/src/model-utils/model-downloader-ui.h b/src/model-utils/model-downloader-ui.h index d2e5fb2..20521b6 100644 --- a/src/model-utils/model-downloader-ui.h +++ b/src/model-utils/model-downloader-ui.h @@ -14,7 +14,7 @@ class ModelDownloadWorker : public QObject { Q_OBJECT public: - ModelDownloadWorker(const std::string &model_name); + ModelDownloadWorker(const ModelInfo &model_info_); ~ModelDownloadWorker(); public slots: @@ -28,13 +28,13 @@ public slots: private: static int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow); - std::string model_name; + ModelInfo model_info; }; class ModelDownloader : public QDialog { Q_OBJECT public: - ModelDownloader(const std::string &model_name, + ModelDownloader(const ModelInfo &model_info, download_finished_callback_t download_finished_callback, QWidget *parent = nullptr); ~ModelDownloader(); diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp index c83adff..f8b5293 100644 --- a/src/model-utils/model-downloader.cpp +++ b/src/model-utils/model-downloader.cpp @@ -12,46 +12,83 @@ #include -std::string find_model_file(const std::string &model_name) +std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name) { + for (const auto &entry : std::filesystem::directory_iterator(folder_path)) { + if (entry.path().filename() == file_name) { + return entry.path().string(); + } + } + return ""; +} + +std::string find_bin_file_in_folder(const std::string &model_local_folder_path) { + // find .bin file in folder + for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) { + if (entry.path().extension() == ".bin") { + const std::string bin_file_path = entry.path().string(); + obs_log(LOG_INFO, "Model bin file found in folder: %s", bin_file_path.c_str()); + return bin_file_path; + } + } + obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_local_folder_path.c_str()); + return ""; +} + +std::string find_model_folder(const ModelInfo &model_info) { - const char *model_name_cstr = model_name.c_str(); - obs_log(LOG_INFO, "Checking if model %s exists in data...", model_name_cstr); + char* data_folder_models = obs_module_file("models"); + const std::filesystem::path module_data_models_folder = + std::filesystem::absolute(data_folder_models); + bfree(data_folder_models); + + const std::string model_local_data_path = + (module_data_models_folder / model_info.local_folder_name).string(); + + obs_log(LOG_INFO, "Checking if model '%s' exists in data...", model_info.friendly_name.c_str()); - char *model_file_path = obs_module_file(model_name_cstr); - if (model_file_path == nullptr) { - obs_log(LOG_INFO, "Model %s not found in data.", model_name_cstr); + if (!std::filesystem::exists(model_local_data_path)) { + obs_log(LOG_INFO, "Model not found in data: %s", model_local_data_path.c_str()); } else { - std::string model_file_path_str(model_file_path); - bfree(model_file_path); - if (!std::filesystem::exists(model_file_path_str)) { - obs_log(LOG_INFO, "Model not found in data: %s", - model_file_path_str.c_str()); - } else { - obs_log(LOG_INFO, "Model found in data: %s", model_file_path_str.c_str()); - return model_file_path_str; - } + obs_log(LOG_INFO, "Model folder found in data: %s", model_local_data_path.c_str()); + return model_local_data_path; } // Check if model exists in the config folder - char *model_config_path_str = - obs_module_get_config_path(obs_current_module(), model_name_cstr); - std::string model_config_path(model_config_path_str); - bfree(model_config_path_str); - obs_log(LOG_INFO, "Model path in config: %s", model_config_path.c_str()); - if (std::filesystem::exists(model_config_path)) { - obs_log(LOG_INFO, "Model exists in config folder: %s", model_config_path.c_str()); - return model_config_path; + char *config_folder = obs_module_get_config_path(obs_current_module(), "models"); + const std::filesystem::path module_config_models_folder = + std::filesystem::absolute(config_folder); + bfree(config_folder); + + obs_log(LOG_INFO, "Checking if model '%s' exists in config...", model_info.friendly_name.c_str()); + + const std::string model_local_config_path = + (module_config_models_folder / model_info.local_folder_name).string(); + + obs_log(LOG_INFO, "Model path in config: %s", model_local_config_path.c_str()); + if (std::filesystem::exists(model_local_config_path)) { + obs_log(LOG_INFO, "Model exists in config folder: %s", + model_local_config_path.c_str()); + return model_local_config_path; } - obs_log(LOG_INFO, "Model %s not found.", model_name_cstr); + obs_log(LOG_INFO, "Model '%s' not found.", model_info.friendly_name.c_str()); return ""; } -void download_model_with_ui_dialog(const std::string &model_name, +std::string find_model_bin_file(const ModelInfo &model_info) { + const std::string model_local_folder_path = find_model_folder(model_info); + if (model_local_folder_path.empty()) { + return ""; + } + + return find_bin_file_in_folder(model_local_folder_path); +} + +void download_model_with_ui_dialog(const ModelInfo &model_info, download_finished_callback_t download_finished_callback) { // Start the model downloader UI ModelDownloader *model_downloader = new ModelDownloader( - model_name, download_finished_callback, (QWidget *)obs_frontend_get_main_window()); + model_info, download_finished_callback, (QWidget *)obs_frontend_get_main_window()); model_downloader->show(); } diff --git a/src/model-utils/model-downloader.h b/src/model-utils/model-downloader.h index 09d07ab..fca3337 100644 --- a/src/model-utils/model-downloader.h +++ b/src/model-utils/model-downloader.h @@ -6,10 +6,13 @@ #include "model-downloader-types.h" -std::string find_model_file(const std::string &model_name); +std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name); +std::string find_bin_file_in_folder(const std::string &path); +std::string find_model_folder(const ModelInfo &model_info); +std::string find_model_bin_file(const ModelInfo &model_info); // Start the model downloader UI dialog with a callback for when the download is finished -void download_model_with_ui_dialog(const std::string &model_name, +void download_model_with_ui_dialog(const ModelInfo &model_info, download_finished_callback_t download_finished_callback); #endif // MODEL_DOWNLOADER_H diff --git a/src/model-utils/model-infos.cpp b/src/model-utils/model-infos.cpp new file mode 100644 index 0000000..cd00814 --- /dev/null +++ b/src/model-utils/model-infos.cpp @@ -0,0 +1,122 @@ +#include "model-downloader-types.h" + +std::map models_info = {{ + {"M2M-100 418M (495Mb)", + {"M2M-100 418M", + "m2m-100-418M", + MODEL_TYPE_TRANSLATION, + {{"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/model.bin?download=true", + "D6703DD9F920FF896E45C3D97B490761BED5944937B90BBE6A7245F5652542D4"}, + { + "https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/config.json?download=true", + "4244772990E30069563E3DDFB4AD6DC95BDFD2AC3DE667EA8858C9B0A8433FA8", + }, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/generation_config.json?download=true", + "AED76366507333DDBB8BD49960F23C82FE6446B3319A46A54BEFDB45324CCF61"}, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/shared_vocabulary.json?download=true", + "7EB5D0FF184C6095C7C10F9911C0AEA492250ABD12854F9C3D787C64B1C6397E"}, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/special_tokens_map.json?download=true", + "C1A4F86C3874D279AE1B2A05162858DB5DD6C61665D84223ED886CBCFF08FDA6"}, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/tokenizer_config.json?download=true", + "AE54F15F0649BB05041CDADAD8485BA1FAF40BC33E6B4C2A74AE2D1AE5710FA2"}, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/vocab.json?download=true", + "B6E77E474AEEA8F441363ACA7614317C06381F3EACFE10FB9856D5081D1074CC"}, + {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/sentencepiece.bpe.model?download=true", + "D8F7C76ED2A5E0822BE39F0A4F95A55EB19C78F4593CE609E2EDBC2AEA4D380A"}}}}, + {"Whisper Base q5 (57Mb)", + {"Whisper Base q5", + "whisper-base-q5", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin", + "422F1AE452ADE6F30A004D7E5C6A43195E4433BC370BF23FAC9CC591F01A8898"}}}}, + {"Whisper Base En q5 (57Mb)", + {"Whisper Base En q5", + "ggml-model-whisper-base-en-q5_1", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin", + "4BAF70DD0D7C4247BA2B81FAFD9C01005AC77C2F9EF064E00DCF195D0E2FDD2F"}}}}, + {"Whisper Base (141Mb)", + {"Whisper Base", + "ggml-model-whisper-base", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-base.bin", + "60ED5BC3DD14EEA856493D334349B405782DDCAF0028D4B5DF4088345FBA2EFE"}}}}, + {"Whisper Base En (141Mb)", + {"Whisper Base En", + "ggml-model-whisper-base-en", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin", + "A03779C86DF3323075F5E796CB2CE5029F00EC8869EEE3FDFB897AFE36C6D002"}}}}, + {"Whisper Large q5 (1Gb)", + {"Whisper Large q5", + "ggml-model-whisper-large-q5_0", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin", + "3A214837221E4530DBC1FE8D734F302AF393EB30BD0ED046042EBF4BAF70F6F2"}}}}, + {"Whisper Medium q5 (514Mb)", + {"Whisper Medium q5", + "ggml-model-whisper-medium-q5_0", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin", + "19FEA4B380C3A618EC4723C3EEF2EB785FFBA0D0538CF43F8F235E7B3B34220F"}}}}, + {"Whisper Medium En q5 (514Mb)", + {"Whisper Medium En q5", + "ggml-model-whisper-medium-en-q5_0", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-medium.en-q5_0.bin", + "76733E26AD8FE1C7A5BF7531A9D41917B2ADC0F20F2E4F5531688A8C6CD88EB0"}}}}, + {"Whisper Small q5 (181Mb)", + {"Whisper Small q5", + "ggml-model-whisper-small-q5_1", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin", + "AE85E4A935D7A567BD102FE55AFC16BB595BDB618E11B2FC7591BC08120411BB"}}}}, + {"Whisper Small En q5 (181Mb)", + {"Whisper Small En q5", + "ggml-model-whisper-small-en-q5_1", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin", + "BFDFF4894DCB76BBF647D56263EA2A96645423F1669176F4844A1BF8E478AD30"}}}}, + {"Whisper Small (465Mb)", + {"Whisper Small", + "ggml-model-whisper-small", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-small.bin", + "1BE3A9B2063867B937E64E2EC7483364A79917E157FA98C5D94B5C1FFFEA987B"}}}}, + {"Whisper Small En (465Mb)", + {"Whisper Small En", + "ggml-model-whisper-small-en", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin", + "C6138D6D58ECC8322097E0F987C32F1BE8BB0A18532A3F88F734D1BBF9C41E5D"}}}}, + {"Whisper Tiny (74Mb)", + {"Whisper Tiny", + "ggml-model-whisper-tiny", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin", + "BE07E048E1E599AD46341C8D2A135645097A538221678B7ACDD1B1919C6E1B21"}}}}, + {"Whisper Tiny q5 (31Mb)", + {"Whisper Tiny q5", + "ggml-model-whisper-tiny-q5_1", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin", + "818710568DA3CA15689E31A743197B520007872FF9576237BDA97BD1B469C3D7"}}}}, + {"Whisper Tiny En q5 (31Mb)", + {"Whisper Tiny En q5", + "ggml-model-whisper-tiny-en-q5_1", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin", + "C77C5766F1CEF09B6B7D47F21B546CBDDD4157886B3B5D6D4F709E91E66C7C2B"}}}}, + {"Whisper Tiny En q8 (42Mb)", + {"Whisper Tiny En q8", + "ggml-model-whisper-tiny-en-q8_0", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin", + "5BC2B3860AA151A4C6E7BB095E1FCCE7CF12C7B020CA08DCEC0C6D018BB7DD94"}}}}, + {"Whisper Tiny En (74Mb)", + {"Whisper Tiny En", + "ggml-model-whisper-tiny-en", + MODEL_TYPE_TRANSCRIPTION, + {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin", + "921E4CF8686FDD993DCD081A5DA5B6C365BFDE1162E72B08D75AC75289920B1F"}}}}, +}}; diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 5dceb70..3e12c2a 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -65,7 +65,7 @@ struct transcription_filter_data { audio_resampler_t *resampler; /* whisper */ - char *whisper_model_path; + std::string whisper_model_path; struct whisper_context *whisper_context; whisper_full_params whisper_params; @@ -86,6 +86,9 @@ struct transcription_filter_data { std::string source_lang; std::string target_lang; + // Last transcription result + std::string last_text; + // Text source to output the subtitles obs_weak_source_t *text_source; char *text_source_name; @@ -115,7 +118,7 @@ struct transcription_filter_data { } context = nullptr; resampler = nullptr; - whisper_model_path = nullptr; + whisper_model_path = ""; whisper_context = nullptr; text_source = nullptr; text_source_mutex = nullptr; diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 50b718b..4627c50 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -9,6 +9,7 @@ #include "whisper-utils/whisper-language.h" #include "whisper-utils/whisper-utils.h" #include "translation/language_codes.h" +#include "translation/translation.h" #include #include @@ -127,7 +128,7 @@ void transcription_filter_destroy(void *data) struct transcription_filter_data *gf = static_cast(data); - obs_log(gf->log_level, "transcription_filter_destroy"); + obs_log(gf->log_level, "filter destroy"); shutdown_whisper_thread(gf); if (gf->text_source_name) { @@ -275,16 +276,28 @@ void set_text_callback(struct transcription_filter_data *gf, std::string str_copy = result.text; #endif + // remove trailing spaces, newlines, tabs or punctuation + str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(), + [](unsigned char ch) { return !std::isspace(ch) || !std::ispunct(ch); }) + .base(), + str_copy.end()); + if (gf->translate) { - std::string translated_text; + obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(), gf->target_lang.c_str()); + std::string translated_text; if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang, translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) { - str_copy = translated_text; + if (gf->log_words) { + obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(), translated_text.c_str()); + } + str_copy = translated_text; } else { obs_log(gf->log_level, "Failed to translate text"); } } + gf->last_text = str_copy; + if (gf->caption_to_stream) { obs_output_t *streaming_output = obs_frontend_get_streaming_output(); if (streaming_output) { @@ -377,7 +390,7 @@ void transcription_filter_update(void *data, obs_data_t *s) static_cast(data); gf->log_level = (int)obs_data_get_int(s, "log_level"); - obs_log(gf->log_level, "transcription_filter_update"); + obs_log(gf->log_level, "filter update"); gf->vad_enabled = obs_data_get_bool(s, "vad_enabled"); gf->log_words = obs_data_get_bool(s, "log_words"); @@ -401,24 +414,17 @@ void transcription_filter_update(void *data, obs_data_t *s) bool new_translate = obs_data_get_bool(s, "translate"); gf->source_lang = obs_data_get_string(s, "translate_source_language"); gf->target_lang = obs_data_get_string(s, "translate_target_language"); + gf->translation_ctx.add_context = obs_data_get_bool(s, "translate_add_context"); if (new_translate != gf->translate) { if (new_translate) { - if (build_translation_context(gf->translation_ctx, - "models/m2m100-418m.sp.model", - "models/m2m100-418m") != - OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) { - obs_log(gf->log_level, "Failed to initialize translation context"); - gf->translate = false; - } else { - gf->translate = true; - } + start_translation(gf); } else { gf->translate = false; } } - obs_log(gf->log_level, "transcription_filter: update text source"); + obs_log(gf->log_level, "update text source"); // update the text source const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; @@ -482,10 +488,10 @@ void transcription_filter_update(void *data, obs_data_t *s) return; } - obs_log(gf->log_level, "transcription_filter: update whisper model"); + obs_log(gf->log_level, "update whisper model"); update_whsiper_model_path(gf, s); - obs_log(gf->log_level, "transcription_filter: update whisper params"); + obs_log(gf->log_level, "update whisper params"); std::lock_guard lock(*gf->whisper_ctx_mutex); gf->whisper_params = whisper_full_default_params( @@ -495,7 +501,7 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->whisper_params.initial_prompt = obs_data_get_string(s, "initial_prompt"); gf->whisper_params.n_threads = (int)obs_data_get_int(s, "n_threads"); gf->whisper_params.n_max_text_ctx = (int)obs_data_get_int(s, "n_max_text_ctx"); - gf->whisper_params.translate = obs_data_get_bool(s, "translate"); + gf->whisper_params.translate = obs_data_get_bool(s, "whisper_translate"); gf->whisper_params.no_context = obs_data_get_bool(s, "no_context"); gf->whisper_params.single_segment = obs_data_get_bool(s, "single_segment"); gf->whisper_params.print_special = obs_data_get_bool(s, "print_special"); @@ -519,7 +525,7 @@ void transcription_filter_update(void *data, obs_data_t *s) void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) { - obs_log(LOG_INFO, "transcription filter create"); + obs_log(LOG_INFO, "LocalVocal filter create"); struct transcription_filter_data *gf = new transcription_filter_data(); @@ -559,10 +565,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->overlap_ms = (int)obs_data_get_int(settings, "overlap_size_msec"); gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms)); - obs_log(gf->log_level, "transcription_filter: channels %d, frames %d, sample_rate %d", + obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d", (int)gf->channels, (int)gf->frames, gf->sample_rate); - obs_log(gf->log_level, "transcription_filter: setup audio resampler"); + obs_log(gf->log_level, "setup audio resampler"); struct resample_info src, dst; src.samples_per_sec = gf->sample_rate; src.format = AUDIO_FORMAT_FLOAT_PLANAR; @@ -574,12 +580,12 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); - obs_log(gf->log_level, "transcription_filter: setup mutexes and condition variables"); + obs_log(gf->log_level, "setup mutexes and condition variables"); gf->whisper_buf_mutex = new std::mutex(); gf->whisper_ctx_mutex = new std::mutex(); gf->wshiper_thread_cv = new std::condition_variable(); gf->text_source_mutex = new std::mutex(); - obs_log(gf->log_level, "transcription_filter: clear text source data"); + obs_log(gf->log_level, "clear text source data"); gf->text_source = nullptr; const char *subtitle_sources = obs_data_get_string(settings, "subtitle_sources"); if (subtitle_sources != nullptr) { @@ -587,13 +593,13 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) } else { gf->text_source_name = nullptr; } - obs_log(gf->log_level, "transcription_filter: clear paths and whisper context"); + obs_log(gf->log_level, "clear paths and whisper context"); gf->whisper_model_file_currently_loaded = ""; gf->output_file_path = std::string(""); - gf->whisper_model_path = nullptr; // The update function will set the model path + gf->whisper_model_path = std::string(""); // The update function will set the model path gf->whisper_context = nullptr; - obs_log(gf->log_level, "transcription_filter: run update"); + obs_log(gf->log_level, "run update"); // get the settings updated on the filter data struct transcription_filter_update(gf, settings); @@ -641,7 +647,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) }, gf); - obs_log(gf->log_level, "transcription_filter: filter created."); + obs_log(gf->log_level, "filter created."); return gf; } @@ -665,7 +671,7 @@ void transcription_filter_activate(void *data) { struct transcription_filter_data *gf = static_cast(data); - obs_log(gf->log_level, "transcription_filter filter activated"); + obs_log(gf->log_level, "filter activated"); gf->active = true; } @@ -673,20 +679,19 @@ void transcription_filter_deactivate(void *data) { struct transcription_filter_data *gf = static_cast(data); - obs_log(gf->log_level, "transcription_filter filter deactivated"); + obs_log(gf->log_level, "filter deactivated"); gf->active = false; } void transcription_filter_defaults(obs_data_t *s) { - obs_log(LOG_INFO, "transcription_filter_defaults"); + obs_log(LOG_INFO, "filter defaults"); obs_data_set_default_bool(s, "vad_enabled", true); obs_data_set_default_int(s, "log_level", LOG_DEBUG); obs_data_set_default_bool(s, "log_words", true); obs_data_set_default_bool(s, "caption_to_stream", false); - obs_data_set_default_string(s, "whisper_model_path", - "models/ggml-model-whisper-tiny.en.bin"); + obs_data_set_default_string(s, "whisper_model_path", "Whisper Tiny En (74Mb)"); obs_data_set_default_string(s, "whisper_language_select", "en"); obs_data_set_default_string(s, "subtitle_sources", "none"); obs_data_set_default_bool(s, "step_by_step_processing", false); @@ -703,14 +708,15 @@ void transcription_filter_defaults(obs_data_t *s) obs_data_set_default_bool(s, "translate", false); obs_data_set_default_string(s, "translate_target_language", "__es__"); obs_data_set_default_string(s, "translate_source_language", "__en__"); + obs_data_set_default_bool(s, "translate_add_context", true); // Whisper parameters obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH); obs_data_set_default_string(s, "initial_prompt", ""); obs_data_set_default_int(s, "n_threads", 4); obs_data_set_default_int(s, "n_max_text_ctx", 16384); - obs_data_set_default_bool(s, "translate", false); - obs_data_set_default_bool(s, "no_context", true); + obs_data_set_default_bool(s, "whisper_translate", false); + obs_data_set_default_bool(s, "no_context", false); obs_data_set_default_bool(s, "single_segment", true); obs_data_set_default_bool(s, "print_special", false); obs_data_set_default_bool(s, "print_progress", false); @@ -732,7 +738,7 @@ void transcription_filter_defaults(obs_data_t *s) obs_properties_t *transcription_filter_properties(void *data) { - obs_log(LOG_INFO, "transcription_filter_properties"); + obs_log(LOG_DEBUG, "Add filter properties"); struct transcription_filter_data *gf = static_cast(data); @@ -775,6 +781,8 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_t *prop_src = obs_properties_add_list( translation_group, "translate_source_language", MT_("source_language"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + obs_property_t *prop_add_context = obs_properties_add_bool( + translation_group, "translate_add_context", MT_("translate_add_context")); // Populate the dropdown with the language codes for (const auto &language : language_codes) { @@ -791,10 +799,9 @@ obs_properties_t *transcription_filter_properties(void *data) UNUSED_PARAMETER(property); // Show/Hide the translation group const bool translate_enabled = obs_data_get_bool(settings, "translate"); - obs_property_set_visible(obs_properties_get(props, "translate_target_language"), - translate_enabled); - obs_property_set_visible(obs_properties_get(props, "translate_source_language"), - translate_enabled); + for (const auto& prop : { "translate_target_language", "translate_source_language", "translate_add_context" }) { + obs_property_set_visible(obs_properties_get(props, prop), translate_enabled); + } return true; }); @@ -822,39 +829,14 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_t *whisper_models_list = obs_properties_add_list(ppts, "whisper_model_path", MT_("whisper_model"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + // Add models from models_info map + for (const auto &model_info : models_info) { + if (model_info.second.type == MODEL_TYPE_TRANSCRIPTION) { + obs_property_list_add_string(whisper_models_list, model_info.first.c_str(), + model_info.first.c_str()); + } + } - obs_property_list_add_string(whisper_models_list, "Base q5 57M", - "models/ggml-model-whisper-base-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Base 141M", - "models/ggml-model-whisper-base.bin"); - obs_property_list_add_string(whisper_models_list, "Base (Eng) q5 57M", - "models/ggml-model-whisper-base.en-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Base (Eng) 141M", - "models/ggml-model-whisper-base.en.bin"); - obs_property_list_add_string(whisper_models_list, "Large q5 1G", - "models/ggml-model-whisper-large-q5_0.bin"); - obs_property_list_add_string(whisper_models_list, "Medium q5 514M", - "models/ggml-model-whisper-medium-q5_0.bin"); - obs_property_list_add_string(whisper_models_list, "Medium (Eng) 514M", - "models/ggml-model-whisper-medium.en-q5_0.bin"); - obs_property_list_add_string(whisper_models_list, "Small q5 181M", - "models/ggml-model-whisper-small-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Small 465M", - "models/ggml-model-whisper-small.bin"); - obs_property_list_add_string(whisper_models_list, "Small (Eng) q5 181M", - "models/ggml-model-whisper-small.en-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Small (Eng) 465M", - "models/ggml-model-whisper-small.en.bin"); - obs_property_list_add_string(whisper_models_list, "Tiny q5 31M", - "models/ggml-model-whisper-tiny-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Tiny 74M", - "models/ggml-model-whisper-tiny.bin"); - obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q5 31M", - "models/ggml-model-whisper-tiny.en-q5_1.bin"); - obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q8 42M", - "models/ggml-model-whisper-tiny.en-q8_0.bin"); - obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 74M", - "models/ggml-model-whisper-tiny.en.bin"); obs_property_list_add_string(whisper_models_list, "Load external model file", "!!!external!!!"); @@ -956,7 +938,7 @@ obs_properties_t *transcription_filter_properties(void *data) // int offset_ms; // start offset in ms // int duration_ms; // audio duration to process in ms // bool translate; - obs_properties_add_bool(whisper_params_group, "translate", MT_("translate")); + obs_properties_add_bool(whisper_params_group, "whisper_translate", MT_("translate")); // bool no_context; // do not use past transcription (if any) as initial prompt for the decoder obs_properties_add_bool(whisper_params_group, "no_context", MT_("no_context")); // bool single_segment; // force single segment output (useful for streaming) diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp index 1987f99..662d58f 100644 --- a/src/translation/translation.cpp +++ b/src/translation/translation.cpp @@ -1,16 +1,57 @@ #include "translation.h" #include "plugin-support.h" +#include "model-utils/model-downloader.h" +#include "transcription-filter-data.h" #include #include #include #include -int build_translation_context(struct translation_context &translation_ctx, - const std::string &local_spm_path, - const std::string &local_model_path) +void build_and_enable_translation(struct transcription_filter_data* gf, const std::string& model_file_path) { - obs_log(LOG_INFO, "Building translation context..."); + gf->translation_ctx.local_model_folder_path = model_file_path; + if (build_translation_context(gf->translation_ctx) == OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) { + obs_log(LOG_INFO, "Enable translation"); + gf->translate = true; + } else { + obs_log(LOG_ERROR, "Failed to load CT2 model"); + gf->translate = false; + } +} + +void start_translation(struct transcription_filter_data* gf) +{ + obs_log(LOG_INFO, "Starting translation..."); + + const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"]; + std::string model_file_found = find_model_folder(translation_model_info); + if (model_file_found == "") { + obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading..."); + download_model_with_ui_dialog( + translation_model_info, + [gf, model_file_found](int download_status, const std::string &path) { + if (download_status == 0) { + obs_log(LOG_INFO, "CT2 model download complete"); + build_and_enable_translation(gf, path); + } else { + obs_log(LOG_ERROR, "Model download failed"); + gf->translate = false; + } + }); + } else { + // Model exists, just load it + build_and_enable_translation(gf, model_file_found); + } +} + +int build_translation_context(struct translation_context &translation_ctx) +{ + std::string local_model_path = translation_ctx.local_model_folder_path; + obs_log(LOG_INFO, "Building translation context from '%s'...", local_model_path.c_str()); + // find the SPM file in the model folder + std::string local_spm_path = find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model"); + try { obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str()); translation_ctx.processor.reset(new sentencepiece::SentencePieceProcessor()); @@ -36,10 +77,10 @@ int build_translation_context(struct translation_context &translation_ctx, #ifdef POLYGLOT_WITH_CUDA ctranslate2::Device device = ctranslate2::Device::CUDA; - obs_log(LOG_INFO, "Using CUDA"); + obs_log(LOG_INFO, "CT2 Using CUDA"); #else ctranslate2::Device device = ctranslate2::Device::CPU; - obs_log(LOG_INFO, "Using CPU"); + obs_log(LOG_INFO, "CT2 Using CPU"); #endif translation_ctx.translator.reset(new ctranslate2::Translator( @@ -51,6 +92,8 @@ int build_translation_context(struct translation_context &translation_ctx, translation_ctx.options->max_decoding_length = 40; translation_ctx.options->use_vmap = true; translation_ctx.options->return_scores = false; + translation_ctx.options->repetition_penalty = 1.1; + translation_ctx.options->no_repeat_ngram_size = 2; } catch (std::exception &e) { obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what()); return OBS_POLYGLOT_TRANSLATION_INIT_FAIL; @@ -62,23 +105,38 @@ int translate(struct translation_context &translation_ctx, const std::string &te const std::string &source_lang, const std::string &target_lang, std::string &result) { try { - // get tokens - std::vector tokens = translation_ctx.tokenizer(text); - tokens.insert(tokens.begin(), ""); - tokens.insert(tokens.begin(), source_lang); - tokens.push_back(""); - - const std::vector> batch = {tokens}; - - const std::vector> target_prefix = {{target_lang}}; - const std::vector results = - translation_ctx.translator->translate_batch(batch, target_prefix, - *translation_ctx.options); - - // detokenize starting with the 2nd token - const auto &tokens_result = results[0].output(); - result = translation_ctx.detokenizer( - std::vector(tokens_result.begin() + 1, tokens_result.end())); + // set input tokens + std::vector input_tokens = {source_lang, ""}; + if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) { + input_tokens.insert(input_tokens.end(), translation_ctx.last_input_tokens.begin(), translation_ctx.last_input_tokens.end()); + } + std::vector new_input_tokens = translation_ctx.tokenizer(text); + input_tokens.insert(input_tokens.end(), new_input_tokens.begin(), new_input_tokens.end()); + input_tokens.push_back(""); + + translation_ctx.last_input_tokens = new_input_tokens; + + const std::vector> batch = {input_tokens}; + + // get target prefix + std::vector target_prefix = {target_lang}; + if (translation_ctx.add_context && translation_ctx.last_translation_tokens.size() > 0) { + target_prefix.insert(target_prefix.end(), translation_ctx.last_translation_tokens.begin(), translation_ctx.last_translation_tokens.end()); + } + + const std::vector> target_prefix_batch = {target_prefix}; + const std::vector results = + translation_ctx.translator->translate_batch(batch, target_prefix_batch, + *translation_ctx.options); + + const auto &tokens_result = results[0].output(); + // take the tokens from the target_prefix length to the end + std::vector translation_tokens(tokens_result.begin() + target_prefix.size(), + tokens_result.end()); + + translation_ctx.last_translation_tokens = translation_tokens; + // detokenize + result = translation_ctx.detokenizer(translation_tokens); } catch (std::exception &e) { obs_log(LOG_ERROR, "Error: %s", e.what()); return OBS_POLYGLOT_TRANSLATION_FAIL; diff --git a/src/translation/translation.h b/src/translation/translation.h index 9d21734..bfa0a18 100644 --- a/src/translation/translation.h +++ b/src/translation/translation.h @@ -7,16 +7,20 @@ #include struct translation_context { + std::string local_model_folder_path; std::unique_ptr processor; std::unique_ptr translator; std::unique_ptr options; std::function(const std::string &)> tokenizer; std::function &)> detokenizer; + std::vector last_input_tokens; + std::vector last_translation_tokens; + // Use the last translation as context for the next translation + bool add_context; }; -int build_translation_context(struct translation_context &translation_ctx, - const std::string &local_spm_path, - const std::string &local_model_path); +void start_translation(struct transcription_filter_data* gf); +int build_translation_context(struct translation_context &translation_ctx); int translate(struct translation_context &translation_ctx, const std::string &text, const std::string &source_lang, const std::string &target_lang, std::string &result); diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp index 748f52e..ab58320 100644 --- a/src/whisper-utils/whisper-processing.cpp +++ b/src/whisper-utils/whisper-processing.cpp @@ -14,6 +14,7 @@ #include #include #endif +#include "model-utils/model-downloader.h" #define VAD_THOLD 0.0001f #define FREQ_THOLD 100.0f @@ -108,10 +109,23 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v return true; } -struct whisper_context *init_whisper_context(const std::string &model_path) +struct whisper_context *init_whisper_context(const std::string &model_path_in) { + std::string model_path = model_path_in; + obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str()); + if (std::filesystem::is_directory(model_path)) { + obs_log(LOG_INFO, "Model path is a directory, not a file, looking for .bin file in folder"); + // look for .bin file + const std::string model_bin_file = find_bin_file_in_folder(model_path); + if (model_bin_file.empty()) { + obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_path.c_str()); + return nullptr; + } + model_path = model_bin_file; + } + struct whisper_context_params cparams = whisper_context_default_params(); #ifdef LOCALVOCAL_WITH_CUDA cparams.use_gpu = true; diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp index 47b983c..8d7de1f 100644 --- a/src/whisper-utils/whisper-utils.cpp +++ b/src/whisper-utils/whisper-utils.cpp @@ -9,10 +9,10 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t std::string new_model_path = obs_data_get_string(s, "whisper_model_path"); const bool is_external_model = new_model_path.find("!!!external!!!") != std::string::npos; - if (gf->whisper_model_path == nullptr || - strcmp(new_model_path.c_str(), gf->whisper_model_path) != 0 || is_external_model) { + if (gf->whisper_model_path.empty() || gf->whisper_model_path != new_model_path || + is_external_model) { // model path changed, reload the model - obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path, + obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path.c_str(), new_model_path.c_str()); // check if the new model is external file @@ -20,18 +20,25 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t // new model is not external file shutdown_whisper_thread(gf); - gf->whisper_model_path = bstrdup(new_model_path.c_str()); + if (models_info.count(new_model_path) == 0) { + obs_log(LOG_WARNING, "Model '%s' does not exist", + new_model_path.c_str()); + return; + } + + const ModelInfo &model_info = models_info[new_model_path]; // check if the model exists, if not, download it - std::string model_file_found = find_model_file(gf->whisper_model_path); + std::string model_file_found = find_model_bin_file(model_info); if (model_file_found == "") { obs_log(LOG_WARNING, "Whisper model does not exist"); download_model_with_ui_dialog( - gf->whisper_model_path, - [gf](int download_status, const std::string &path) { + model_info, + [gf, new_model_path](int download_status, const std::string &path) { if (download_status == 0) { obs_log(LOG_INFO, "Model download complete"); + gf->whisper_model_path = new_model_path; start_whisper_thread_with_path(gf, path); } else { obs_log(LOG_ERROR, "Model download failed"); @@ -39,6 +46,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t }); } else { // Model exists, just load it + gf->whisper_model_path = new_model_path; start_whisper_thread_with_path(gf, model_file_found); } } else { @@ -55,7 +63,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t return; } else { shutdown_whisper_thread(gf); - gf->whisper_model_path = bstrdup(new_model_path.c_str()); + gf->whisper_model_path = new_model_path; start_whisper_thread_with_path(gf, external_model_file_path); } @@ -85,9 +93,8 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf) if (gf->whisper_thread.joinable()) { gf->whisper_thread.join(); } - if (gf->whisper_model_path != nullptr) { - bfree(gf->whisper_model_path); - gf->whisper_model_path = nullptr; + if (!gf->whisper_model_path.empty()) { + gf->whisper_model_path.clear(); } } From 3bd3694870a3d2f8196a2a22fe119b54c5573253 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:01:23 -0400 Subject: [PATCH 03/10] Fix formatting and whitespace issues --- src/model-utils/model-downloader-ui.cpp | 44 ++++---- src/model-utils/model-downloader.cpp | 66 ++++++------ src/model-utils/model-downloader.h | 3 +- src/transcription-filter-data.h | 4 +- src/transcription-filter.cpp | 44 ++++---- src/translation/translation.cpp | 125 ++++++++++++----------- src/translation/translation.h | 12 +-- src/whisper-utils/whisper-processing.cpp | 24 +++-- src/whisper-utils/whisper-utils.cpp | 12 +-- 9 files changed, 182 insertions(+), 152 deletions(-) diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp index dfd8bd5..b978379 100644 --- a/src/model-utils/model-downloader-ui.cpp +++ b/src/model-utils/model-downloader-ui.cpp @@ -112,14 +112,15 @@ void ModelDownloader::show_error(const std::string &reason) ModelDownloadWorker::ModelDownloadWorker(const ModelInfo &model_info_) : model_info(model_info_) {} -std::string get_filename_from_url(const std::string& url) { - auto lastSlashPos = url.find_last_of("/"); - auto queryPos = url.find("?", lastSlashPos); - if (queryPos == std::string::npos) { - return url.substr(lastSlashPos + 1); - } else { - return url.substr(lastSlashPos + 1, queryPos - lastSlashPos - 1); - } +std::string get_filename_from_url(const std::string &url) +{ + auto lastSlashPos = url.find_last_of("/"); + auto queryPos = url.find("?", lastSlashPos); + if (queryPos == std::string::npos) { + return url.substr(lastSlashPos + 1); + } else { + return url.substr(lastSlashPos + 1, queryPos - lastSlashPos - 1); + } } void ModelDownloadWorker::download_model() @@ -131,7 +132,8 @@ void ModelDownloadWorker::download_model() // Check if the config folder exists if (!std::filesystem::exists(module_config_models_folder)) { - obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_models_folder); + obs_log(LOG_WARNING, "Config folder does not exist: %s", + module_config_models_folder); // Create the config folder if (!std::filesystem::create_directories(module_config_models_folder)) { obs_log(LOG_ERROR, "Failed to create config folder: %s", @@ -146,24 +148,26 @@ void ModelDownloadWorker::download_model() obs_log(LOG_INFO, "Model save path: %s", model_local_config_path.c_str()); - if (!std::filesystem::exists(model_local_config_path)) { - // model folder does not exist, create it - if (!std::filesystem::create_directories(model_local_config_path)) { - obs_log(LOG_ERROR, "Failed to create model folder: %s", - model_local_config_path.c_str()); - emit download_error("Failed to create model folder."); - return; - } - } + if (!std::filesystem::exists(model_local_config_path)) { + // model folder does not exist, create it + if (!std::filesystem::create_directories(model_local_config_path)) { + obs_log(LOG_ERROR, "Failed to create model folder: %s", + model_local_config_path.c_str()); + emit download_error("Failed to create model folder."); + return; + } + } CURL *curl = curl_easy_init(); if (curl) { for (auto &model_download_file : this->model_info.files) { obs_log(LOG_INFO, "Model URL: %s", model_download_file.url.c_str()); - const std::string model_filename = get_filename_from_url(model_download_file.url); + const std::string model_filename = + get_filename_from_url(model_download_file.url); const std::string model_file_save_path = - (std::filesystem::path(model_local_config_path) / model_filename).string(); + (std::filesystem::path(model_local_config_path) / model_filename) + .string(); if (std::filesystem::exists(model_file_save_path)) { obs_log(LOG_INFO, "Model file already exists: %s", model_file_save_path.c_str()); diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp index f8b5293..ae3e8b1 100644 --- a/src/model-utils/model-downloader.cpp +++ b/src/model-utils/model-downloader.cpp @@ -12,45 +12,51 @@ #include -std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name) { - for (const auto &entry : std::filesystem::directory_iterator(folder_path)) { - if (entry.path().filename() == file_name) { - return entry.path().string(); - } - } - return ""; +std::string find_file_in_folder_by_name(const std::string &folder_path, + const std::string &file_name) +{ + for (const auto &entry : std::filesystem::directory_iterator(folder_path)) { + if (entry.path().filename() == file_name) { + return entry.path().string(); + } + } + return ""; } -std::string find_bin_file_in_folder(const std::string &model_local_folder_path) { - // find .bin file in folder - for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) { - if (entry.path().extension() == ".bin") { - const std::string bin_file_path = entry.path().string(); - obs_log(LOG_INFO, "Model bin file found in folder: %s", bin_file_path.c_str()); - return bin_file_path; - } - } - obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_local_folder_path.c_str()); - return ""; +std::string find_bin_file_in_folder(const std::string &model_local_folder_path) +{ + // find .bin file in folder + for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) { + if (entry.path().extension() == ".bin") { + const std::string bin_file_path = entry.path().string(); + obs_log(LOG_INFO, "Model bin file found in folder: %s", + bin_file_path.c_str()); + return bin_file_path; + } + } + obs_log(LOG_ERROR, "Model bin file not found in folder: %s", + model_local_folder_path.c_str()); + return ""; } std::string find_model_folder(const ModelInfo &model_info) { - char* data_folder_models = obs_module_file("models"); + char *data_folder_models = obs_module_file("models"); const std::filesystem::path module_data_models_folder = std::filesystem::absolute(data_folder_models); - bfree(data_folder_models); + bfree(data_folder_models); const std::string model_local_data_path = (module_data_models_folder / model_info.local_folder_name).string(); - obs_log(LOG_INFO, "Checking if model '%s' exists in data...", model_info.friendly_name.c_str()); + obs_log(LOG_INFO, "Checking if model '%s' exists in data...", + model_info.friendly_name.c_str()); if (!std::filesystem::exists(model_local_data_path)) { obs_log(LOG_INFO, "Model not found in data: %s", model_local_data_path.c_str()); } else { obs_log(LOG_INFO, "Model folder found in data: %s", model_local_data_path.c_str()); - return model_local_data_path; + return model_local_data_path; } // Check if model exists in the config folder @@ -59,7 +65,8 @@ std::string find_model_folder(const ModelInfo &model_info) std::filesystem::absolute(config_folder); bfree(config_folder); - obs_log(LOG_INFO, "Checking if model '%s' exists in config...", model_info.friendly_name.c_str()); + obs_log(LOG_INFO, "Checking if model '%s' exists in config...", + model_info.friendly_name.c_str()); const std::string model_local_config_path = (module_config_models_folder / model_info.local_folder_name).string(); @@ -75,13 +82,14 @@ std::string find_model_folder(const ModelInfo &model_info) return ""; } -std::string find_model_bin_file(const ModelInfo &model_info) { - const std::string model_local_folder_path = find_model_folder(model_info); - if (model_local_folder_path.empty()) { - return ""; - } +std::string find_model_bin_file(const ModelInfo &model_info) +{ + const std::string model_local_folder_path = find_model_folder(model_info); + if (model_local_folder_path.empty()) { + return ""; + } - return find_bin_file_in_folder(model_local_folder_path); + return find_bin_file_in_folder(model_local_folder_path); } void download_model_with_ui_dialog(const ModelInfo &model_info, diff --git a/src/model-utils/model-downloader.h b/src/model-utils/model-downloader.h index fca3337..b075d39 100644 --- a/src/model-utils/model-downloader.h +++ b/src/model-utils/model-downloader.h @@ -6,7 +6,8 @@ #include "model-downloader-types.h" -std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name); +std::string find_file_in_folder_by_name(const std::string &folder_path, + const std::string &file_name); std::string find_bin_file_in_folder(const std::string &path); std::string find_model_folder(const ModelInfo &model_info); std::string find_model_bin_file(const ModelInfo &model_info); diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 3e12c2a..90621fb 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -86,8 +86,8 @@ struct transcription_filter_data { std::string source_lang; std::string target_lang; - // Last transcription result - std::string last_text; + // Last transcription result + std::string last_text; // Text source to output the subtitles obs_weak_source_t *text_source; diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 4627c50..909aa37 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -276,27 +276,31 @@ void set_text_callback(struct transcription_filter_data *gf, std::string str_copy = result.text; #endif - // remove trailing spaces, newlines, tabs or punctuation - str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(), - [](unsigned char ch) { return !std::isspace(ch) || !std::ispunct(ch); }) - .base(), - str_copy.end()); + // remove trailing spaces, newlines, tabs or punctuation + str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(), + [](unsigned char ch) { + return !std::isspace(ch) || !std::ispunct(ch); + }) + .base(), + str_copy.end()); if (gf->translate) { - obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(), gf->target_lang.c_str()); - std::string translated_text; + obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(), + gf->target_lang.c_str()); + std::string translated_text; if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang, translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) { - if (gf->log_words) { - obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(), translated_text.c_str()); - } - str_copy = translated_text; + if (gf->log_words) { + obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(), + translated_text.c_str()); + } + str_copy = translated_text; } else { obs_log(gf->log_level, "Failed to translate text"); } } - gf->last_text = str_copy; + gf->last_text = str_copy; if (gf->caption_to_stream) { obs_output_t *streaming_output = obs_frontend_get_streaming_output(); @@ -565,8 +569,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->overlap_ms = (int)obs_data_get_int(settings, "overlap_size_msec"); gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms)); - obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d", - (int)gf->channels, (int)gf->frames, gf->sample_rate); + obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d", (int)gf->channels, + (int)gf->frames, gf->sample_rate); obs_log(gf->log_level, "setup audio resampler"); struct resample_info src, dst; @@ -781,8 +785,8 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_t *prop_src = obs_properties_add_list( translation_group, "translate_source_language", MT_("source_language"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); - obs_property_t *prop_add_context = obs_properties_add_bool( - translation_group, "translate_add_context", MT_("translate_add_context")); + obs_property_t *prop_add_context = obs_properties_add_bool( + translation_group, "translate_add_context", MT_("translate_add_context")); // Populate the dropdown with the language codes for (const auto &language : language_codes) { @@ -799,9 +803,11 @@ obs_properties_t *transcription_filter_properties(void *data) UNUSED_PARAMETER(property); // Show/Hide the translation group const bool translate_enabled = obs_data_get_bool(settings, "translate"); - for (const auto& prop : { "translate_target_language", "translate_source_language", "translate_add_context" }) { - obs_property_set_visible(obs_properties_get(props, prop), translate_enabled); - } + for (const auto &prop : {"translate_target_language", "translate_source_language", + "translate_add_context"}) { + obs_property_set_visible(obs_properties_get(props, prop), + translate_enabled); + } return true; }); diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp index 662d58f..56874c8 100644 --- a/src/translation/translation.cpp +++ b/src/translation/translation.cpp @@ -8,49 +8,52 @@ #include #include -void build_and_enable_translation(struct transcription_filter_data* gf, const std::string& model_file_path) +void build_and_enable_translation(struct transcription_filter_data *gf, + const std::string &model_file_path) { - gf->translation_ctx.local_model_folder_path = model_file_path; - if (build_translation_context(gf->translation_ctx) == OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) { - obs_log(LOG_INFO, "Enable translation"); - gf->translate = true; - } else { - obs_log(LOG_ERROR, "Failed to load CT2 model"); - gf->translate = false; - } + gf->translation_ctx.local_model_folder_path = model_file_path; + if (build_translation_context(gf->translation_ctx) == + OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) { + obs_log(LOG_INFO, "Enable translation"); + gf->translate = true; + } else { + obs_log(LOG_ERROR, "Failed to load CT2 model"); + gf->translate = false; + } } -void start_translation(struct transcription_filter_data* gf) +void start_translation(struct transcription_filter_data *gf) { - obs_log(LOG_INFO, "Starting translation..."); - - const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"]; - std::string model_file_found = find_model_folder(translation_model_info); - if (model_file_found == "") { - obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading..."); - download_model_with_ui_dialog( - translation_model_info, - [gf, model_file_found](int download_status, const std::string &path) { - if (download_status == 0) { - obs_log(LOG_INFO, "CT2 model download complete"); - build_and_enable_translation(gf, path); - } else { - obs_log(LOG_ERROR, "Model download failed"); - gf->translate = false; - } - }); - } else { - // Model exists, just load it - build_and_enable_translation(gf, model_file_found); - } + obs_log(LOG_INFO, "Starting translation..."); + + const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"]; + std::string model_file_found = find_model_folder(translation_model_info); + if (model_file_found == "") { + obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading..."); + download_model_with_ui_dialog( + translation_model_info, + [gf, model_file_found](int download_status, const std::string &path) { + if (download_status == 0) { + obs_log(LOG_INFO, "CT2 model download complete"); + build_and_enable_translation(gf, path); + } else { + obs_log(LOG_ERROR, "Model download failed"); + gf->translate = false; + } + }); + } else { + // Model exists, just load it + build_and_enable_translation(gf, model_file_found); + } } int build_translation_context(struct translation_context &translation_ctx) { std::string local_model_path = translation_ctx.local_model_folder_path; obs_log(LOG_INFO, "Building translation context from '%s'...", local_model_path.c_str()); - // find the SPM file in the model folder - std::string local_spm_path = find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model"); + // find the SPM file in the model folder + std::string local_spm_path = + find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model"); try { obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str()); @@ -107,36 +110,42 @@ int translate(struct translation_context &translation_ctx, const std::string &te try { // set input tokens std::vector input_tokens = {source_lang, ""}; - if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) { - input_tokens.insert(input_tokens.end(), translation_ctx.last_input_tokens.begin(), translation_ctx.last_input_tokens.end()); - } - std::vector new_input_tokens = translation_ctx.tokenizer(text); - input_tokens.insert(input_tokens.end(), new_input_tokens.begin(), new_input_tokens.end()); + if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) { + input_tokens.insert(input_tokens.end(), + translation_ctx.last_input_tokens.begin(), + translation_ctx.last_input_tokens.end()); + } + std::vector new_input_tokens = translation_ctx.tokenizer(text); + input_tokens.insert(input_tokens.end(), new_input_tokens.begin(), + new_input_tokens.end()); input_tokens.push_back(""); - translation_ctx.last_input_tokens = new_input_tokens; + translation_ctx.last_input_tokens = new_input_tokens; const std::vector> batch = {input_tokens}; - // get target prefix - std::vector target_prefix = {target_lang}; - if (translation_ctx.add_context && translation_ctx.last_translation_tokens.size() > 0) { - target_prefix.insert(target_prefix.end(), translation_ctx.last_translation_tokens.begin(), translation_ctx.last_translation_tokens.end()); - } - - const std::vector> target_prefix_batch = {target_prefix}; - const std::vector results = - translation_ctx.translator->translate_batch(batch, target_prefix_batch, - *translation_ctx.options); - - const auto &tokens_result = results[0].output(); - // take the tokens from the target_prefix length to the end - std::vector translation_tokens(tokens_result.begin() + target_prefix.size(), - tokens_result.end()); - - translation_ctx.last_translation_tokens = translation_tokens; - // detokenize - result = translation_ctx.detokenizer(translation_tokens); + // get target prefix + std::vector target_prefix = {target_lang}; + if (translation_ctx.add_context && + translation_ctx.last_translation_tokens.size() > 0) { + target_prefix.insert(target_prefix.end(), + translation_ctx.last_translation_tokens.begin(), + translation_ctx.last_translation_tokens.end()); + } + + const std::vector> target_prefix_batch = {target_prefix}; + const std::vector results = + translation_ctx.translator->translate_batch(batch, target_prefix_batch, + *translation_ctx.options); + + const auto &tokens_result = results[0].output(); + // take the tokens from the target_prefix length to the end + std::vector translation_tokens( + tokens_result.begin() + target_prefix.size(), tokens_result.end()); + + translation_ctx.last_translation_tokens = translation_tokens; + // detokenize + result = translation_ctx.detokenizer(translation_tokens); } catch (std::exception &e) { obs_log(LOG_ERROR, "Error: %s", e.what()); return OBS_POLYGLOT_TRANSLATION_FAIL; diff --git a/src/translation/translation.h b/src/translation/translation.h index bfa0a18..d79fd9d 100644 --- a/src/translation/translation.h +++ b/src/translation/translation.h @@ -7,19 +7,19 @@ #include struct translation_context { - std::string local_model_folder_path; + std::string local_model_folder_path; std::unique_ptr processor; std::unique_ptr translator; std::unique_ptr options; std::function(const std::string &)> tokenizer; std::function &)> detokenizer; - std::vector last_input_tokens; - std::vector last_translation_tokens; - // Use the last translation as context for the next translation - bool add_context; + std::vector last_input_tokens; + std::vector last_translation_tokens; + // Use the last translation as context for the next translation + bool add_context; }; -void start_translation(struct transcription_filter_data* gf); +void start_translation(struct transcription_filter_data *gf); int build_translation_context(struct translation_context &translation_ctx); int translate(struct translation_context &translation_ctx, const std::string &text, diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp index ab58320..485f28a 100644 --- a/src/whisper-utils/whisper-processing.cpp +++ b/src/whisper-utils/whisper-processing.cpp @@ -111,20 +111,22 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v struct whisper_context *init_whisper_context(const std::string &model_path_in) { - std::string model_path = model_path_in; + std::string model_path = model_path_in; obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str()); - if (std::filesystem::is_directory(model_path)) { - obs_log(LOG_INFO, "Model path is a directory, not a file, looking for .bin file in folder"); - // look for .bin file - const std::string model_bin_file = find_bin_file_in_folder(model_path); - if (model_bin_file.empty()) { - obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_path.c_str()); - return nullptr; - } - model_path = model_bin_file; - } + if (std::filesystem::is_directory(model_path)) { + obs_log(LOG_INFO, + "Model path is a directory, not a file, looking for .bin file in folder"); + // look for .bin file + const std::string model_bin_file = find_bin_file_in_folder(model_path); + if (model_bin_file.empty()) { + obs_log(LOG_ERROR, "Model bin file not found in folder: %s", + model_path.c_str()); + return nullptr; + } + model_path = model_bin_file; + } struct whisper_context_params cparams = whisper_context_default_params(); #ifdef LOCALVOCAL_WITH_CUDA diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp index 8d7de1f..73e1bc8 100644 --- a/src/whisper-utils/whisper-utils.cpp +++ b/src/whisper-utils/whisper-utils.cpp @@ -12,8 +12,8 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t if (gf->whisper_model_path.empty() || gf->whisper_model_path != new_model_path || is_external_model) { // model path changed, reload the model - obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path.c_str(), - new_model_path.c_str()); + obs_log(gf->log_level, "model path changed from %s to %s", + gf->whisper_model_path.c_str(), new_model_path.c_str()); // check if the new model is external file if (!is_external_model) { @@ -33,12 +33,12 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t if (model_file_found == "") { obs_log(LOG_WARNING, "Whisper model does not exist"); download_model_with_ui_dialog( - model_info, - [gf, new_model_path](int download_status, const std::string &path) { + model_info, [gf, new_model_path](int download_status, + const std::string &path) { if (download_status == 0) { obs_log(LOG_INFO, "Model download complete"); - gf->whisper_model_path = new_model_path; + gf->whisper_model_path = new_model_path; start_whisper_thread_with_path(gf, path); } else { obs_log(LOG_ERROR, "Model download failed"); @@ -46,7 +46,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t }); } else { // Model exists, just load it - gf->whisper_model_path = new_model_path; + gf->whisper_model_path = new_model_path; start_whisper_thread_with_path(gf, model_file_found); } } else { From 16ca8fb873be6d44eabb8b2ff38826c5455df2ec Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:22:34 -0400 Subject: [PATCH 04/10] Update build plugin and version, fix translation and whisper-utils --- .github/actions/build-plugin/action.yaml | 2 ++ buildspec.json | 2 +- src/translation/translation.cpp | 2 +- src/whisper-utils/whisper-utils.cpp | 6 +++--- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/actions/build-plugin/action.yaml b/.github/actions/build-plugin/action.yaml index 03e0e20..ac5950d 100644 --- a/.github/actions/build-plugin/action.yaml +++ b/.github/actions/build-plugin/action.yaml @@ -86,6 +86,8 @@ runs: } .github/scripts/Build-Windows.ps1 @BuildArgs + env: + CPU_OR_CUDA: ${{ inputs.cublas }} - name: Create Summary 📊 if: contains(fromJSON('["Linux", "macOS"]'),runner.os) diff --git a/buildspec.json b/buildspec.json index acb2be7..4aac87c 100644 --- a/buildspec.json +++ b/buildspec.json @@ -45,7 +45,7 @@ } }, "name": "obs-localvocal", - "version": "0.2.0", + "version": "0.2.1", "author": "Roy Shilkrot", "website": "https://github.com/occ-ai/obs-localvocal", "email": "roy.shil@gmail.com", diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp index 56874c8..95e58ae 100644 --- a/src/translation/translation.cpp +++ b/src/translation/translation.cpp @@ -95,7 +95,7 @@ int build_translation_context(struct translation_context &translation_ctx) translation_ctx.options->max_decoding_length = 40; translation_ctx.options->use_vmap = true; translation_ctx.options->return_scores = false; - translation_ctx.options->repetition_penalty = 1.1; + translation_ctx.options->repetition_penalty = 1.1f; translation_ctx.options->no_repeat_ngram_size = 2; } catch (std::exception &e) { obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what()); diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp index 73e1bc8..57dc954 100644 --- a/src/whisper-utils/whisper-utils.cpp +++ b/src/whisper-utils/whisper-utils.cpp @@ -71,8 +71,8 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t } } else { // model path did not change - obs_log(LOG_DEBUG, "model path did not change: %s == %s", gf->whisper_model_path, - new_model_path.c_str()); + obs_log(LOG_DEBUG, "model path did not change: %s == %s", + gf->whisper_model_path.c_str(), new_model_path.c_str()); } } @@ -94,7 +94,7 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf) gf->whisper_thread.join(); } if (!gf->whisper_model_path.empty()) { - gf->whisper_model_path.clear(); + gf->whisper_model_path = ""; } } From eaa2040058782810d885f985feaa72a7d05c9f35 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:31:19 -0400 Subject: [PATCH 05/10] Fix compiler warning and simplify code in transcription-filter.cpp --- cmake/common/compiler_common.cmake | 2 +- src/transcription-filter.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake index 8ac423f..9b86272 100644 --- a/cmake/common/compiler_common.cmake +++ b/cmake/common/compiler_common.cmake @@ -34,7 +34,7 @@ set(_obs_clang_c_options -Wfour-char-constants -Winfinite-recursion -Wint-conversion - -Wnewline-eof + -Wno-newline-eof -Wno-conversion -Wno-float-conversion -Wno-implicit-fallthrough diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 909aa37..b3df64f 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -785,8 +785,8 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_t *prop_src = obs_properties_add_list( translation_group, "translate_source_language", MT_("source_language"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); - obs_property_t *prop_add_context = obs_properties_add_bool( - translation_group, "translate_add_context", MT_("translate_add_context")); + obs_properties_add_bool(translation_group, "translate_add_context", + MT_("translate_add_context")); // Populate the dropdown with the language codes for (const auto &language : language_codes) { From 1135ca83d7b1d1cfacfb95e17c2f67fa2924bc80 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:37:19 -0400 Subject: [PATCH 06/10] Update CMakePresets.json and buildspec.json --- CMakePresets.json | 36 ++++++++++++++++++++---------------- buildspec.json | 20 ++++++++++---------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index bc80925..6c429e1 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -6,10 +6,19 @@ "patch": 0 }, "configurePresets": [ + { + "name": "template", + "hidden": true, + "cacheVariables": { + "ENABLE_FRONTEND_API": true, + "ENABLE_QT": false + } + }, { "name": "macos", "displayName": "macOS Universal", "description": "Build for macOS 11.0+ (Universal binary)", + "inherits": ["template"], "binaryDir": "${sourceDir}/build_macos", "condition": { "type": "equals", @@ -17,14 +26,12 @@ "rhs": "Darwin" }, "generator": "Xcode", - "warnings": {"dev": true, "deprecated": true}, + "warnings": { "dev": true, "deprecated": true }, "cacheVariables": { "QT_VERSION": "6", "CMAKE_OSX_DEPLOYMENT_TARGET": "11.0", "CODESIGN_IDENTITY": "$penv{CODESIGN_IDENT}", - "CODESIGN_TEAM": "$penv{CODESIGN_TEAM}", - "ENABLE_FRONTEND_API": true, - "ENABLE_QT": true + "CODESIGN_TEAM": "$penv{CODESIGN_TEAM}" } }, { @@ -41,6 +48,7 @@ "name": "windows-x64", "displayName": "Windows x64", "description": "Build for Windows x64", + "inherits": ["template"], "binaryDir": "${sourceDir}/build_x64", "condition": { "type": "equals", @@ -49,12 +57,10 @@ }, "generator": "Visual Studio 17 2022", "architecture": "x64", - "warnings": {"dev": true, "deprecated": true}, + "warnings": { "dev": true, "deprecated": true }, "cacheVariables": { "QT_VERSION": "6", - "CMAKE_SYSTEM_VERSION": "10.0.18363.657", - "ENABLE_FRONTEND_API": true, - "ENABLE_QT": true + "CMAKE_SYSTEM_VERSION": "10.0.18363.657" } }, { @@ -70,6 +76,7 @@ "name": "linux-x86_64", "displayName": "Linux x86_64", "description": "Build for Linux x86_64", + "inherits": ["template"], "binaryDir": "${sourceDir}/build_x86_64", "condition": { "type": "equals", @@ -77,12 +84,10 @@ "rhs": "Linux" }, "generator": "Ninja", - "warnings": {"dev": true, "deprecated": true}, + "warnings": { "dev": true, "deprecated": true }, "cacheVariables": { "QT_VERSION": "6", - "CMAKE_BUILD_TYPE": "RelWithDebInfo", - "ENABLE_FRONTEND_API": true, - "ENABLE_QT": true + "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, { @@ -99,6 +104,7 @@ "name": "linux-aarch64", "displayName": "Linux aarch64", "description": "Build for Linux aarch64", + "inherits": ["template"], "binaryDir": "${sourceDir}/build_aarch64", "condition": { "type": "equals", @@ -106,12 +112,10 @@ "rhs": "Linux" }, "generator": "Ninja", - "warnings": {"dev": true, "deprecated": true}, + "warnings": { "dev": true, "deprecated": true }, "cacheVariables": { "QT_VERSION": "6", - "CMAKE_BUILD_TYPE": "RelWithDebInfo", - "ENABLE_FRONTEND_API": true, - "ENABLE_QT": true + "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, { diff --git a/buildspec.json b/buildspec.json index 4aac87c..1c349f5 100644 --- a/buildspec.json +++ b/buildspec.json @@ -1,33 +1,33 @@ { "dependencies": { "obs-studio": { - "version": "29.1.2", + "version": "30.0.2", "baseUrl": "https://github.com/obsproject/obs-studio/archive/refs/tags", "label": "OBS sources", "hashes": { - "macos": "215f1fa5772c5dd9f3d6e35b0cb573912b00320149666a77864f9d305525504b", - "windows-x64": "46d451f3f42b9d2c59339ec268165849c7b7904cdf1cc2a8d44c015815a9e37d" + "macos": "be12c3ad0a85713750d8325e4b1db75086223402d7080d0e3c2833d7c5e83c27", + "windows-x64": "970058c49322cfa9cd6d620abb393fed89743ba7e74bd9dbb6ebe0ea8141d9c7" } }, "prebuilt": { - "version": "2023-04-12", + "version": "2023-11-03", "baseUrl": "https://github.com/obsproject/obs-deps/releases/download", "label": "Pre-Built obs-deps", "hashes": { - "macos": "9535c6e1ad96f7d49960251e85a245774088d48da1d602bb82f734b10219125a", - "windows-x64": "c13a14a1acc4224b21304d97b63da4121de1ed6981297e50496fbc474abc0503" + "macos": "90c2fc069847ec2768dcc867c1c63b112c615ed845a907dc44acab7a97181974", + "windows-x64": "d0825a6fb65822c993a3059edfba70d72d2e632ef74893588cf12b1f0d329ce6" } }, "qt6": { - "version": "2023-04-12", + "version": "2023-11-03", "baseUrl": "https://github.com/obsproject/obs-deps/releases/download", "label": "Pre-Built Qt6", "hashes": { - "macos": "eb7614544ab4f3d2c6052c797635602280ca5b028a6b987523d8484222ce45d1", - "windows-x64": "4d39364b8a8dee5aa24fcebd8440d5c22bb4551c6b440ffeacce7d61f2ed1add" + "macos": "ba4a7152848da0053f63427a2a2cb0a199af3992997c0db08564df6f48c9db98", + "windows-x64": "bc57dedf76b47119a6dce0435a2f21b35b08c8f2948b1cb34a157320f77732d1" }, "debugSymbols": { - "windows-x64": "f34ee5067be19ed370268b15c53684b7b8aaa867dc800b68931df905d679e31f" + "windows-x64": "fd8ecd1d8cd2ef049d9f4d7fb5c134f784836d6020758094855dfa98bd025036" } } }, From 2aa0be736e5c072ab706f1ea7aeaee9bb736e57d Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:47:46 -0400 Subject: [PATCH 07/10] Fix Clang compiler warnings --- cmake/common/compiler_common.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake index 9b86272..de86046 100644 --- a/cmake/common/compiler_common.cmake +++ b/cmake/common/compiler_common.cmake @@ -34,13 +34,14 @@ set(_obs_clang_c_options -Wfour-char-constants -Winfinite-recursion -Wint-conversion - -Wno-newline-eof -Wno-conversion + -Wno-error=newline-eof -Wno-float-conversion -Wno-implicit-fallthrough -Wno-missing-braces -Wno-missing-field-initializers -Wno-missing-prototypes + -Wno-newline-eof -Wno-semicolon-before-method-body -Wno-shadow -Wno-sign-conversion From 0d7442275cd56c26215a24e212d141b33b1dce08 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 12:47:59 -0400 Subject: [PATCH 08/10] Enable QT in CMakePresets.json --- CMakePresets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakePresets.json b/CMakePresets.json index 6c429e1..053671c 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -11,7 +11,7 @@ "hidden": true, "cacheVariables": { "ENABLE_FRONTEND_API": true, - "ENABLE_QT": false + "ENABLE_QT": true } }, { From 69ce2cf81804e9889da24257398ce482979a687f Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 13:49:58 -0400 Subject: [PATCH 09/10] Fix compiler warnings and create missing config folder --- cmake/common/compiler_common.cmake | 3 +-- cmake/macos/compilerconfig.cmake | 1 + src/model-utils/model-downloader-ui.cpp | 4 ++-- src/transcription-filter.cpp | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake index de86046..8ac423f 100644 --- a/cmake/common/compiler_common.cmake +++ b/cmake/common/compiler_common.cmake @@ -34,14 +34,13 @@ set(_obs_clang_c_options -Wfour-char-constants -Winfinite-recursion -Wint-conversion + -Wnewline-eof -Wno-conversion - -Wno-error=newline-eof -Wno-float-conversion -Wno-implicit-fallthrough -Wno-missing-braces -Wno-missing-field-initializers -Wno-missing-prototypes - -Wno-newline-eof -Wno-semicolon-before-method-body -Wno-shadow -Wno-sign-conversion diff --git a/cmake/macos/compilerconfig.cmake b/cmake/macos/compilerconfig.cmake index c40a532..524aab5 100644 --- a/cmake/macos/compilerconfig.cmake +++ b/cmake/macos/compilerconfig.cmake @@ -55,3 +55,4 @@ else() endif() add_compile_definitions($<$:DEBUG> $<$:_DEBUG> SIMDE_ENABLE_OPENMP) +add_compile_options(-Wno-error=newline-eof) diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp index b978379..023ccb6 100644 --- a/src/model-utils/model-downloader-ui.cpp +++ b/src/model-utils/model-downloader-ui.cpp @@ -133,11 +133,11 @@ void ModelDownloadWorker::download_model() // Check if the config folder exists if (!std::filesystem::exists(module_config_models_folder)) { obs_log(LOG_WARNING, "Config folder does not exist: %s", - module_config_models_folder); + module_config_models_folder.string().c_str()); // Create the config folder if (!std::filesystem::create_directories(module_config_models_folder)) { obs_log(LOG_ERROR, "Failed to create config folder: %s", - module_config_models_folder); + module_config_models_folder.string().c_str()); emit download_error("Failed to create config folder."); return; } diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index b3df64f..ebd457c 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -186,8 +186,8 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) } } -#define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0) -#define is_trail_byte(c) (((c)&0xc0) == 0x80) +#define is_lead_byte(c) (((c) & 0xe0) == 0xc0 || ((c) & 0xf0) == 0xe0 || ((c) & 0xf8) == 0xf0) +#define is_trail_byte(c) (((c) & 0xc0) == 0x80) inline int lead_byte_length(const uint8_t c) { From 256451482be6f2f20f806758be33377654784eea Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 1 Apr 2024 13:53:34 -0400 Subject: [PATCH 10/10] Fix formatting of is_lead_byte and is_trail_byte macros --- src/transcription-filter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index ebd457c..b3df64f 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -186,8 +186,8 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) } } -#define is_lead_byte(c) (((c) & 0xe0) == 0xc0 || ((c) & 0xf0) == 0xe0 || ((c) & 0xf8) == 0xf0) -#define is_trail_byte(c) (((c) & 0xc0) == 0x80) +#define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0) +#define is_trail_byte(c) (((c)&0xc0) == 0x80) inline int lead_byte_length(const uint8_t c) {