From 987b3df6bab593778db2dd62e09b8b76015a488f Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Fri, 29 Mar 2024 22:43:39 -0400
Subject: [PATCH 01/10] Add translation feature and dependencies

---
 .github/scripts/Package-Windows.ps1 |   7 +-
 CMakeLists.txt                      |   7 +-
 cmake/BuildCTranslate2.cmake        | 104 ++++++++++++++
 cmake/BuildSentencepiece.cmake      |  61 +++++++++
 data/locale/en-US.ini               |   3 +
 src/transcription-filter-data.h     |   8 ++
 src/transcription-filter.cpp        |  68 +++++++++
 src/translation/language_codes.h    | 205 ++++++++++++++++++++++++++++
 src/translation/translation.cpp     |  87 ++++++++++++
 src/translation/translation.h       |  27 ++++
 10 files changed, 575 insertions(+), 2 deletions(-)
 create mode 100644 cmake/BuildCTranslate2.cmake
 create mode 100644 cmake/BuildSentencepiece.cmake
 create mode 100644 src/translation/language_codes.h
 create mode 100644 src/translation/translation.cpp
 create mode 100644 src/translation/translation.h

diff --git a/.github/scripts/Package-Windows.ps1 b/.github/scripts/Package-Windows.ps1
index a09f54a..3d1a07c 100644
--- a/.github/scripts/Package-Windows.ps1
+++ b/.github/scripts/Package-Windows.ps1
@@ -49,7 +49,12 @@ function Package {
     $BuildSpec = Get-Content -Path ${BuildSpecFile} -Raw | ConvertFrom-Json
     $ProductName = $BuildSpec.name
     $ProductVersion = $BuildSpec.version
-    $CudaName = "cuda${Cublas}"
+    # Check if $cublas is cpu or cuda
+    if ( $Cublas -eq 'cpu' ) {
+        $CudaName = 'cpu'
+    } else {
+        $CudaName = "cuda${Cublas}"
+    }
 
     $OutputName = "${ProductName}-${ProductVersion}-windows-${Target}-${CudaName}"
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ac7339f..90473bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,10 @@ endif()
 include(cmake/BuildWhispercpp.cmake)
 target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Whispercpp)
 
+include(cmake/BuildCTranslate2.cmake)
+include(cmake/BuildSentencepiece.cmake)
+target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ct2 sentencepiece)
+
 target_sources(
   ${CMAKE_PROJECT_NAME}
   PRIVATE src/plugin-main.c
@@ -65,6 +69,7 @@ target_sources(
           src/whisper-utils/whisper-processing.cpp
           src/model-utils/model-downloader.cpp
           src/model-utils/model-downloader-ui.cpp
-          src/whisper-utils/whisper-utils.cpp)
+          src/whisper-utils/whisper-utils.cpp
+          src/translation/translation.cpp)
 
 set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
diff --git a/cmake/BuildCTranslate2.cmake b/cmake/BuildCTranslate2.cmake
new file mode 100644
index 0000000..df48fdc
--- /dev/null
+++ b/cmake/BuildCTranslate2.cmake
@@ -0,0 +1,104 @@
+# build the CTranslate2 library from source https://github.com/OpenNMT/CTranslate2.git
+
+include(ExternalProject)
+include(FetchContent)
+
+if(APPLE)
+
+  FetchContent_Declare(
+    ctranslate2_fetch
+    URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/libctranslate2-macos-Release-1.1.1.tar.gz
+    URL_HASH SHA256=da04d88ecc1ea105f8ee672e4eab33af96e50c999c5cc8170e105e110392182b)
+  FetchContent_MakeAvailable(ctranslate2_fetch)
+
+  add_library(ct2 INTERFACE)
+  target_link_libraries(ct2 INTERFACE "-framework Accelerate" ${ctranslate2_fetch_SOURCE_DIR}/lib/libctranslate2.a
+                                      ${ctranslate2_fetch_SOURCE_DIR}/lib/libcpu_features.a)
+  set_target_properties(ct2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ctranslate2_fetch_SOURCE_DIR}/include)
+  target_compile_options(ct2 INTERFACE -Wno-shorten-64-to-32)
+
+elseif(WIN32)
+
+  # CPU_OR_CUDA selects the prebuilt package: "cpu", or a CUDA version ("12.2.0" / "11.8.0")
+  if(NOT DEFINED ENV{CPU_OR_CUDA})
+    message(FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to 'cpu' or a CUDA version (12.2.0, 11.8.0)")
+  endif()
+
+  if("$ENV{CPU_OR_CUDA}" STREQUAL "cpu")
+    FetchContent_Declare(
+      ctranslate2_fetch
+      URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cpu.zip
+      URL_HASH SHA256=30ff8b2499b8d3b5a6c4d6f7f8ddbc89e745ff06e0050b645e3b7c9b369451a3)
+  else()
+    # any non-"cpu" value is treated as a CUDA version; expose it to the sources as compile definitions
+    add_compile_definitions(POLYGLOT_WITH_CUDA)
+    add_compile_definitions(POLYGLOT_CUDA_VERSION=$ENV{CPU_OR_CUDA})
+
+    if("$ENV{CPU_OR_CUDA}" STREQUAL "12.2.0")
+      FetchContent_Declare(
+        ctranslate2_fetch
+        URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cuda12.2.0.zip
+        URL_HASH SHA256=131724d510f9f2829970953a1bc9e4e8fb7b4cbc8218e32270dcfe6172a51558)
+    elseif("$ENV{CPU_OR_CUDA}" STREQUAL "11.8.0")
+      FetchContent_Declare(
+        ctranslate2_fetch
+        URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cuda11.8.0.zip
+        URL_HASH SHA256=a120bee82f821df35a4646add30ac18b5c23e4e16b56fa7ba338eeae336e0d81)
+    else()
+      message(FATAL_ERROR "Unsupported CUDA version: $ENV{CPU_OR_CUDA}")
+    endif()
+  endif()
+
+  FetchContent_MakeAvailable(ctranslate2_fetch)
+
+  add_library(ct2 INTERFACE)
+  target_link_libraries(ct2 INTERFACE ${ctranslate2_fetch_SOURCE_DIR}/lib/ctranslate2.lib)
+  set_target_properties(ct2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ctranslate2_fetch_SOURCE_DIR}/include)
+  target_compile_options(ct2 INTERFACE /wd4267 /wd4244 /wd4305 /wd4996 /wd4099)
+
+  file(GLOB CT2_DLLS ${ctranslate2_fetch_SOURCE_DIR}/bin/*.dll)
+  install(FILES ${CT2_DLLS} DESTINATION "obs-plugins/64bit")
+else()
+  set(CT2_VERSION "4.1.1")
+  set(CT2_URL "https://github.com/OpenNMT/CTranslate2.git")
+  set(CT2_OPENBLAS_CMAKE_ARGS -DWITH_OPENBLAS=OFF)
+  # BUILD_SHARED_LIBS=OFF yields a static archive, so its path uses the static library prefix and suffix
+  set(CT2_CMAKE_PLATFORM_OPTIONS -DBUILD_SHARED_LIBS=OFF -DOPENMP_RUNTIME=NONE -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
+  set(CT2_LIB_INSTALL_LOCATION lib/${CMAKE_STATIC_LIBRARY_PREFIX}ctranslate2${CMAKE_STATIC_LIBRARY_SUFFIX})
+
+  ExternalProject_Add(
+    ct2_build
+    GIT_REPOSITORY ${CT2_URL}
+    GIT_TAG v${CT2_VERSION}
+    GIT_PROGRESS 1
+    BUILD_COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config ${CMAKE_BUILD_TYPE}
+    CMAKE_GENERATOR ${CMAKE_GENERATOR}
+    INSTALL_COMMAND ${CMAKE_COMMAND} --install <BINARY_DIR> --config ${CMAKE_BUILD_TYPE}
+    BUILD_BYPRODUCTS <INSTALL_DIR>/${CT2_LIB_INSTALL_LOCATION}
+    CMAKE_ARGS -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM}
+               -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
+               -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+               -DWITH_CUDA=OFF
+               -DWITH_MKL=OFF
+               -DWITH_TESTS=OFF
+               -DWITH_EXAMPLES=OFF
+               -DWITH_TFLITE=OFF
+               -DWITH_TRT=OFF
+               -DWITH_PYTHON=OFF
+               -DWITH_SERVER=OFF
+               -DWITH_COVERAGE=OFF
+               -DWITH_PROFILING=OFF
+               -DBUILD_CLI=OFF
+               ${CT2_OPENBLAS_CMAKE_ARGS}
+               ${CT2_CMAKE_PLATFORM_OPTIONS})
+  ExternalProject_Get_Property(ct2_build INSTALL_DIR)
+
+  add_library(ct2::ct2 STATIC IMPORTED GLOBAL)
+  add_dependencies(ct2::ct2 ct2_build)
+  set_target_properties(ct2::ct2 PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/${CT2_LIB_INSTALL_LOCATION})
+  # headers go on the wrapper: <INSTALL_DIR>/include does not exist until ct2_build has been installed
+  add_library(ct2 INTERFACE)
+  target_link_libraries(ct2 INTERFACE ct2::ct2)
+  target_include_directories(ct2 SYSTEM INTERFACE ${INSTALL_DIR}/include)
+
+endif()
diff --git a/cmake/BuildSentencepiece.cmake b/cmake/BuildSentencepiece.cmake
new file mode 100644
index 0000000..024283e
--- /dev/null
+++ b/cmake/BuildSentencepiece.cmake
@@ -0,0 +1,61 @@
+# build sentencepiece from "https://github.com/google/sentencepiece.git"
+
+if(APPLE)
+
+  include(FetchContent)
+
+  FetchContent_Declare(
+    sentencepiece_fetch
+    URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/libsentencepiece-macos-Release-1.1.1.tar.gz
+    URL_HASH SHA256=c911f1e84ea94925a8bc3fd3257185b2e18395075509c8659cc7003a979e0b32)
+  FetchContent_MakeAvailable(sentencepiece_fetch)
+  add_library(sentencepiece INTERFACE)
+  target_link_libraries(sentencepiece INTERFACE ${sentencepiece_fetch_SOURCE_DIR}/lib/libsentencepiece.a)
+  set_target_properties(sentencepiece PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                 ${sentencepiece_fetch_SOURCE_DIR}/include)
+elseif(WIN32)
+  # include the module here so this branch does not depend on another file having loaded it first
+  include(FetchContent)
+  FetchContent_Declare(
+    sentencepiece_fetch
+    URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.1.1/sentencepiece-windows-0.2.0-Release.zip
+    URL_HASH SHA256=846699c7fa1e8918b71ed7f2bd5cd60e47e51105e1d84e3192919b4f0f10fdeb)
+  FetchContent_MakeAvailable(sentencepiece_fetch)
+  add_library(sentencepiece INTERFACE)
+  target_link_libraries(sentencepiece INTERFACE ${sentencepiece_fetch_SOURCE_DIR}/lib/sentencepiece.lib)
+  target_include_directories(sentencepiece INTERFACE ${sentencepiece_fetch_SOURCE_DIR}/include)
+
+else()
+
+  set(SP_URL
+      "https://github.com/google/sentencepiece.git"
+      CACHE STRING "URL of sentencepiece repository")
+
+  set(SP_CMAKE_OPTIONS -DSPM_ENABLE_SHARED=OFF)
+  set(SENTENCEPIECE_INSTALL_LIB_LOCATION lib/${CMAKE_STATIC_LIBRARY_PREFIX}sentencepiece${CMAKE_STATIC_LIBRARY_SUFFIX})
+
+  include(ExternalProject)
+
+  ExternalProject_Add(
+    sentencepiece_build
+    GIT_REPOSITORY ${SP_URL}
+    GIT_TAG v0.1.99
+    BUILD_COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config ${CMAKE_BUILD_TYPE}
+    CMAKE_GENERATOR ${CMAKE_GENERATOR}
+    INSTALL_COMMAND ${CMAKE_COMMAND} --install <BINARY_DIR> --config ${CMAKE_BUILD_TYPE}
+    BUILD_BYPRODUCTS <INSTALL_DIR>/${SENTENCEPIECE_INSTALL_LIB_LOCATION}
+    CMAKE_ARGS -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
+               -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} ${SP_CMAKE_OPTIONS})
+  ExternalProject_Get_Property(sentencepiece_build INSTALL_DIR)
+
+  add_library(libsentencepiece STATIC IMPORTED GLOBAL)
+  add_dependencies(libsentencepiece sentencepiece_build)
+  set_target_properties(libsentencepiece PROPERTIES IMPORTED_LOCATION
+                                                    ${INSTALL_DIR}/${SENTENCEPIECE_INSTALL_LIB_LOCATION})
+
+  add_library(sentencepiece INTERFACE)
+  add_dependencies(sentencepiece libsentencepiece)
+  target_link_libraries(sentencepiece INTERFACE libsentencepiece)
+  target_include_directories(sentencepiece INTERFACE ${INSTALL_DIR}/include)
+
+endif()
diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini
index 31a2293..18b39a7 100644
--- a/data/locale/en-US.ini
+++ b/data/locale/en-US.ini
@@ -44,3 +44,6 @@ process_while_muted="Process speech while source is muted"
 rename_file_to_match_recording="Rename file to match recording"
 min_sub_duration="Min. sub duration (ms)"
 advanced_settings="Advanced Settings"
+target_language="Target language"
+source_language="Source language"
+translate="Translate"
diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h
index f370765..5dceb70 100644
--- a/src/transcription-filter-data.h
+++ b/src/transcription-filter-data.h
@@ -15,6 +15,8 @@
 #include <functional>
 #include <string>
 
+#include "translation/translation.h"
+
 #define MAX_PREPROC_CHANNELS 10
 
 #define MT_ obs_module_text
@@ -80,6 +82,9 @@ struct transcription_filter_data {
 	bool save_only_while_recording = false;
 	bool process_while_muted = false;
 	bool rename_file_to_match_recording = false;
+	bool translate = false;
+	std::string source_lang;
+	std::string target_lang;
 
 	// Text source to output the subtitles
 	obs_weak_source_t *text_source;
@@ -98,6 +103,9 @@ struct transcription_filter_data {
 	std::mutex *whisper_ctx_mutex;
 	std::condition_variable *wshiper_thread_cv;
 
+	// translation context
+	struct translation_context translation_ctx;
+
 	// ctor
 	transcription_filter_data()
 	{
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index 42ccf5d..50b718b 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -8,6 +8,7 @@
 #include "whisper-utils/whisper-processing.h"
 #include "whisper-utils/whisper-language.h"
 #include "whisper-utils/whisper-utils.h"
+#include "translation/language_codes.h"
 
 #include <algorithm>
 #include <fstream>
@@ -274,6 +275,16 @@ void set_text_callback(struct transcription_filter_data *gf,
 	std::string str_copy = result.text;
 #endif
 
+	if (gf->translate) {
+		std::string translated_text;
+		if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang,
+			      translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) {
+			str_copy = translated_text;
+		} else {
+			obs_log(gf->log_level, "Failed to translate text");
+		}
+	}
+
 	if (gf->caption_to_stream) {
 		obs_output_t *streaming_output = obs_frontend_get_streaming_output();
 		if (streaming_output) {
@@ -387,6 +398,26 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	gf->min_sub_duration = (int)obs_data_get_int(s, "min_sub_duration");
 	gf->last_sub_render_time = 0;
 
+	bool new_translate = obs_data_get_bool(s, "translate");
+	gf->source_lang = obs_data_get_string(s, "translate_source_language");
+	gf->target_lang = obs_data_get_string(s, "translate_target_language");
+
+	if (new_translate != gf->translate) {
+		if (new_translate) {
+			if (build_translation_context(gf->translation_ctx,
+						      "models/m2m100-418m.sp.model",
+						      "models/m2m100-418m") !=
+			    OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) {
+				obs_log(gf->log_level, "Failed to initialize translation context");
+				gf->translate = false;
+			} else {
+				gf->translate = true;
+			}
+		} else {
+			gf->translate = false;
+		}
+	}
+
 	obs_log(gf->log_level, "transcription_filter: update text source");
 	// update the text source
 	const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources");
@@ -669,6 +700,9 @@ void transcription_filter_defaults(obs_data_t *s)
 	obs_data_set_default_int(s, "step_size_msec", 1000);
 	obs_data_set_default_int(s, "min_sub_duration", 3000);
 	obs_data_set_default_bool(s, "advanced_settings", false);
+	obs_data_set_default_bool(s, "translate", false);
+	obs_data_set_default_string(s, "translate_target_language", "__es__");
+	obs_data_set_default_string(s, "translate_source_language", "__en__");
 
 	// Whisper parameters
 	obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH);
@@ -730,6 +764,40 @@ obs_properties_t *transcription_filter_properties(void *data)
 		return true;
 	});
 
+	// add translation option group
+	obs_properties_t *translation_group = obs_properties_create();
+	obs_property_t *translation_group_prop = obs_properties_add_group(
+		ppts, "translate", MT_("translate"), OBS_GROUP_CHECKABLE, translation_group);
+	// add target language selection
+	obs_property_t *prop_tgt = obs_properties_add_list(
+		translation_group, "translate_target_language", MT_("target_language"),
+		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
+	obs_property_t *prop_src = obs_properties_add_list(
+		translation_group, "translate_source_language", MT_("source_language"),
+		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
+
+	// Populate the dropdown with the language codes
+	for (const auto &language : language_codes) {
+		obs_property_list_add_string(prop_tgt, language.second.c_str(),
+					     language.first.c_str());
+		obs_property_list_add_string(prop_src, language.second.c_str(),
+					     language.first.c_str());
+	}
+
+	// add callback to enable/disable translation group
+	obs_property_set_modified_callback(translation_group_prop, [](obs_properties_t *props,
+								      obs_property_t *property,
+								      obs_data_t *settings) {
+		UNUSED_PARAMETER(property);
+		// Show/Hide the translation group
+		const bool translate_enabled = obs_data_get_bool(settings, "translate");
+		obs_property_set_visible(obs_properties_get(props, "translate_target_language"),
+					 translate_enabled);
+		obs_property_set_visible(obs_properties_get(props, "translate_source_language"),
+					 translate_enabled);
+		return true;
+	});
+
 	obs_properties_add_bool(ppts, "process_while_muted", MT_("process_while_muted"));
 	obs_property_t *subs_output =
 		obs_properties_add_list(ppts, "subtitle_sources", MT_("subtitle_sources"),
diff --git a/src/translation/language_codes.h b/src/translation/language_codes.h
new file mode 100644
index 0000000..7922446
--- /dev/null
+++ b/src/translation/language_codes.h
@@ -0,0 +1,205 @@
+#pragma once
+
+#include <map>
+#include <string>
+
+std::map<std::string, std::string> language_codes = {{"__af__", "Afrikaans"},
+						     {"__am__", "Amharic"},
+						     {"__ar__", "Arabic"},
+						     {"__ast__", "Asturian"},
+						     {"__az__", "Azerbai"},
+						     {"__ba__", "Bashkir"},
+						     {"__be__", "Belarusian"},
+						     {"__bg__", "Bulgarian"},
+						     {"__bn__", "Bengali"},
+						     {"__br__", "Breton"},
+						     {"__bs__", "Bosnian"},
+						     {"__ca__", "Catalan"},
+						     {"__ceb__", "Cebuano"},
+						     {"__cs__", "Czech"},
+						     {"__cy__", "Welsh"},
+						     {"__da__", "Danish"},
+						     {"__de__", "German"},
+						     {"__el__", "Greek"},
+						     {"__en__", "English"},
+						     {"__es__", "Spanish"},
+						     {"__et__", "Estonian"},
+						     {"__fa__", "Persian"},
+						     {"__ff__", "Fulah"},
+						     {"__fi__", "Finnish"},
+						     {"__fr__", "French"},
+						     {"__fy__", "Frisian"},
+						     {"__ga__", "Irish"},
+						     {"__gd__", "Scottish Gaelic"},
+						     {"__gl__", "Galician"},
+						     {"__gu__", "Gujarati"},
+						     {"__ha__", "Hausa"},
+						     {"__he__", "Hebrew"},
+						     {"__hi__", "Hindi"},
+						     {"__hr__", "Croatian"},
+						     {"__ht__", "Haitian Creole"},
+						     {"__hu__", "Hungarian"},
+						     {"__hy__", "Armenian"},
+						     {"__id__", "Indonesian"},
+						     {"__ig__", "Igbo"},
+						     {"__ilo__", "Ilokano"},
+						     {"__is__", "Icelandic"},
+						     {"__it__", "Italian"},
+						     {"__ja__", "Japanese"},
+						     {"__jv__", "Javanese"},
+						     {"__ka__", "Georgian"},
+						     {"__kk__", "Kazakh"},
+						     {"__km__", "Khmer"},
+						     {"__kn__", "Kannada"},
+						     {"__ko__", "Korean"},
+						     {"__lb__", "Luxembourgish"},
+						     {"__lg__", "Ganda"},
+						     {"__ln__", "Lingala"},
+						     {"__lo__", "Lao"},
+						     {"__lt__", "Lithuanian"},
+						     {"__lv__", "Latvian"},
+						     {"__mg__", "Malagasy"},
+						     {"__mk__", "Macedonian"},
+						     {"__ml__", "Malayalam"},
+						     {"__mn__", "Mongolian"},
+						     {"__mr__", "Marathi"},
+						     {"__ms__", "Malay"},
+						     {"__my__", "Burmese"},
+						     {"__ne__", "Nepali"},
+						     {"__nl__", "Dutch"},
+						     {"__no__", "Norwegian"},
+						     {"__ns__", "Northern Sotho"},
+						     {"__oc__", "Occitan"},
+						     {"__or__", "Oriya"},
+						     {"__pa__", "Punjabi"},
+						     {"__pl__", "Polish"},
+						     {"__ps__", "Pashto"},
+						     {"__pt__", "Portuguese"},
+						     {"__ro__", "Romanian"},
+						     {"__ru__", "Russian"},
+						     {"__sd__", "Sindhi"},
+						     {"__si__", "Sinhala"},
+						     {"__sk__", "Slovak"},
+						     {"__sl__", "Slovenian"},
+						     {"__so__", "Somali"},
+						     {"__sq__", "Albanian"},
+						     {"__sr__", "Serbian"},
+						     {"__ss__", "Swati"},
+						     {"__su__", "Sundanese"},
+						     {"__sv__", "Swedish"},
+						     {"__sw__", "Swahili"},
+						     {"__ta__", "Tamil"},
+						     {"__th__", "Thai"},
+						     {"__tl__", "Tagalog"},
+						     {"__tn__", "Tswana"},
+						     {"__tr__", "Turkish"},
+						     {"__uk__", "Ukrainian"},
+						     {"__ur__", "Urdu"},
+						     {"__uz__", "Uzbek"},
+						     {"__vi__", "Vietnamese"},
+						     {"__wo__", "Wolof"},
+						     {"__xh__", "Xhosa"},
+						     {"__yi__", "Yiddish"},
+						     {"__yo__", "Yoruba"},
+						     {"__zh__", "Chinese"},
+						     {"__zu__", "Zulu"}};
+std::map<std::string, std::string> language_codes_reverse = {{"Afrikaans", "__af__"},
+							     {"Amharic", "__am__"},
+							     {"Arabic", "__ar__"},
+							     {"Asturian", "__ast__"},
+							     {"Azerbai", "__az__"},
+							     {"Bashkir", "__ba__"},
+							     {"Belarusian", "__be__"},
+							     {"Bulgarian", "__bg__"},
+							     {"Bengali", "__bn__"},
+							     {"Breton", "__br__"},
+							     {"Bosnian", "__bs__"},
+							     {"Catalan", "__ca__"},
+							     {"Cebuano", "__ceb__"},
+							     {"Czech", "__cs__"},
+							     {"Welsh", "__cy__"},
+							     {"Danish", "__da__"},
+							     {"German", "__de__"},
+							     {"Greek", "__el__"},
+							     {"English", "__en__"},
+							     {"Spanish", "__es__"},
+							     {"Estonian", "__et__"},
+							     {"Persian", "__fa__"},
+							     {"Fulah", "__ff__"},
+							     {"Finnish", "__fi__"},
+							     {"French", "__fr__"},
+							     {"Frisian", "__fy__"},
+							     {"Irish", "__ga__"},
+							     {"Scottish Gaelic", "__gd__"},
+							     {"Galician", "__gl__"},
+							     {"Gujarati", "__gu__"},
+							     {"Hausa", "__ha__"},
+							     {"Hebrew", "__he__"},
+							     {"Hindi", "__hi__"},
+							     {"Croatian", "__hr__"},
+							     {"Haitian Creole", "__ht__"},
+							     {"Hungarian", "__hu__"},
+							     {"Armenian", "__hy__"},
+							     {"Indonesian", "__id__"},
+							     {"Igbo", "__ig__"},
+							     {"Ilokano", "__ilo__"},
+							     {"Icelandic", "__is__"},
+							     {"Italian", "__it__"},
+							     {"Japanese", "__ja__"},
+							     {"Javanese", "__jv__"},
+							     {"Georgian", "__ka__"},
+							     {"Kazakh", "__kk__"},
+							     {"Khmer", "__km__"},
+							     {"Kannada", "__kn__"},
+							     {"Korean", "__ko__"},
+							     {"Luxembourgish", "__lb__"},
+							     {"Ganda", "__lg__"},
+							     {"Lingala", "__ln__"},
+							     {"Lao", "__lo__"},
+							     {"Lithuanian", "__lt__"},
+							     {"Latvian", "__lv__"},
+							     {"Malagasy", "__mg__"},
+							     {"Macedonian", "__mk__"},
+							     {"Malayalam", "__ml__"},
+							     {"Mongolian", "__mn__"},
+							     {"Marathi", "__mr__"},
+							     {"Malay", "__ms__"},
+							     {"Burmese", "__my__"},
+							     {"Nepali", "__ne__"},
+							     {"Dutch", "__nl__"},
+							     {"Norwegian", "__no__"},
+							     {"Northern Sotho", "__ns__"},
+							     {"Occitan", "__oc__"},
+							     {"Oriya", "__or__"},
+							     {"Punjabi", "__pa__"},
+							     {"Polish", "__pl__"},
+							     {"Pashto", "__ps__"},
+							     {"Portuguese", "__pt__"},
+							     {"Romanian", "__ro__"},
+							     {"Russian", "__ru__"},
+							     {"Sindhi", "__sd__"},
+							     {"Sinhala", "__si__"},
+							     {"Slovak", "__sk__"},
+							     {"Slovenian", "__sl__"},
+							     {"Somali", "__so__"},
+							     {"Albanian", "__sq__"},
+							     {"Serbian", "__sr__"},
+							     {"Swati", "__ss__"},
+							     {"Sundanese", "__su__"},
+							     {"Swedish", "__sv__"},
+							     {"Swahili", "__sw__"},
+							     {"Tamil", "__ta__"},
+							     {"Thai", "__th__"},
+							     {"Tagalog", "__tl__"},
+							     {"Tswana", "__tn__"},
+							     {"Turkish", "__tr__"},
+							     {"Ukrainian", "__uk__"},
+							     {"Urdu", "__ur__"},
+							     {"Uzbek", "__uz__"},
+							     {"Vietnamese", "__vi__"},
+							     {"Wolof", "__wo__"},
+							     {"Xhosa", "__xh__"},
+							     {"Yiddish", "__yi__"},
+							     {"Yoruba", "__yo__"},
+							     {"Chinese", "__zh__"},
+							     {"Zulu", "__zu__"}};
diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp
new file mode 100644
index 0000000..1987f99
--- /dev/null
+++ b/src/translation/translation.cpp
@@ -0,0 +1,87 @@
+#include "translation.h"
+#include "plugin-support.h"
+
+#include <ctranslate2/translator.h>
+#include <sentencepiece_processor.h>
+#include <obs-module.h>
+#include <regex>
+
+int build_translation_context(struct translation_context &translation_ctx,
+			      const std::string &local_spm_path,
+			      const std::string &local_model_path)
+{
+	obs_log(LOG_INFO, "Building translation context...");
+	try {
+		obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str());
+		translation_ctx.processor.reset(new sentencepiece::SentencePieceProcessor());
+		const auto status = translation_ctx.processor->Load(local_spm_path);
+		if (!status.ok()) {
+			obs_log(LOG_ERROR, "Failed to load SPM: %s", status.ToString().c_str());
+			return OBS_POLYGLOT_TRANSLATION_INIT_FAIL;
+		}
+
+		translation_ctx.tokenizer = [&translation_ctx](const std::string &text) {
+			std::vector<std::string> tokens;
+			translation_ctx.processor->Encode(text, &tokens);
+			return tokens;
+		};
+		translation_ctx.detokenizer =
+			[&translation_ctx](const std::vector<std::string> &tokens) {
+				std::string text;
+				translation_ctx.processor->Decode(tokens, &text);
+				return std::regex_replace(text, std::regex("<unk>"), "UNK");
+			};
+
+		obs_log(LOG_INFO, "Loading CT2 model from %s", local_model_path.c_str());
+
+#ifdef POLYGLOT_WITH_CUDA
+		ctranslate2::Device device = ctranslate2::Device::CUDA;
+		obs_log(LOG_INFO, "Using CUDA");
+#else
+		ctranslate2::Device device = ctranslate2::Device::CPU;
+		obs_log(LOG_INFO, "Using CPU");
+#endif
+
+		translation_ctx.translator.reset(new ctranslate2::Translator(
+			local_model_path, device, ctranslate2::ComputeType::AUTO));
+		obs_log(LOG_INFO, "CT2 Model loaded");
+
+		translation_ctx.options.reset(new ctranslate2::TranslationOptions);
+		translation_ctx.options->beam_size = 1;
+		translation_ctx.options->max_decoding_length = 40;
+		translation_ctx.options->use_vmap = true;
+		translation_ctx.options->return_scores = false;
+	} catch (std::exception &e) {
+		obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what());
+		return OBS_POLYGLOT_TRANSLATION_INIT_FAIL;
+	}
+	return OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS;
+}
+
+int translate(struct translation_context &translation_ctx, const std::string &text,
+	      const std::string &source_lang, const std::string &target_lang, std::string &result)
+{
+	try {
+		// get tokens
+		std::vector<std::string> tokens = translation_ctx.tokenizer(text);
+		tokens.insert(tokens.begin(), "<s>");
+		tokens.insert(tokens.begin(), source_lang);
+		tokens.push_back("</s>");
+
+		const std::vector<std::vector<std::string>> batch = {tokens};
+
+		const std::vector<std::vector<std::string>> target_prefix = {{target_lang}};
+		const std::vector<ctranslate2::TranslationResult> results =
+			translation_ctx.translator->translate_batch(batch, target_prefix,
+								    *translation_ctx.options);
+
+		// detokenize starting with the 2nd token
+		const auto &tokens_result = results[0].output();
+		result = translation_ctx.detokenizer(
+			std::vector<std::string>(tokens_result.begin() + 1, tokens_result.end()));
+	} catch (std::exception &e) {
+		obs_log(LOG_ERROR, "Error: %s", e.what());
+		return OBS_POLYGLOT_TRANSLATION_FAIL;
+	}
+	return OBS_POLYGLOT_TRANSLATION_SUCCESS;
+}
diff --git a/src/translation/translation.h b/src/translation/translation.h
new file mode 100644
index 0000000..9d21734
--- /dev/null
+++ b/src/translation/translation.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <ctranslate2/translator.h>
+#include <sentencepiece_processor.h>
+#include <string>
+#include <vector>
+#include <functional>
+
+struct translation_context {
+	std::unique_ptr<sentencepiece::SentencePieceProcessor> processor;
+	std::unique_ptr<ctranslate2::Translator> translator;
+	std::unique_ptr<ctranslate2::TranslationOptions> options;
+	std::function<std::vector<std::string>(const std::string &)> tokenizer;
+	std::function<std::string(const std::vector<std::string> &)> detokenizer;
+};
+
+int build_translation_context(struct translation_context &translation_ctx,
+			      const std::string &local_spm_path,
+			      const std::string &local_model_path);
+
+int translate(struct translation_context &translation_ctx, const std::string &text,
+	      const std::string &source_lang, const std::string &target_lang, std::string &result);
+
+#define OBS_POLYGLOT_TRANSLATION_INIT_FAIL -1
+#define OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS 0
+#define OBS_POLYGLOT_TRANSLATION_SUCCESS 0
+#define OBS_POLYGLOT_TRANSLATION_FAIL -1

From b658125204a84e1a9b943e5f96e100693d3e1791 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 11:58:19 -0400
Subject: [PATCH 02/10] Add model-infos.cpp and translate_add_context to
 en-US.ini

---
 CMakeLists.txt                                |   1 +
 data/locale/ar-SA.ini                         |  50 +++++++
 data/locale/de-DE.ini                         |  50 +++++++
 data/locale/en-US.ini                         |   1 +
 data/locale/es-ES.ini                         |  50 +++++++
 data/locale/fr-FR.ini                         |  50 +++++++
 data/locale/hi-IN.ini                         |  50 +++++++
 data/locale/ja-JP.ini                         |  50 +++++++
 data/locale/ko-KR.ini                         |  50 +++++++
 data/locale/pl-PL.ini                         |  50 +++++++
 data/locale/{pt_BR.ini => pt-BR.ini}          |   4 +
 data/locale/{ru_RU.ini => ru-RU.ini}          |   4 +
 data/locale/zh-CN.ini                         |  50 +++++++
 .../ggml-model-whisper-tiny.en.bin            | Bin
 src/model-utils/model-downloader-types.h      |  25 ++++
 src/model-utils/model-downloader-ui.cpp       | 115 +++++++++-------
 src/model-utils/model-downloader-ui.h         |   6 +-
 src/model-utils/model-downloader.cpp          |  89 +++++++++----
 src/model-utils/model-downloader.h            |   7 +-
 src/model-utils/model-infos.cpp               | 122 +++++++++++++++++
 src/transcription-filter-data.h               |   7 +-
 src/transcription-filter.cpp                  | 124 ++++++++----------
 src/translation/translation.cpp               | 104 +++++++++++----
 src/translation/translation.h                 |  10 +-
 src/whisper-utils/whisper-processing.cpp      |  16 ++-
 src/whisper-utils/whisper-utils.cpp           |  29 ++--
 26 files changed, 927 insertions(+), 187 deletions(-)
 create mode 100644 data/locale/ar-SA.ini
 create mode 100644 data/locale/de-DE.ini
 create mode 100644 data/locale/es-ES.ini
 create mode 100644 data/locale/fr-FR.ini
 create mode 100644 data/locale/hi-IN.ini
 create mode 100644 data/locale/ja-JP.ini
 create mode 100644 data/locale/ko-KR.ini
 create mode 100644 data/locale/pl-PL.ini
 rename data/locale/{pt_BR.ini => pt-BR.ini} (93%)
 rename data/locale/{ru_RU.ini => ru-RU.ini} (93%)
 create mode 100644 data/locale/zh-CN.ini
 rename data/models/{ => ggml-model-whisper-tiny-en}/ggml-model-whisper-tiny.en.bin (100%)
 create mode 100644 src/model-utils/model-infos.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 90473bf..fdd7cd1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,7 @@ target_sources(
           src/whisper-utils/whisper-processing.cpp
           src/model-utils/model-downloader.cpp
           src/model-utils/model-downloader-ui.cpp
+          src/model-utils/model-infos.cpp
           src/whisper-utils/whisper-utils.cpp
           src/translation/translation.cpp)
 
diff --git a/data/locale/ar-SA.ini b/data/locale/ar-SA.ini
new file mode 100644
index 0000000..be12610
--- /dev/null
+++ b/data/locale/ar-SA.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="إضافة LocalVocal"
+transcription_filterAudioFilter="تصفية نسخ LocalVocal"
+vad_enabled="تمكين VAD"
+log_level="مستوى السجل الداخلي"
+log_words="تسجيل الخروج إلى الوحدة الطرفية"
+caption_to_stream="تدفق الترجمات"
+step_by_step_processing="المعالجة خطوة بخطوة (⚠️ زيادة المعالجة)"
+step_size_msec="حجم الخطوة (ملي ثانية)"
+subtitle_sources="مصادر الترجمات"
+none_no_output="بدون / بلا مخرجات"
+text_file_output="مخرجات ملف نصي"
+output_filename="اسم ملف الخروج"
+whisper_model="نموذج Whisper"
+external_model_file="ملف النموذج الخارجي"
+whisper_parameters="الإعدادات المتقدمة"
+language="اللغة"
+whisper_sampling_method="طريقة عينة Whisper"
+n_threads="عدد الخيوط"
+n_max_text_ctx="الحد الأقصى لسياق النص"
+translate="ترجمة"
+no_context="بدون سياق"
+single_segment="جزء واحد"
+print_special="طباعة خاصة"
+print_progress="طباعة التقدم"
+print_realtime="طباعة الوقت الفعلي"
+print_timestamps="طباعة الطوابع الزمنية"
+token_timestamps="طوابع زمنية للرمز"
+thold_pt="عتبة احتمال الرمز"
+thold_ptsum="عتبة مجموع احتمال الرمز"
+max_len="الحد الأقصى للطول بالأحرف"
+split_on_word="التقسيم على الكلمة"
+max_tokens="الحد الأقصى للرموز"
+speed_up="تسريع"
+initial_prompt="المطالبة الأولية"
+suppress_blank="كبت الفراغ"
+suppress_non_speech_tokens="كبت رموز غير الكلام"
+temperature="درجة الحرارة"
+max_initial_ts="الحد الأقصى للطوابع الزمنية الأولية"
+length_penalty="عقوبة الطول"
+save_srt="حفظ بصيغة SRT"
+truncate_output_file="تقليص الملف عند جملة جديدة"
+only_while_recording="كتابة الخروج فقط أثناء التسجيل"
+process_while_muted="معالجة الكلام أثناء كتم المصدر"
+rename_file_to_match_recording="إعادة تسمية الملف ليتطابق مع التسجيل"
+min_sub_duration="الحد الأدنى لمدة العنوان الفرعي (ملي ثانية)"
+advanced_settings="الإعدادات المتقدمة"
+target_language="اللغة الهدف"
+source_language="لغة المصدر"
+translate="ترجمة"
+translate_add_context="الترجمة مع السياق"
diff --git a/data/locale/de-DE.ini b/data/locale/de-DE.ini
new file mode 100644
index 0000000..57bb71a
--- /dev/null
+++ b/data/locale/de-DE.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="LocalVocal Plugin"
+transcription_filterAudioFilter="LocalVocal Transkription"
+vad_enabled="VAD Aktiviert"
+log_level="Interne Protokollebene"
+log_words="Protokollausgabe zur Konsole"
+caption_to_stream="Stream-Untertitel"
+step_by_step_processing="Schritt-für-Schritt-Verarbeitung (⚠️ erhöhte Verarbeitung)"
+step_size_msec="Schrittgröße (ms)"
+subtitle_sources="Untertitel Ausgabe"
+none_no_output="Keine / Keine Ausgabe"
+text_file_output="Textdatei Ausgabe"
+output_filename="Ausgabedateiname"
+whisper_model="Whisper-Modell"
+external_model_file="Externe Modelldatei"
+whisper_parameters="Erweiterte Einstellungen"
+language="Sprache"
+whisper_sampling_method="Whisper-Sampling-Methode"
+n_threads="Anzahl der Threads"
+n_max_text_ctx="Max Textkontext"
+translate="Übersetzen"
+no_context="Kein Kontext"
+single_segment="Einzelnes Segment"
+print_special="Sonderdruck"
+print_progress="Fortschritt drucken"
+print_realtime="Echtzeit drucken"
+print_timestamps="Zeitstempel drucken"
+token_timestamps="Token Zeitstempel"
+thold_pt="Token-Wahrscheinlichkeitsschwelle"
+thold_ptsum="Token Summenwahrscheinlichkeitsschwelle"
+max_len="Maximale Länge in Zeichen"
+split_on_word="Auf Wort teilen"
+max_tokens="Max Tokens"
+speed_up="Beschleunigen"
+initial_prompt="Erste Aufforderung"
+suppress_blank="Leerzeichen unterdrücken"
+suppress_non_speech_tokens="Nicht-Sprach-Token unterdrücken"
+temperature="Temperatur"
+max_initial_ts="Max Anfangszeitstempel"
+length_penalty="Längenstrafe"
+save_srt="Im SRT-Format speichern"
+truncate_output_file="Datei bei neuem Satz kürzen"
+only_while_recording="Ausgabe nur während der Aufnahme schreiben"
+process_while_muted="Sprache verarbeiten, während die Quelle stummgeschaltet ist"
+rename_file_to_match_recording="Datei umbenennen, um Aufnahme zu entsprechen"
+min_sub_duration="Min. Untertiteldauer (ms)"
+advanced_settings="Erweiterte Einstellungen"
+target_language="Zielsprache"
+source_language="Quellsprache"
+translate="Übersetzen"
+translate_add_context="Mit Kontext übersetzen"
diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini
index 18b39a7..fab4510 100644
--- a/data/locale/en-US.ini
+++ b/data/locale/en-US.ini
@@ -47,3 +47,4 @@ advanced_settings="Advanced Settings"
 target_language="Target language"
 source_language="Source language"
 translate="Translate"
+translate_add_context="Translate with context"
diff --git a/data/locale/es-ES.ini b/data/locale/es-ES.ini
new file mode 100644
index 0000000..a9f0580
--- /dev/null
+++ b/data/locale/es-ES.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="Plugin LocalVocal"
+transcription_filterAudioFilter="Transcripción LocalVocal"
+vad_enabled="VAD Habilitado"
+log_level="Nivel de Registro Interno"
+log_words="Registro de Salida a la Consola"
+caption_to_stream="Subtítulos en Stream"
+step_by_step_processing="Procesamiento paso a paso (⚠️ procesamiento aumentado)"
+step_size_msec="Tamaño de paso (ms)"
+subtitle_sources="Salida de Subtítulos"
+none_no_output="Ninguno / Sin salida"
+text_file_output="Salida de archivo de texto"
+output_filename="Nombre del archivo de salida"
+whisper_model="Modelo Whisper"
+external_model_file="Archivo de modelo externo"
+whisper_parameters="Configuraciones Avanzadas"
+language="Idioma"
+whisper_sampling_method="Método de Muestreo Whisper"
+n_threads="Número de hilos"
+n_max_text_ctx="Contexto de texto máximo"
+translate="Traducir"
+no_context="Sin contexto"
+single_segment="Segmento único"
+print_special="Imprimir especial"
+print_progress="Imprimir progreso"
+print_realtime="Imprimir en tiempo real"
+print_timestamps="Imprimir marcas de tiempo"
+token_timestamps="Marcas de tiempo de token"
+thold_pt="Umbral de prob. de token"
+thold_ptsum="Umbral de suma de prob. de token"
+max_len="Longitud máxima en caracteres"
+split_on_word="Dividir en palabra"
+max_tokens="Tokens máximos"
+speed_up="Acelerar"
+initial_prompt="Indicación inicial"
+suppress_blank="Suprimir en blanco"
+suppress_non_speech_tokens="Suprimir tokens no verbales"
+temperature="Temperatura"
+max_initial_ts="Marcas de tiempo iniciales máximas"
+length_penalty="Penalización de longitud"
+save_srt="Guardar en formato SRT"
+truncate_output_file="Truncar archivo en nueva oración"
+only_while_recording="Escribir salida solo mientras se graba"
+process_while_muted="Procesar el habla mientras la fuente está silenciada"
+rename_file_to_match_recording="Renombrar archivo para que coincida con la grabación"
+min_sub_duration="Duración mínima de sub (ms)"
+advanced_settings="Configuraciones Avanzadas"
+target_language="Idioma objetivo"
+source_language="Idioma fuente"
+translate="Traducir"
+translate_add_context="Traducir con contexto"
diff --git a/data/locale/fr-FR.ini b/data/locale/fr-FR.ini
new file mode 100644
index 0000000..13a00d9
--- /dev/null
+++ b/data/locale/fr-FR.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="Plugin LocalVocal"
+transcription_filterAudioFilter="Transcription LocalVocal"
+vad_enabled="VAD Activé"
+log_level="Niveau de journalisation interne"
+log_words="Journalisation de la sortie vers la console"
+caption_to_stream="Sous-titres en streaming"
+step_by_step_processing="Traitement étape par étape (⚠️ traitement accru)"
+step_size_msec="Taille de l'étape (ms)"
+subtitle_sources="Sortie des sous-titres"
+none_no_output="Aucun / Pas de sortie"
+text_file_output="Sortie de fichier texte"
+output_filename="Nom du fichier de sortie"
+whisper_model="Modèle Whisper"
+external_model_file="Fichier de modèle externe"
+whisper_parameters="Paramètres avancés"
+language="Langue"
+whisper_sampling_method="Méthode d'échantillonnage Whisper"
+n_threads="Nombre de fils"
+n_max_text_ctx="Contexte de texte max"
+translate="Traduire"
+no_context="Pas de contexte"
+single_segment="Segment unique"
+print_special="Imprimer spécial"
+print_progress="Imprimer la progression"
+print_realtime="Imprimer en temps réel"
+print_timestamps="Imprimer les horodatages"
+token_timestamps="Horodatages des jetons"
+thold_pt="Seuil de prob. de jeton"
+thold_ptsum="Seuil de somme de prob. de jeton"
+max_len="Longueur max en caractères"
+split_on_word="Diviser sur le mot"
+max_tokens="Max jetons"
+speed_up="Accélérer"
+initial_prompt="Invite initiale"
+suppress_blank="Supprimer le blanc"
+suppress_non_speech_tokens="Supprimer les jetons non-parlés"
+temperature="Température"
+max_initial_ts="Max horodatages initiaux"
+length_penalty="Pénalité de longueur"
+save_srt="Enregistrer au format SRT"
+truncate_output_file="Tronquer le fichier sur nouvelle phrase"
+only_while_recording="Écrire la sortie uniquement pendant l'enregistrement"
+process_while_muted="Traiter la parole pendant que la source est en sourdine"
+rename_file_to_match_recording="Renommer le fichier pour correspondre à l'enregistrement"
+min_sub_duration="Durée min. du sous-titre (ms)"
+advanced_settings="Paramètres avancés"
+target_language="Langue cible"
+source_language="Langue source"
+translate="Traduire"
+translate_add_context="Traduire avec contexte"
diff --git a/data/locale/hi-IN.ini b/data/locale/hi-IN.ini
new file mode 100644
index 0000000..03d8c27
--- /dev/null
+++ b/data/locale/hi-IN.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="लोकलवोकल प्लगइन"
+transcription_filterAudioFilter="लोकलवोकल ट्रांसक्रिप्शन"
+vad_enabled="VAD सक्षम"
+log_level="आंतरिक लॉग स्तर"
+log_words="कंसोल पर लॉग आउटपुट"
+caption_to_stream="स्ट्रीम कैप्शन"
+step_by_step_processing="चरण-दर-चरण प्रसंस्करण (⚠️ बढ़ी प्रसंस्करण)"
+step_size_msec="चरण का आकार (ms)"
+subtitle_sources="उपशीर्षक आउटपुट"
+none_no_output="कोई नहीं / कोई आउटपुट नहीं"
+text_file_output="टेक्स्ट फ़ाइल आउटपुट"
+output_filename="आउटपुट फ़ाइलनाम"
+whisper_model="व्हिस्पर मॉडल"
+external_model_file="बाहरी मॉडल फ़ाइल"
+whisper_parameters="उन्नत सेटिंग्स"
+language="भाषा"
+whisper_sampling_method="व्हिस्पर सैंपलिंग विधि"
+n_threads="धागों की संख्या"
+n_max_text_ctx="अधिकतम पाठ संदर्भ"
+translate="अनुवाद करें"
+no_context="कोई संदर्भ नहीं"
+single_segment="एकल सेगमेंट"
+print_special="विशेष मुद्रित करें"
+print_progress="प्रगति मुद्रित करें"
+print_realtime="रियलटाइम मुद्रित करें"
+print_timestamps="टाइमस्टैंप मुद्रित करें"
+token_timestamps="टोकन टाइमस्टैंप"
+thold_pt="टोकन प्रॉब. थ्रेशोल्ड"
+thold_ptsum="टोकन सम प्रॉब. थ्रेशोल्ड"
+max_len="अधिकतम लंबाई इन अक्षरों में"
+split_on_word="शब्द पर विभाजित करें"
+max_tokens="अधिकतम टोकन"
+speed_up="स्पीड अप"
+initial_prompt="प्रारंभिक प्रॉम्प्ट"
+suppress_blank="रिक्त संयंत्रित करें"
+suppress_non_speech_tokens="गैर-भाषण टोकनों को दबाएं"
+temperature="तापमान"
+max_initial_ts="अधिकतम प्रारंभिक टाइमस्टैंप"
+length_penalty="लंबाई दंड"
+save_srt="SRT प्रारूप में सहेजें"
+truncate_output_file="नई वाक्यांश पर फ़ाइल को छोटा करें"
+only_while_recording="केवल रिकॉर्डिंग के दौरान आउटपुट लिखें"
+process_while_muted="स्रोत म्यूट होने पर भी भाषण को प्रसंस्करण करें"
+rename_file_to_match_recording="रिकॉर्डिंग से मेल खाने के लिए फ़ाइल का नाम बदलें"
+min_sub_duration="न्यूनतम उपशीर्षक अवधि (ms)"
+advanced_settings="उन्नत सेटिंग्स"
+target_language="लक्ष्य भाषा"
+source_language="स्रोत भाषा"
+translate="अनुवाद करें"
+translate_add_context="संदर्भ के साथ अनुवाद करें"
diff --git a/data/locale/ja-JP.ini b/data/locale/ja-JP.ini
new file mode 100644
index 0000000..d7fc6d3
--- /dev/null
+++ b/data/locale/ja-JP.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="ローカルボーカルプラグイン"
+transcription_filterAudioFilter="ローカルボーカルトランスクリプション"
+vad_enabled="VAD有効"
+log_level="内部ログレベル"
+log_words="コンソールへのログ出力"
+caption_to_stream="ストリームキャプション"
+step_by_step_processing="ステップバイステップ処理（⚠️処理増加）"
+step_size_msec="ステップサイズ（ms）"
+subtitle_sources="字幕出力"
+none_no_output="なし/出力なし"
+text_file_output="テキストファイル出力"
+output_filename="出力ファイル名"
+whisper_model="ウィスパーモデル"
+external_model_file="外部モデルファイル"
+whisper_parameters="詳細設定"
+language="言語"
+whisper_sampling_method="ウィスパーサンプリング方法"
+n_threads="スレッド数"
+n_max_text_ctx="最大テキストコンテキスト"
+translate="翻訳"
+no_context="コンテキストなし"
+single_segment="単一セグメント"
+print_special="特別な印刷"
+print_progress="進行状況を印刷"
+print_realtime="リアルタイムで印刷"
+print_timestamps="タイムスタンプを印刷"
+token_timestamps="トークンタイムスタンプ"
+thold_pt="トークン確率閾値"
+thold_ptsum="トークン合計確率閾値"
+max_len="最大長（文字）"
+split_on_word="単語で分割"
+max_tokens="最大トークン数"
+speed_up="スピードアップ"
+initial_prompt="初期プロンプト"
+suppress_blank="空白を抑制"
+suppress_non_speech_tokens="非音声トークンを抑制"
+temperature="温度"
+max_initial_ts="最大初期タイムスタンプ"
+length_penalty="長さのペナルティ"
+save_srt="SRT形式で保存"
+truncate_output_file="新しい文でファイルを切り捨てる"
+only_while_recording="録音中のみ出力を書き込む"
+process_while_muted="ソースがミュート中も音声を処理する"
+rename_file_to_match_recording="ファイル名を録音に合わせて変更"
+min_sub_duration="最小サブ持続時間（ms）"
+advanced_settings="詳細設定"
+target_language="目標言語"
+source_language="ソース言語"
+translate="翻訳"
+translate_add_context="コンテキスト付きで翻訳"
diff --git a/data/locale/ko-KR.ini b/data/locale/ko-KR.ini
new file mode 100644
index 0000000..12d714c
--- /dev/null
+++ b/data/locale/ko-KR.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="로컬보컬 플러그인"
+transcription_filterAudioFilter="로컬보컬 전사"
+vad_enabled="VAD 활성화"
+log_level="내부 로그 레벨"
+log_words="콘솔에 로그 출력"
+caption_to_stream="스트림 캡션"
+step_by_step_processing="단계별 처리 (⚠️ 처리 시간 증가)"
+step_size_msec="단계 크기 (ms)"
+subtitle_sources="자막 출력"
+none_no_output="없음 / 출력 없음"
+text_file_output="텍스트 파일 출력"
+output_filename="출력 파일명"
+whisper_model="Whisper 모델"
+external_model_file="외부 모델 파일"
+whisper_parameters="고급 설정"
+language="언어"
+whisper_sampling_method="Whisper 샘플링 방법"
+n_threads="스레드 수"
+n_max_text_ctx="최대 텍스트 컨텍스트"
+translate="번역"
+no_context="컨텍스트 없음"
+single_segment="단일 세그먼트"
+print_special="특수 출력"
+print_progress="진행 상황 출력"
+print_realtime="실시간 출력"
+print_timestamps="타임스탬프 출력"
+token_timestamps="토큰 타임스탬프"
+thold_pt="토큰 확률 임계값"
+thold_ptsum="토큰 합 확률 임계값"
+max_len="최대 길이(문자)"
+split_on_word="단어로 분할"
+max_tokens="최대 토큰"
+speed_up="속도 향상"
+initial_prompt="초기 프롬프트"
+suppress_blank="공백 제거"
+suppress_non_speech_tokens="비음성 토큰 제거"
+temperature="온도"
+max_initial_ts="최대 초기 타임스탬프"
+length_penalty="길이 패널티"
+save_srt="SRT 형식으로 저장"
+truncate_output_file="새 문장에서 파일 잘라내기"
+only_while_recording="녹음 중에만 출력 작성"
+process_while_muted="소스가 음소거 상태일 때 음성 처리"
+rename_file_to_match_recording="녹음과 일치하도록 파일 이름 변경"
+min_sub_duration="최소. 자막 지속 시간 (ms)"
+advanced_settings="고급 설정"
+target_language="대상 언어"
+source_language="원본 언어"
+translate="번역"
+translate_add_context="컨텍스트와 함께 번역"
diff --git a/data/locale/pl-PL.ini b/data/locale/pl-PL.ini
new file mode 100644
index 0000000..d550b18
--- /dev/null
+++ b/data/locale/pl-PL.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="Wtyczka LocalVocal"
+transcription_filterAudioFilter="Transkrypcja LocalVocal"
+vad_enabled="VAD Włączony"
+log_level="Poziom logowania wewnętrznego"
+log_words="Logowanie wyjścia do konsoli"
+caption_to_stream="Podpisy strumienia"
+step_by_step_processing="Przetwarzanie krok po kroku (⚠️ zwiększone przetwarzanie)"
+step_size_msec="Rozmiar kroku (ms)"
+subtitle_sources="Źródła napisów"
+none_no_output="Brak / Brak wyjścia"
+text_file_output="Wyjście pliku tekstowego"
+output_filename="Nazwa pliku wyjściowego"
+whisper_model="Model Whisper"
+external_model_file="Zewnętrzny plik modelu"
+whisper_parameters="Ustawienia zaawansowane"
+language="Język"
+whisper_sampling_method="Metoda próbkowania Whisper"
+n_threads="Liczba wątków"
+n_max_text_ctx="Maksymalny kontekst tekstu"
+translate="Tłumacz"
+no_context="Brak kontekstu"
+single_segment="Pojedynczy segment"
+print_special="Drukuj specjalne"
+print_progress="Drukuj postęp"
+print_realtime="Drukuj w czasie rzeczywistym"
+print_timestamps="Drukuj znaczniki czasu"
+token_timestamps="Znaczniki czasu tokenów"
+thold_pt="Próg prawd. tokena"
+thold_ptsum="Próg sumy prawd. tokena"
+max_len="Maksymalna długość w znakach"
+split_on_word="Podziel na słowo"
+max_tokens="Maksymalna liczba tokenów"
+speed_up="Przyspiesz"
+initial_prompt="Początkowy monit"
+suppress_blank="Tłumienie pustych"
+suppress_non_speech_tokens="Tłumienie tokenów nie-mowy"
+temperature="Temperatura"
+max_initial_ts="Maksymalne początkowe znaczniki czasu"
+length_penalty="Kara za długość"
+save_srt="Zapisz w formacie SRT"
+truncate_output_file="Skróć plik przy nowym zdaniu"
+only_while_recording="Zapisuj wyjście tylko podczas nagrywania"
+process_while_muted="Przetwarzaj mowę, gdy źródło jest wyciszone"
+rename_file_to_match_recording="Zmień nazwę pliku, aby pasowała do nagrania"
+min_sub_duration="Min. czas trwania napisów (ms)"
+advanced_settings="Ustawienia zaawansowane"
+target_language="Język docelowy"
+source_language="Język źródłowy"
+translate="Tłumacz"
+translate_add_context="Tłumacz z kontekstem"
diff --git a/data/locale/pt_BR.ini b/data/locale/pt-BR.ini
similarity index 93%
rename from data/locale/pt_BR.ini
rename to data/locale/pt-BR.ini
index f416835..6033e61 100644
--- a/data/locale/pt_BR.ini
+++ b/data/locale/pt-BR.ini
@@ -44,3 +44,7 @@ only_while_recording="Escreva durante a gravação"
 process_while_muted="Processar enquanto está silenciada"
 rename_file_to_match_recording="Renomear arquivo para corresponder à gravação"
 min_sub_duration="Duração mínima da legenda (msec)"
+target_language="Língua alvo"
+source_language="Língua de origem"
+translate="Traduzir"
+translate_add_context="Traduzir com contexto"
diff --git a/data/locale/ru_RU.ini b/data/locale/ru-RU.ini
similarity index 93%
rename from data/locale/ru_RU.ini
rename to data/locale/ru-RU.ini
index 6d3ce3f..7918ad8 100644
--- a/data/locale/ru_RU.ini
+++ b/data/locale/ru-RU.ini
@@ -43,3 +43,7 @@ only_while_recording="Записывать вывод только во врем
 process_while_muted="Обрабатывать речь, пока источник отключен"
 rename_file_to_match_recording="Переименовать файл, чтобы соответствовать записи"
 min_sub_duration="Минимальная длительность субтитров (мс)"
+target_language="Целевой язык"
+source_language="Исходный язык"
+translate="Перевести"
+translate_add_context="Перевести с контекстом"
diff --git a/data/locale/zh-CN.ini b/data/locale/zh-CN.ini
new file mode 100644
index 0000000..2809ae7
--- /dev/null
+++ b/data/locale/zh-CN.ini
@@ -0,0 +1,50 @@
+LocalVocalPlugin="LocalVocal 插件"
+transcription_filterAudioFilter="LocalVocal 转录"
+vad_enabled="启用 VAD"
+log_level="内部日志级别"
+log_words="控制台日志输出"
+caption_to_stream="流字幕"
+step_by_step_processing="逐步处理（⚠️ 增加处理）"
+step_size_msec="步长（毫秒）"
+subtitle_sources="字幕输出"
+none_no_output="无 / 无输出"
+text_file_output="文本文件输出"
+output_filename="输出文件名"
+whisper_model="Whisper 模型"
+external_model_file="外部模型文件"
+whisper_parameters="高级设置"
+language="语言"
+whisper_sampling_method="Whisper 采样方法"
+n_threads="线程数"
+n_max_text_ctx="最大文本上下文"
+translate="翻译"
+no_context="无上下文"
+single_segment="单一段落"
+print_special="打印特殊"
+print_progress="打印进度"
+print_realtime="实时打印"
+print_timestamps="打印时间戳"
+token_timestamps="令牌时间戳"
+thold_pt="令牌概率阈值"
+thold_ptsum="令牌总概率阈值"
+max_len="最大长度（字符）"
+split_on_word="按单词分割"
+max_tokens="最大令牌数"
+speed_up="加速"
+initial_prompt="初始提示"
+suppress_blank="抑制空白"
+suppress_non_speech_tokens="抑制非语音令牌"
+temperature="温度"
+max_initial_ts="最大初始时间戳"
+length_penalty="长度惩罚"
+save_srt="保存为 SRT 格式"
+truncate_output_file="新句子时截断文件"
+only_while_recording="仅在录制时写入输出"
+process_while_muted="在源静音时处理语音"
+rename_file_to_match_recording="将文件重命名以匹配录制"
+min_sub_duration="最小字幕持续时间（毫秒）"
+advanced_settings="高级设置"
+target_language="目标语言"
+source_language="源语言"
+translate="翻译"
+translate_add_context="带上下文翻译"
diff --git a/data/models/ggml-model-whisper-tiny.en.bin b/data/models/ggml-model-whisper-tiny-en/ggml-model-whisper-tiny.en.bin
similarity index 100%
rename from data/models/ggml-model-whisper-tiny.en.bin
rename to data/models/ggml-model-whisper-tiny-en/ggml-model-whisper-tiny.en.bin
diff --git a/src/model-utils/model-downloader-types.h b/src/model-utils/model-downloader-types.h
index 0ef81c3..3d24d96 100644
--- a/src/model-utils/model-downloader-types.h
+++ b/src/model-utils/model-downloader-types.h
@@ -1,3 +1,28 @@
+#ifndef MODEL_DOWNLOADER_TYPES_H
+#define MODEL_DOWNLOADER_TYPES_H
+
+#include <functional>
+#include <map>
+#include <string>
+#include <vector>
 
 typedef std::function<void(int download_status, const std::string &path)>
 	download_finished_callback_t;
+
+struct ModelFileDownloadInfo {
+	std::string url;
+	std::string sha256;
+};
+
+enum ModelType { MODEL_TYPE_TRANSCRIPTION, MODEL_TYPE_TRANSLATION };
+
+struct ModelInfo {
+	std::string friendly_name;
+	std::string local_folder_name;
+	ModelType type;
+	std::vector<ModelFileDownloadInfo> files;
+};
+
+extern std::map<std::string, ModelInfo> models_info;
+
+#endif /* MODEL_DOWNLOADER_TYPES_H */
diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp
index e53d0ab..dfd8bd5 100644
--- a/src/model-utils/model-downloader-ui.cpp
+++ b/src/model-utils/model-downloader-ui.cpp
@@ -5,15 +5,13 @@
 
 #include <filesystem>
 
-const std::string MODEL_BASE_PATH = "https://ggml.ggerganov.com/";
-
 size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
 {
 	size_t written = fwrite(ptr, size, nmemb, stream);
 	return written;
 }
 
-ModelDownloader::ModelDownloader(const std::string &model_name,
+ModelDownloader::ModelDownloader(const ModelInfo &model_info,
 				 download_finished_callback_t download_finished_callback_,
 				 QWidget *parent)
 	: QDialog(parent),
@@ -30,7 +28,7 @@ ModelDownloader::ModelDownloader(const std::string &model_name,
 
 	// Add a label for the model name
 	QLabel *model_name_label = new QLabel(this);
-	model_name_label->setText(QString::fromStdString(model_name));
+	model_name_label->setText(QString::fromStdString(model_info.friendly_name));
 	model_name_label->setAlignment(Qt::AlignCenter);
 	this->layout->addWidget(model_name_label);
 
@@ -43,7 +41,7 @@ ModelDownloader::ModelDownloader(const std::string &model_name,
 	this->layout->addWidget(this->progress_bar);
 
 	this->download_thread = new QThread();
-	this->download_worker = new ModelDownloadWorker(model_name);
+	this->download_worker = new ModelDownloadWorker(model_info);
 	this->download_worker->moveToThread(this->download_thread);
 
 	connect(this->download_thread, &QThread::started, this->download_worker,
@@ -112,65 +110,92 @@ void ModelDownloader::show_error(const std::string &reason)
 	this->download_finished_callback(1, "");
 }
 
-ModelDownloadWorker::ModelDownloadWorker(const std::string &model_name_)
-{
-	this->model_name = model_name_;
+ModelDownloadWorker::ModelDownloadWorker(const ModelInfo &model_info_) : model_info(model_info_) {}
+
+// Extract the file name (last path segment) from a download URL.
+std::string get_filename_from_url(const std::string &url)
+{
+	// Cut the query string / fragment first so a '/' inside it cannot be
+	// mistaken for the last path separator.
+	const std::string path = url.substr(0, url.find_first_of("?#"));
+	const auto last_slash = path.find_last_of('/');
+	return last_slash == std::string::npos ? path : path.substr(last_slash + 1);
 }
 
 void ModelDownloadWorker::download_model()
 {
-	char *module_config_path = obs_module_get_config_path(obs_current_module(), "models");
+	char *config_folder = obs_module_get_config_path(obs_current_module(), "models");
+	const std::filesystem::path module_config_models_folder =
+		std::filesystem::absolute(config_folder);
+	bfree(config_folder);
+
 	// Check if the config folder exists
-	if (!std::filesystem::exists(module_config_path)) {
-		obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_path);
+	if (!std::filesystem::exists(module_config_models_folder)) {
+		obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_models_folder.string().c_str());
 		// Create the config folder
-		if (!std::filesystem::create_directories(module_config_path)) {
+		if (!std::filesystem::create_directories(module_config_models_folder)) {
 			obs_log(LOG_ERROR, "Failed to create config folder: %s",
-				module_config_path);
+				module_config_models_folder.string().c_str());
 			emit download_error("Failed to create config folder.");
 			return;
 		}
 	}
 
-	char *model_save_path_str =
-		obs_module_get_config_path(obs_current_module(), this->model_name.c_str());
-	std::string model_save_path(model_save_path_str);
-	bfree(model_save_path_str);
-	obs_log(LOG_INFO, "Model save path: %s", model_save_path.c_str());
+	const std::string model_local_config_path =
+		(module_config_models_folder / model_info.local_folder_name).string();
 
-	// extract filename from path in this->modle_name
-	const std::string model_filename =
-		this->model_name.substr(this->model_name.find_last_of("/\\") + 1);
+	obs_log(LOG_INFO, "Model save path: %s", model_local_config_path.c_str());
 
-	std::string model_url = MODEL_BASE_PATH + model_filename;
-	obs_log(LOG_INFO, "Model URL: %s", model_url.c_str());
+    if (!std::filesystem::exists(model_local_config_path)) {
+        // model folder does not exist, create it
+        if (!std::filesystem::create_directories(model_local_config_path)) {
+            obs_log(LOG_ERROR, "Failed to create model folder: %s",
+                model_local_config_path.c_str());
+            emit download_error("Failed to create model folder.");
+            return;
+        }
+    }
 
 	CURL *curl = curl_easy_init();
 	if (curl) {
-		FILE *fp = fopen(model_save_path.c_str(), "wb");
-		if (fp == nullptr) {
-			obs_log(LOG_ERROR, "Failed to open file %s.", model_save_path.c_str());
-			emit download_error("Failed to open file.");
-			return;
-		}
-		curl_easy_setopt(curl, CURLOPT_URL, model_url.c_str());
-		curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
-		curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
-		curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
-		curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION,
-				 ModelDownloadWorker::progress_callback);
-		curl_easy_setopt(curl, CURLOPT_XFERINFODATA, this);
-		// Follow redirects
-		curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-		CURLcode res = curl_easy_perform(curl);
-		if (res != CURLE_OK) {
-			obs_log(LOG_ERROR, "Failed to download model %s.",
-				this->model_name.c_str());
-			emit download_error("Failed to download model.");
+		for (auto &model_download_file : this->model_info.files) {
+			obs_log(LOG_INFO, "Model URL: %s", model_download_file.url.c_str());
+			const std::string model_filename = get_filename_from_url(model_download_file.url);
+			const std::string model_file_save_path =
+				(std::filesystem::path(model_local_config_path) / model_filename).string();
+			if (std::filesystem::exists(model_file_save_path)) {
+				obs_log(LOG_INFO, "Model file already exists: %s", model_file_save_path.c_str());
+				continue;
+			}
+			FILE *fp = fopen(model_file_save_path.c_str(), "wb");
+			if (fp == nullptr) {
+				obs_log(LOG_ERROR, "Failed to open model file for writing %s.",
+					model_file_save_path.c_str());
+				curl_easy_cleanup(curl); // do not leak the handle on this early exit
+				emit download_error("Failed to open file.");
+				return;
+			}
+			curl_easy_setopt(curl, CURLOPT_URL, model_download_file.url.c_str());
+			curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
+			curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
+			curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
+			curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, ModelDownloadWorker::progress_callback);
+			curl_easy_setopt(curl, CURLOPT_XFERINFODATA, this);
+			curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // follow redirects
+			const CURLcode res = curl_easy_perform(curl);
+			fclose(fp); // flush and release the file before inspecting the result
+			if (res != CURLE_OK) {
+				obs_log(LOG_ERROR, "Failed to download model file %s.", model_filename.c_str());
+				// A truncated file would be skipped as "already exists" on the next attempt
+				std::error_code ec;
+				std::filesystem::remove(model_file_save_path, ec);
+				curl_easy_cleanup(curl);
+				emit download_error("Failed to download model file.");
+				return; // abort: do not report download_finished for an incomplete model
+			}
 		}
 		curl_easy_cleanup(curl);
-		fclose(fp);
-		emit download_finished(model_save_path);
+		emit download_finished(model_local_config_path);
 	} else {
 		obs_log(LOG_ERROR, "Failed to initialize curl.");
 		emit download_error("Failed to initialize curl.");
diff --git a/src/model-utils/model-downloader-ui.h b/src/model-utils/model-downloader-ui.h
index d2e5fb2..20521b6 100644
--- a/src/model-utils/model-downloader-ui.h
+++ b/src/model-utils/model-downloader-ui.h
@@ -14,7 +14,7 @@
 class ModelDownloadWorker : public QObject {
 	Q_OBJECT
 public:
-	ModelDownloadWorker(const std::string &model_name);
+	ModelDownloadWorker(const ModelInfo &model_info_);
 	~ModelDownloadWorker();
 
 public slots:
@@ -28,13 +28,13 @@ public slots:
 private:
 	static int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow,
 				     curl_off_t ultotal, curl_off_t ulnow);
-	std::string model_name;
+	ModelInfo model_info;
 };
 
 class ModelDownloader : public QDialog {
 	Q_OBJECT
 public:
-	ModelDownloader(const std::string &model_name,
+	ModelDownloader(const ModelInfo &model_info,
 			download_finished_callback_t download_finished_callback,
 			QWidget *parent = nullptr);
 	~ModelDownloader();
diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp
index c83adff..f8b5293 100644
--- a/src/model-utils/model-downloader.cpp
+++ b/src/model-utils/model-downloader.cpp
@@ -12,46 +12,83 @@
 
 #include <curl/curl.h>
 
-std::string find_model_file(const std::string &model_name)
+std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name) {
+    for (const auto &entry : std::filesystem::directory_iterator(folder_path)) {
+        if (entry.path().filename() == file_name) {
+            return entry.path().string();
+        }
+    }
+    return "";
+}
+
+std::string find_bin_file_in_folder(const std::string &model_local_folder_path) {
+    // find .bin file in folder
+    for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) {
+        if (entry.path().extension() == ".bin") {
+            const std::string bin_file_path = entry.path().string();
+            obs_log(LOG_INFO, "Model bin file found in folder: %s", bin_file_path.c_str());
+            return bin_file_path;
+        }
+    }
+    obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_local_folder_path.c_str());
+    return "";
+}
+
+std::string find_model_folder(const ModelInfo &model_info)
 {
-	const char *model_name_cstr = model_name.c_str();
-	obs_log(LOG_INFO, "Checking if model %s exists in data...", model_name_cstr);
+	// obs_module_file() may return NULL: keep the path empty rather than build a path from null
+	char *data_folder_models = obs_module_file("models");
+	std::string model_local_data_path;
+	if (data_folder_models != nullptr) {
+		const auto data_models_folder = std::filesystem::absolute(data_folder_models);
+		model_local_data_path = (data_models_folder / model_info.local_folder_name).string();
+		bfree(data_folder_models);
+	}
+	obs_log(LOG_INFO, "Checking if model '%s' exists in data...", model_info.friendly_name.c_str());
 
-	char *model_file_path = obs_module_file(model_name_cstr);
-	if (model_file_path == nullptr) {
-		obs_log(LOG_INFO, "Model %s not found in data.", model_name_cstr);
+	if (!std::filesystem::exists(model_local_data_path)) {
+		obs_log(LOG_INFO, "Model not found in data: %s", model_local_data_path.c_str());
 	} else {
-		std::string model_file_path_str(model_file_path);
-		bfree(model_file_path);
-		if (!std::filesystem::exists(model_file_path_str)) {
-			obs_log(LOG_INFO, "Model not found in data: %s",
-				model_file_path_str.c_str());
-		} else {
-			obs_log(LOG_INFO, "Model found in data: %s", model_file_path_str.c_str());
-			return model_file_path_str;
-		}
+		obs_log(LOG_INFO, "Model folder found in data: %s", model_local_data_path.c_str());
+        return model_local_data_path;
 	}
 
 	// Check if model exists in the config folder
-	char *model_config_path_str =
-		obs_module_get_config_path(obs_current_module(), model_name_cstr);
-	std::string model_config_path(model_config_path_str);
-	bfree(model_config_path_str);
-	obs_log(LOG_INFO, "Model path in config: %s", model_config_path.c_str());
-	if (std::filesystem::exists(model_config_path)) {
-		obs_log(LOG_INFO, "Model exists in config folder: %s", model_config_path.c_str());
-		return model_config_path;
+	char *config_folder = obs_module_get_config_path(obs_current_module(), "models");
+	const std::filesystem::path module_config_models_folder =
+		std::filesystem::absolute(config_folder);
+	bfree(config_folder);
+
+    obs_log(LOG_INFO, "Checking if model '%s' exists in config...", model_info.friendly_name.c_str());
+
+	const std::string model_local_config_path =
+		(module_config_models_folder / model_info.local_folder_name).string();
+
+	obs_log(LOG_INFO, "Model path in config: %s", model_local_config_path.c_str());
+	if (std::filesystem::exists(model_local_config_path)) {
+		obs_log(LOG_INFO, "Model exists in config folder: %s",
+			model_local_config_path.c_str());
+		return model_local_config_path;
 	}
 
-	obs_log(LOG_INFO, "Model %s not found.", model_name_cstr);
+	obs_log(LOG_INFO, "Model '%s' not found.", model_info.friendly_name.c_str());
 	return "";
 }
 
-void download_model_with_ui_dialog(const std::string &model_name,
+std::string find_model_bin_file(const ModelInfo &model_info) {
+    const std::string model_local_folder_path = find_model_folder(model_info);
+    if (model_local_folder_path.empty()) {
+        return "";
+    }
+
+    return find_bin_file_in_folder(model_local_folder_path);
+}
+
+void download_model_with_ui_dialog(const ModelInfo &model_info,
 				   download_finished_callback_t download_finished_callback)
 {
 	// Start the model downloader UI
 	ModelDownloader *model_downloader = new ModelDownloader(
-		model_name, download_finished_callback, (QWidget *)obs_frontend_get_main_window());
+		model_info, download_finished_callback, (QWidget *)obs_frontend_get_main_window());
 	model_downloader->show();
 }
diff --git a/src/model-utils/model-downloader.h b/src/model-utils/model-downloader.h
index 09d07ab..fca3337 100644
--- a/src/model-utils/model-downloader.h
+++ b/src/model-utils/model-downloader.h
@@ -6,10 +6,13 @@
 
 #include "model-downloader-types.h"
 
-std::string find_model_file(const std::string &model_name);
+std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name);
+std::string find_bin_file_in_folder(const std::string &path);
+std::string find_model_folder(const ModelInfo &model_info);
+std::string find_model_bin_file(const ModelInfo &model_info);
 
 // Start the model downloader UI dialog with a callback for when the download is finished
-void download_model_with_ui_dialog(const std::string &model_name,
+void download_model_with_ui_dialog(const ModelInfo &model_info,
 				   download_finished_callback_t download_finished_callback);
 
 #endif // MODEL_DOWNLOADER_H
diff --git a/src/model-utils/model-infos.cpp b/src/model-utils/model-infos.cpp
new file mode 100644
index 0000000..cd00814
--- /dev/null
+++ b/src/model-utils/model-infos.cpp
@@ -0,0 +1,122 @@
+#include "model-downloader-types.h"
+// Catalog of downloadable models. Keys are the display names listed in the model dropdown and stored in settings.
+std::map<std::string, ModelInfo> models_info = {{
+	{"M2M-100 418M (495Mb)", // map key; start_translation() looks this entry up by exactly this string
+	 {"M2M-100 418M", // presumably ModelInfo::friendly_name - confirm against model-downloader-types.h
+	  "m2m-100-418M", // presumably ModelInfo::local_folder_name (sub-folder of the models folder) - confirm
+	  MODEL_TYPE_TRANSLATION, // below: download URLs, each paired with a 64-hex-digit string (presumably SHA-256 - TODO confirm)
+	  {{"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/model.bin?download=true",
+	    "D6703DD9F920FF896E45C3D97B490761BED5944937B90BBE6A7245F5652542D4"},
+	   {
+		   "https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/config.json?download=true",
+		   "4244772990E30069563E3DDFB4AD6DC95BDFD2AC3DE667EA8858C9B0A8433FA8",
+	   },
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/generation_config.json?download=true",
+	    "AED76366507333DDBB8BD49960F23C82FE6446B3319A46A54BEFDB45324CCF61"},
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/shared_vocabulary.json?download=true",
+	    "7EB5D0FF184C6095C7C10F9911C0AEA492250ABD12854F9C3D787C64B1C6397E"},
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/special_tokens_map.json?download=true",
+	    "C1A4F86C3874D279AE1B2A05162858DB5DD6C61665D84223ED886CBCFF08FDA6"},
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/tokenizer_config.json?download=true",
+	    "AE54F15F0649BB05041CDADAD8485BA1FAF40BC33E6B4C2A74AE2D1AE5710FA2"},
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/vocab.json?download=true",
+	    "B6E77E474AEEA8F441363ACA7614317C06381F3EACFE10FB9856D5081D1074CC"},
+	   {"https://huggingface.co/jncraton/m2m100_418M-ct2-int8/resolve/main/sentencepiece.bpe.model?download=true",
+	    "D8F7C76ED2A5E0822BE39F0A4F95A55EB19C78F4593CE609E2EDBC2AEA4D380A"}}}},
+	{"Whisper Base q5 (57Mb)",
+	 {"Whisper Base q5",
+	  "whisper-base-q5",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin",
+	    "422F1AE452ADE6F30A004D7E5C6A43195E4433BC370BF23FAC9CC591F01A8898"}}}},
+	{"Whisper Base En q5 (57Mb)",
+	 {"Whisper Base En q5",
+	  "ggml-model-whisper-base-en-q5_1",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin",
+	    "4BAF70DD0D7C4247BA2B81FAFD9C01005AC77C2F9EF064E00DCF195D0E2FDD2F"}}}},
+	{"Whisper Base (141Mb)",
+	 {"Whisper Base",
+	  "ggml-model-whisper-base",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-base.bin",
+	    "60ED5BC3DD14EEA856493D334349B405782DDCAF0028D4B5DF4088345FBA2EFE"}}}},
+	{"Whisper Base En (141Mb)",
+	 {"Whisper Base En",
+	  "ggml-model-whisper-base-en",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin",
+	    "A03779C86DF3323075F5E796CB2CE5029F00EC8869EEE3FDFB897AFE36C6D002"}}}},
+	{"Whisper Large q5 (1Gb)",
+	 {"Whisper Large q5",
+	  "ggml-model-whisper-large-q5_0",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin",
+	    "3A214837221E4530DBC1FE8D734F302AF393EB30BD0ED046042EBF4BAF70F6F2"}}}},
+	{"Whisper Medium q5 (514Mb)",
+	 {"Whisper Medium q5",
+	  "ggml-model-whisper-medium-q5_0",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin",
+	    "19FEA4B380C3A618EC4723C3EEF2EB785FFBA0D0538CF43F8F235E7B3B34220F"}}}},
+	{"Whisper Medium En q5 (514Mb)",
+	 {"Whisper Medium En q5",
+	  "ggml-model-whisper-medium-en-q5_0",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-medium.en-q5_0.bin",
+	    "76733E26AD8FE1C7A5BF7531A9D41917B2ADC0F20F2E4F5531688A8C6CD88EB0"}}}},
+	{"Whisper Small q5 (181Mb)",
+	 {"Whisper Small q5",
+	  "ggml-model-whisper-small-q5_1",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin",
+	    "AE85E4A935D7A567BD102FE55AFC16BB595BDB618E11B2FC7591BC08120411BB"}}}},
+	{"Whisper Small En q5 (181Mb)",
+	 {"Whisper Small En q5",
+	  "ggml-model-whisper-small-en-q5_1",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin",
+	    "BFDFF4894DCB76BBF647D56263EA2A96645423F1669176F4844A1BF8E478AD30"}}}},
+	{"Whisper Small (465Mb)",
+	 {"Whisper Small",
+	  "ggml-model-whisper-small",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-small.bin",
+	    "1BE3A9B2063867B937E64E2EC7483364A79917E157FA98C5D94B5C1FFFEA987B"}}}},
+	{"Whisper Small En (465Mb)",
+	 {"Whisper Small En",
+	  "ggml-model-whisper-small-en",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin",
+	    "C6138D6D58ECC8322097E0F987C32F1BE8BB0A18532A3F88F734D1BBF9C41E5D"}}}},
+	{"Whisper Tiny (74Mb)",
+	 {"Whisper Tiny",
+	  "ggml-model-whisper-tiny",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin",
+	    "BE07E048E1E599AD46341C8D2A135645097A538221678B7ACDD1B1919C6E1B21"}}}},
+	{"Whisper Tiny q5 (31Mb)",
+	 {"Whisper Tiny q5",
+	  "ggml-model-whisper-tiny-q5_1",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin",
+	    "818710568DA3CA15689E31A743197B520007872FF9576237BDA97BD1B469C3D7"}}}},
+	{"Whisper Tiny En q5 (31Mb)",
+	 {"Whisper Tiny En q5",
+	  "ggml-model-whisper-tiny-en-q5_1",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin",
+	    "C77C5766F1CEF09B6B7D47F21B546CBDDD4157886B3B5D6D4F709E91E66C7C2B"}}}},
+	{"Whisper Tiny En q8 (42Mb)",
+	 {"Whisper Tiny En q8",
+	  "ggml-model-whisper-tiny-en-q8_0",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin",
+	    "5BC2B3860AA151A4C6E7BB095E1FCCE7CF12C7B020CA08DCEC0C6D018BB7DD94"}}}},
+	{"Whisper Tiny En (74Mb)",
+	 {"Whisper Tiny En",
+	  "ggml-model-whisper-tiny-en",
+	  MODEL_TYPE_TRANSCRIPTION,
+	  {{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin",
+	    "921E4CF8686FDD993DCD081A5DA5B6C365BFDE1162E72B08D75AC75289920B1F"}}}},
+}};
diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h
index 5dceb70..3e12c2a 100644
--- a/src/transcription-filter-data.h
+++ b/src/transcription-filter-data.h
@@ -65,7 +65,7 @@ struct transcription_filter_data {
 	audio_resampler_t *resampler;
 
 	/* whisper */
-	char *whisper_model_path;
+	std::string whisper_model_path;
 	struct whisper_context *whisper_context;
 	whisper_full_params whisper_params;
 
@@ -86,6 +86,9 @@ struct transcription_filter_data {
 	std::string source_lang;
 	std::string target_lang;
 
+    // Last transcription result
+    std::string last_text;
+
 	// Text source to output the subtitles
 	obs_weak_source_t *text_source;
 	char *text_source_name;
@@ -115,7 +118,7 @@ struct transcription_filter_data {
 		}
 		context = nullptr;
 		resampler = nullptr;
-		whisper_model_path = nullptr;
+		whisper_model_path = "";
 		whisper_context = nullptr;
 		text_source = nullptr;
 		text_source_mutex = nullptr;
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index 50b718b..4627c50 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -9,6 +9,7 @@
 #include "whisper-utils/whisper-language.h"
 #include "whisper-utils/whisper-utils.h"
 #include "translation/language_codes.h"
+#include "translation/translation.h"
 
 #include <algorithm>
 #include <fstream>
@@ -127,7 +128,7 @@ void transcription_filter_destroy(void *data)
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
 
-	obs_log(gf->log_level, "transcription_filter_destroy");
+	obs_log(gf->log_level, "filter destroy");
 	shutdown_whisper_thread(gf);
 
 	if (gf->text_source_name) {
@@ -275,16 +276,28 @@ void set_text_callback(struct transcription_filter_data *gf,
 	std::string str_copy = result.text;
 #endif
 
+    // remove trailing spaces, newlines, tabs or punctuation: cut right after the last char that is neither
+    str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(),
+                                [](unsigned char ch) { return !std::isspace(ch) && !std::ispunct(ch); })
+                       .base(),
+                   str_copy.end());
+
 	if (gf->translate) {
-		std::string translated_text;
+        obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(), gf->target_lang.c_str());
+        std::string translated_text;
 		if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang,
 			      translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) {
-			str_copy = translated_text;
+            if (gf->log_words) {
+                obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(), translated_text.c_str());
+            }
+            str_copy = translated_text;
 		} else {
 			obs_log(gf->log_level, "Failed to translate text");
 		}
 	}
 
+    gf->last_text = str_copy;
+
 	if (gf->caption_to_stream) {
 		obs_output_t *streaming_output = obs_frontend_get_streaming_output();
 		if (streaming_output) {
@@ -377,7 +390,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 		static_cast<struct transcription_filter_data *>(data);
 
 	gf->log_level = (int)obs_data_get_int(s, "log_level");
-	obs_log(gf->log_level, "transcription_filter_update");
+	obs_log(gf->log_level, "filter update");
 
 	gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
 	gf->log_words = obs_data_get_bool(s, "log_words");
@@ -401,24 +414,17 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	bool new_translate = obs_data_get_bool(s, "translate");
 	gf->source_lang = obs_data_get_string(s, "translate_source_language");
 	gf->target_lang = obs_data_get_string(s, "translate_target_language");
+	gf->translation_ctx.add_context = obs_data_get_bool(s, "translate_add_context");
 
 	if (new_translate != gf->translate) {
 		if (new_translate) {
-			if (build_translation_context(gf->translation_ctx,
-						      "models/m2m100-418m.sp.model",
-						      "models/m2m100-418m") !=
-			    OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) {
-				obs_log(gf->log_level, "Failed to initialize translation context");
-				gf->translate = false;
-			} else {
-				gf->translate = true;
-			}
+			start_translation(gf);
 		} else {
 			gf->translate = false;
 		}
 	}
 
-	obs_log(gf->log_level, "transcription_filter: update text source");
+	obs_log(gf->log_level, "update text source");
 	// update the text source
 	const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources");
 	obs_weak_source_t *old_weak_text_source = NULL;
@@ -482,10 +488,10 @@ void transcription_filter_update(void *data, obs_data_t *s)
 		return;
 	}
 
-	obs_log(gf->log_level, "transcription_filter: update whisper model");
+	obs_log(gf->log_level, "update whisper model");
 	update_whsiper_model_path(gf, s);
 
-	obs_log(gf->log_level, "transcription_filter: update whisper params");
+	obs_log(gf->log_level, "update whisper params");
 	std::lock_guard<std::mutex> lock(*gf->whisper_ctx_mutex);
 
 	gf->whisper_params = whisper_full_default_params(
@@ -495,7 +501,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	gf->whisper_params.initial_prompt = obs_data_get_string(s, "initial_prompt");
 	gf->whisper_params.n_threads = (int)obs_data_get_int(s, "n_threads");
 	gf->whisper_params.n_max_text_ctx = (int)obs_data_get_int(s, "n_max_text_ctx");
-	gf->whisper_params.translate = obs_data_get_bool(s, "translate");
+	gf->whisper_params.translate = obs_data_get_bool(s, "whisper_translate");
 	gf->whisper_params.no_context = obs_data_get_bool(s, "no_context");
 	gf->whisper_params.single_segment = obs_data_get_bool(s, "single_segment");
 	gf->whisper_params.print_special = obs_data_get_bool(s, "print_special");
@@ -519,7 +525,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 
 void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 {
-	obs_log(LOG_INFO, "transcription filter create");
+	obs_log(LOG_INFO, "LocalVocal filter create");
 
 	struct transcription_filter_data *gf = new transcription_filter_data();
 
@@ -559,10 +565,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 
 	gf->overlap_ms = (int)obs_data_get_int(settings, "overlap_size_msec");
 	gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms));
-	obs_log(gf->log_level, "transcription_filter: channels %d, frames %d, sample_rate %d",
+	obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d",
 		(int)gf->channels, (int)gf->frames, gf->sample_rate);
 
-	obs_log(gf->log_level, "transcription_filter: setup audio resampler");
+	obs_log(gf->log_level, "setup audio resampler");
 	struct resample_info src, dst;
 	src.samples_per_sec = gf->sample_rate;
 	src.format = AUDIO_FORMAT_FLOAT_PLANAR;
@@ -574,12 +580,12 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 
 	gf->resampler = audio_resampler_create(&dst, &src);
 
-	obs_log(gf->log_level, "transcription_filter: setup mutexes and condition variables");
+	obs_log(gf->log_level, "setup mutexes and condition variables");
 	gf->whisper_buf_mutex = new std::mutex();
 	gf->whisper_ctx_mutex = new std::mutex();
 	gf->wshiper_thread_cv = new std::condition_variable();
 	gf->text_source_mutex = new std::mutex();
-	obs_log(gf->log_level, "transcription_filter: clear text source data");
+	obs_log(gf->log_level, "clear text source data");
 	gf->text_source = nullptr;
 	const char *subtitle_sources = obs_data_get_string(settings, "subtitle_sources");
 	if (subtitle_sources != nullptr) {
@@ -587,13 +593,13 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 	} else {
 		gf->text_source_name = nullptr;
 	}
-	obs_log(gf->log_level, "transcription_filter: clear paths and whisper context");
+	obs_log(gf->log_level, "clear paths and whisper context");
 	gf->whisper_model_file_currently_loaded = "";
 	gf->output_file_path = std::string("");
-	gf->whisper_model_path = nullptr; // The update function will set the model path
+	gf->whisper_model_path = std::string(""); // The update function will set the model path
 	gf->whisper_context = nullptr;
 
-	obs_log(gf->log_level, "transcription_filter: run update");
+	obs_log(gf->log_level, "run update");
 	// get the settings updated on the filter data struct
 	transcription_filter_update(gf, settings);
 
@@ -641,7 +647,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 		},
 		gf);
 
-	obs_log(gf->log_level, "transcription_filter: filter created.");
+	obs_log(gf->log_level, "filter created.");
 	return gf;
 }
 
@@ -665,7 +671,7 @@ void transcription_filter_activate(void *data)
 {
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
-	obs_log(gf->log_level, "transcription_filter filter activated");
+	obs_log(gf->log_level, "filter activated");
 	gf->active = true;
 }
 
@@ -673,20 +679,19 @@ void transcription_filter_deactivate(void *data)
 {
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
-	obs_log(gf->log_level, "transcription_filter filter deactivated");
+	obs_log(gf->log_level, "filter deactivated");
 	gf->active = false;
 }
 
 void transcription_filter_defaults(obs_data_t *s)
 {
-	obs_log(LOG_INFO, "transcription_filter_defaults");
+	obs_log(LOG_INFO, "filter defaults");
 
 	obs_data_set_default_bool(s, "vad_enabled", true);
 	obs_data_set_default_int(s, "log_level", LOG_DEBUG);
 	obs_data_set_default_bool(s, "log_words", true);
 	obs_data_set_default_bool(s, "caption_to_stream", false);
-	obs_data_set_default_string(s, "whisper_model_path",
-				    "models/ggml-model-whisper-tiny.en.bin");
+	obs_data_set_default_string(s, "whisper_model_path", "Whisper Tiny En (74Mb)");
 	obs_data_set_default_string(s, "whisper_language_select", "en");
 	obs_data_set_default_string(s, "subtitle_sources", "none");
 	obs_data_set_default_bool(s, "step_by_step_processing", false);
@@ -703,14 +708,15 @@ void transcription_filter_defaults(obs_data_t *s)
 	obs_data_set_default_bool(s, "translate", false);
 	obs_data_set_default_string(s, "translate_target_language", "__es__");
 	obs_data_set_default_string(s, "translate_source_language", "__en__");
+	obs_data_set_default_bool(s, "translate_add_context", true);
 
 	// Whisper parameters
 	obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH);
 	obs_data_set_default_string(s, "initial_prompt", "");
 	obs_data_set_default_int(s, "n_threads", 4);
 	obs_data_set_default_int(s, "n_max_text_ctx", 16384);
-	obs_data_set_default_bool(s, "translate", false);
-	obs_data_set_default_bool(s, "no_context", true);
+	obs_data_set_default_bool(s, "whisper_translate", false);
+	obs_data_set_default_bool(s, "no_context", false);
 	obs_data_set_default_bool(s, "single_segment", true);
 	obs_data_set_default_bool(s, "print_special", false);
 	obs_data_set_default_bool(s, "print_progress", false);
@@ -732,7 +738,7 @@ void transcription_filter_defaults(obs_data_t *s)
 
 obs_properties_t *transcription_filter_properties(void *data)
 {
-	obs_log(LOG_INFO, "transcription_filter_properties");
+	obs_log(LOG_DEBUG, "Add filter properties");
 
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
@@ -775,6 +781,8 @@ obs_properties_t *transcription_filter_properties(void *data)
 	obs_property_t *prop_src = obs_properties_add_list(
 		translation_group, "translate_source_language", MT_("source_language"),
 		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
+    obs_property_t *prop_add_context = obs_properties_add_bool(
+                translation_group, "translate_add_context", MT_("translate_add_context"));
 
 	// Populate the dropdown with the language codes
 	for (const auto &language : language_codes) {
@@ -791,10 +799,9 @@ obs_properties_t *transcription_filter_properties(void *data)
 		UNUSED_PARAMETER(property);
 		// Show/Hide the translation group
 		const bool translate_enabled = obs_data_get_bool(settings, "translate");
-		obs_property_set_visible(obs_properties_get(props, "translate_target_language"),
-					 translate_enabled);
-		obs_property_set_visible(obs_properties_get(props, "translate_source_language"),
-					 translate_enabled);
+        for (const auto& prop : { "translate_target_language", "translate_source_language", "translate_add_context" }) {
+            obs_property_set_visible(obs_properties_get(props, prop), translate_enabled);
+        }
 		return true;
 	});
 
@@ -822,39 +829,14 @@ obs_properties_t *transcription_filter_properties(void *data)
 	obs_property_t *whisper_models_list =
 		obs_properties_add_list(ppts, "whisper_model_path", MT_("whisper_model"),
 					OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
+	// Add models from models_info map
+	for (const auto &model_info : models_info) {
+		if (model_info.second.type == MODEL_TYPE_TRANSCRIPTION) {
+			obs_property_list_add_string(whisper_models_list, model_info.first.c_str(),
+						     model_info.first.c_str());
+		}
+	}
 
-	obs_property_list_add_string(whisper_models_list, "Base q5 57M",
-				     "models/ggml-model-whisper-base-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Base 141M",
-				     "models/ggml-model-whisper-base.bin");
-	obs_property_list_add_string(whisper_models_list, "Base (Eng) q5 57M",
-				     "models/ggml-model-whisper-base.en-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Base (Eng) 141M",
-				     "models/ggml-model-whisper-base.en.bin");
-	obs_property_list_add_string(whisper_models_list, "Large q5 1G",
-				     "models/ggml-model-whisper-large-q5_0.bin");
-	obs_property_list_add_string(whisper_models_list, "Medium q5 514M",
-				     "models/ggml-model-whisper-medium-q5_0.bin");
-	obs_property_list_add_string(whisper_models_list, "Medium (Eng) 514M",
-				     "models/ggml-model-whisper-medium.en-q5_0.bin");
-	obs_property_list_add_string(whisper_models_list, "Small q5 181M",
-				     "models/ggml-model-whisper-small-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Small 465M",
-				     "models/ggml-model-whisper-small.bin");
-	obs_property_list_add_string(whisper_models_list, "Small (Eng) q5 181M",
-				     "models/ggml-model-whisper-small.en-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Small (Eng) 465M",
-				     "models/ggml-model-whisper-small.en.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny q5 31M",
-				     "models/ggml-model-whisper-tiny-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny 74M",
-				     "models/ggml-model-whisper-tiny.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q5 31M",
-				     "models/ggml-model-whisper-tiny.en-q5_1.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q8 42M",
-				     "models/ggml-model-whisper-tiny.en-q8_0.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 74M",
-				     "models/ggml-model-whisper-tiny.en.bin");
 	obs_property_list_add_string(whisper_models_list, "Load external model file",
 				     "!!!external!!!");
 
@@ -956,7 +938,7 @@ obs_properties_t *transcription_filter_properties(void *data)
 	// int offset_ms;          // start offset in ms
 	// int duration_ms;        // audio duration to process in ms
 	// bool translate;
-	obs_properties_add_bool(whisper_params_group, "translate", MT_("translate"));
+	obs_properties_add_bool(whisper_params_group, "whisper_translate", MT_("translate"));
 	// bool no_context;        // do not use past transcription (if any) as initial prompt for the decoder
 	obs_properties_add_bool(whisper_params_group, "no_context", MT_("no_context"));
 	// bool single_segment;    // force single segment output (useful for streaming)
diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp
index 1987f99..662d58f 100644
--- a/src/translation/translation.cpp
+++ b/src/translation/translation.cpp
@@ -1,16 +1,57 @@
 #include "translation.h"
 #include "plugin-support.h"
+#include "model-utils/model-downloader.h"
+#include "transcription-filter-data.h"
 
 #include <ctranslate2/translator.h>
 #include <sentencepiece_processor.h>
 #include <obs-module.h>
 #include <regex>
 
-int build_translation_context(struct translation_context &translation_ctx,
-			      const std::string &local_spm_path,
-			      const std::string &local_model_path)
+void build_and_enable_translation(struct transcription_filter_data* gf, const std::string& model_file_path)
 {
-	obs_log(LOG_INFO, "Building translation context...");
+	gf->translation_ctx.local_model_folder_path = model_file_path; // build_translation_context() reads the folder from here
+	const bool context_built = build_translation_context(gf->translation_ctx) == OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS;
+	if (context_built) {
+		obs_log(LOG_INFO, "Enable translation");
+	} else {
+		obs_log(LOG_ERROR, "Failed to load CT2 model");
+	}
+	gf->translate = context_built; // translation is switched on only when the context was built
+}
+
+// Turn translation on for this filter: load the M2M-100 CT2 model if its folder is found
+// locally, otherwise open the download dialog and load the model once the download succeeds.
+void start_translation(struct transcription_filter_data* gf)
+{
+	obs_log(LOG_INFO, "Starting translation...");
+	const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"];
+	const std::string model_folder_found = find_model_folder(translation_model_info);
+	if (!model_folder_found.empty()) {
+		// Model exists, just load it
+		build_and_enable_translation(gf, model_folder_found);
+		return;
+	}
+	obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading...");
+	download_model_with_ui_dialog(
+		translation_model_info, [gf](int download_status, const std::string &path) {
+			if (download_status == 0) {
+				obs_log(LOG_INFO, "CT2 model download complete");
+				build_and_enable_translation(gf, path);
+			} else {
+				obs_log(LOG_ERROR, "Model download failed");
+				gf->translate = false;
+			}
+		});
+}
+
+int build_translation_context(struct translation_context &translation_ctx)
+{
+	std::string local_model_path = translation_ctx.local_model_folder_path;
+	obs_log(LOG_INFO, "Building translation context from '%s'...", local_model_path.c_str());
+    // find the SPM file in the model folder
+    std::string local_spm_path = find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model");
+
 	try {
 		obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str());
 		translation_ctx.processor.reset(new sentencepiece::SentencePieceProcessor());
@@ -36,10 +77,10 @@ int build_translation_context(struct translation_context &translation_ctx,
 
 #ifdef POLYGLOT_WITH_CUDA
 		ctranslate2::Device device = ctranslate2::Device::CUDA;
-		obs_log(LOG_INFO, "Using CUDA");
+		obs_log(LOG_INFO, "CT2 Using CUDA");
 #else
 		ctranslate2::Device device = ctranslate2::Device::CPU;
-		obs_log(LOG_INFO, "Using CPU");
+		obs_log(LOG_INFO, "CT2 Using CPU");
 #endif
 
 		translation_ctx.translator.reset(new ctranslate2::Translator(
@@ -51,6 +92,8 @@ int build_translation_context(struct translation_context &translation_ctx,
 		translation_ctx.options->max_decoding_length = 40;
 		translation_ctx.options->use_vmap = true;
 		translation_ctx.options->return_scores = false;
+		translation_ctx.options->repetition_penalty = 1.1;
+		translation_ctx.options->no_repeat_ngram_size = 2;
 	} catch (std::exception &e) {
 		obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what());
 		return OBS_POLYGLOT_TRANSLATION_INIT_FAIL;
@@ -62,23 +105,38 @@ int translate(struct translation_context &translation_ctx, const std::string &te
 	      const std::string &source_lang, const std::string &target_lang, std::string &result)
 {
 	try {
-		// get tokens
-		std::vector<std::string> tokens = translation_ctx.tokenizer(text);
-		tokens.insert(tokens.begin(), "<s>");
-		tokens.insert(tokens.begin(), source_lang);
-		tokens.push_back("</s>");
-
-		const std::vector<std::vector<std::string>> batch = {tokens};
-
-		const std::vector<std::vector<std::string>> target_prefix = {{target_lang}};
-		const std::vector<ctranslate2::TranslationResult> results =
-			translation_ctx.translator->translate_batch(batch, target_prefix,
-								    *translation_ctx.options);
-
-		// detokenize starting with the 2nd token
-		const auto &tokens_result = results[0].output();
-		result = translation_ctx.detokenizer(
-			std::vector<std::string>(tokens_result.begin() + 1, tokens_result.end()));
+		// set input tokens
+		std::vector<std::string> input_tokens = {source_lang, "<s>"};
+        if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) {
+            input_tokens.insert(input_tokens.end(), translation_ctx.last_input_tokens.begin(), translation_ctx.last_input_tokens.end());
+        }
+        std::vector<std::string> new_input_tokens = translation_ctx.tokenizer(text);
+        input_tokens.insert(input_tokens.end(), new_input_tokens.begin(), new_input_tokens.end());
+		input_tokens.push_back("</s>");
+
+        translation_ctx.last_input_tokens = new_input_tokens;
+
+		const std::vector<std::vector<std::string>> batch = {input_tokens};
+
+        // get target prefix
+        std::vector<std::string> target_prefix = {target_lang};
+        if (translation_ctx.add_context && translation_ctx.last_translation_tokens.size() > 0) {
+            target_prefix.insert(target_prefix.end(), translation_ctx.last_translation_tokens.begin(), translation_ctx.last_translation_tokens.end());
+        }
+
+        const std::vector<std::vector<std::string>> target_prefix_batch = {target_prefix};
+        const std::vector<ctranslate2::TranslationResult> results =
+            translation_ctx.translator->translate_batch(batch, target_prefix_batch,
+                                    *translation_ctx.options);
+
+        const auto &tokens_result = results[0].output();
+        // take the tokens after the target prefix; clamp the offset so a result shorter than the prefix cannot run past end()
+        const size_t prefix_len = target_prefix.size() < tokens_result.size() ? target_prefix.size() : tokens_result.size();
+        std::vector<std::string> translation_tokens(tokens_result.begin() + prefix_len, tokens_result.end());
+
+        translation_ctx.last_translation_tokens = translation_tokens;
+        // detokenize
+        result = translation_ctx.detokenizer(translation_tokens);
 	} catch (std::exception &e) {
 		obs_log(LOG_ERROR, "Error: %s", e.what());
 		return OBS_POLYGLOT_TRANSLATION_FAIL;
diff --git a/src/translation/translation.h b/src/translation/translation.h
index 9d21734..bfa0a18 100644
--- a/src/translation/translation.h
+++ b/src/translation/translation.h
@@ -7,16 +7,20 @@
 #include <functional>
 
 struct translation_context {
+    std::string local_model_folder_path; // model folder that build_translation_context() loads from
 	std::unique_ptr<sentencepiece::SentencePieceProcessor> processor;
 	std::unique_ptr<ctranslate2::Translator> translator;
 	std::unique_ptr<ctranslate2::TranslationOptions> options;
 	std::function<std::vector<std::string>(const std::string &)> tokenizer;
 	std::function<std::string(const std::vector<std::string> &)> detokenizer;
+    std::vector<std::string> last_input_tokens; // tokens of the text given to the previous translate() call
+    std::vector<std::string> last_translation_tokens; // tokens produced by the previous translate() call
+    // Use the last translation as context for the next translation
+    bool add_context = false; // explicit default so the flag is never read while indeterminate
 };
 
-int build_translation_context(struct translation_context &translation_ctx,
-			      const std::string &local_spm_path,
-			      const std::string &local_model_path);
+void start_translation(struct transcription_filter_data* gf);
+int build_translation_context(struct translation_context &translation_ctx);
 
 int translate(struct translation_context &translation_ctx, const std::string &text,
 	      const std::string &source_lang, const std::string &target_lang, std::string &result);
diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
index 748f52e..ab58320 100644
--- a/src/whisper-utils/whisper-processing.cpp
+++ b/src/whisper-utils/whisper-processing.cpp
@@ -14,6 +14,7 @@
 #include <fstream>
 #include <Windows.h>
 #endif
+#include "model-utils/model-downloader.h"
 
 #define VAD_THOLD 0.0001f
 #define FREQ_THOLD 100.0f
@@ -108,10 +109,23 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v
 	return true;
 }
 
-struct whisper_context *init_whisper_context(const std::string &model_path)
+struct whisper_context *init_whisper_context(const std::string &model_path_in)
 {
+    std::string model_path = model_path_in;
+
 	obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str());
 
+    if (std::filesystem::is_directory(model_path)) {
+        obs_log(LOG_INFO, "Model path is a directory, not a file, looking for .bin file in folder");
+        // look for .bin file
+        const std::string model_bin_file = find_bin_file_in_folder(model_path);
+        if (model_bin_file.empty()) {
+            obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_path.c_str());
+            return nullptr;
+        }
+        model_path = model_bin_file;
+    }
+
 	struct whisper_context_params cparams = whisper_context_default_params();
 #ifdef LOCALVOCAL_WITH_CUDA
 	cparams.use_gpu = true;
diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp
index 47b983c..8d7de1f 100644
--- a/src/whisper-utils/whisper-utils.cpp
+++ b/src/whisper-utils/whisper-utils.cpp
@@ -9,10 +9,10 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 	std::string new_model_path = obs_data_get_string(s, "whisper_model_path");
 	const bool is_external_model = new_model_path.find("!!!external!!!") != std::string::npos;
 
-	if (gf->whisper_model_path == nullptr ||
-	    strcmp(new_model_path.c_str(), gf->whisper_model_path) != 0 || is_external_model) {
+	if (gf->whisper_model_path.empty() || gf->whisper_model_path != new_model_path ||
+	    is_external_model) {
 		// model path changed, reload the model
-		obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path,
+		obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path.c_str(),
 			new_model_path.c_str());
 
 		// check if the new model is external file
@@ -20,18 +20,25 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 			// new model is not external file
 			shutdown_whisper_thread(gf);
 
-			gf->whisper_model_path = bstrdup(new_model_path.c_str());
+			if (models_info.count(new_model_path) == 0) {
+				obs_log(LOG_WARNING, "Model '%s' does not exist",
+					new_model_path.c_str());
+				return;
+			}
+
+			const ModelInfo &model_info = models_info[new_model_path];
 
 			// check if the model exists, if not, download it
-			std::string model_file_found = find_model_file(gf->whisper_model_path);
+			std::string model_file_found = find_model_bin_file(model_info);
 			if (model_file_found == "") {
 				obs_log(LOG_WARNING, "Whisper model does not exist");
 				download_model_with_ui_dialog(
-					gf->whisper_model_path,
-					[gf](int download_status, const std::string &path) {
+					model_info,
+					[gf, new_model_path](int download_status, const std::string &path) {
 						if (download_status == 0) {
 							obs_log(LOG_INFO,
 								"Model download complete");
+                            gf->whisper_model_path = new_model_path;
 							start_whisper_thread_with_path(gf, path);
 						} else {
 							obs_log(LOG_ERROR, "Model download failed");
@@ -39,6 +46,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 					});
 			} else {
 				// Model exists, just load it
+                gf->whisper_model_path = new_model_path;
 				start_whisper_thread_with_path(gf, model_file_found);
 			}
 		} else {
@@ -55,7 +63,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 					return;
 				} else {
 					shutdown_whisper_thread(gf);
-					gf->whisper_model_path = bstrdup(new_model_path.c_str());
+					gf->whisper_model_path = new_model_path;
 					start_whisper_thread_with_path(gf,
 								       external_model_file_path);
 				}
@@ -85,9 +93,8 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf)
 	if (gf->whisper_thread.joinable()) {
 		gf->whisper_thread.join();
 	}
-	if (gf->whisper_model_path != nullptr) {
-		bfree(gf->whisper_model_path);
-		gf->whisper_model_path = nullptr;
+	if (!gf->whisper_model_path.empty()) {
+		gf->whisper_model_path.clear();
 	}
 }
 

From 3bd3694870a3d2f8196a2a22fe119b54c5573253 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:01:23 -0400
Subject: [PATCH 03/10] Fix formatting and whitespace issues

---
 src/model-utils/model-downloader-ui.cpp  |  44 ++++----
 src/model-utils/model-downloader.cpp     |  66 ++++++------
 src/model-utils/model-downloader.h       |   3 +-
 src/transcription-filter-data.h          |   4 +-
 src/transcription-filter.cpp             |  44 ++++----
 src/translation/translation.cpp          | 125 ++++++++++++-----------
 src/translation/translation.h            |  12 +--
 src/whisper-utils/whisper-processing.cpp |  24 +++--
 src/whisper-utils/whisper-utils.cpp      |  12 +--
 9 files changed, 182 insertions(+), 152 deletions(-)

diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp
index dfd8bd5..b978379 100644
--- a/src/model-utils/model-downloader-ui.cpp
+++ b/src/model-utils/model-downloader-ui.cpp
@@ -112,14 +112,15 @@ void ModelDownloader::show_error(const std::string &reason)
 
 ModelDownloadWorker::ModelDownloadWorker(const ModelInfo &model_info_) : model_info(model_info_) {}
 
-std::string get_filename_from_url(const std::string& url) {
-    auto lastSlashPos = url.find_last_of("/");
-    auto queryPos = url.find("?", lastSlashPos);
-    if (queryPos == std::string::npos) {
-        return url.substr(lastSlashPos + 1);
-    } else {
-        return url.substr(lastSlashPos + 1, queryPos - lastSlashPos - 1);
-    }
+std::string get_filename_from_url(const std::string &url)
+{
+	// Cut the query string off first so that a '/' inside it (e.g. "?path=a/b")
+	// is not mistaken for the last path separator.
+	const std::string url_path = url.substr(0, url.find('?'));
+	// When there is no '/', find_last_of returns npos and npos + 1 wraps to 0,
+	// so the whole remaining string is returned.
+	const auto lastSlashPos = url_path.find_last_of('/');
+	return url_path.substr(lastSlashPos + 1);
 }
 
 void ModelDownloadWorker::download_model()
@@ -131,7 +132,8 @@ void ModelDownloadWorker::download_model()
 
 	// Check if the config folder exists
 	if (!std::filesystem::exists(module_config_models_folder)) {
-		obs_log(LOG_WARNING, "Config folder does not exist: %s", module_config_models_folder);
+		obs_log(LOG_WARNING, "Config folder does not exist: %s",
+			module_config_models_folder);
 		// Create the config folder
 		if (!std::filesystem::create_directories(module_config_models_folder)) {
 			obs_log(LOG_ERROR, "Failed to create config folder: %s",
@@ -146,24 +148,26 @@ void ModelDownloadWorker::download_model()
 
 	obs_log(LOG_INFO, "Model save path: %s", model_local_config_path.c_str());
 
-    if (!std::filesystem::exists(model_local_config_path)) {
-        // model folder does not exist, create it
-        if (!std::filesystem::create_directories(model_local_config_path)) {
-            obs_log(LOG_ERROR, "Failed to create model folder: %s",
-                model_local_config_path.c_str());
-            emit download_error("Failed to create model folder.");
-            return;
-        }
-    }
+	if (!std::filesystem::exists(model_local_config_path)) {
+		// model folder does not exist, create it
+		if (!std::filesystem::create_directories(model_local_config_path)) {
+			obs_log(LOG_ERROR, "Failed to create model folder: %s",
+				model_local_config_path.c_str());
+			emit download_error("Failed to create model folder.");
+			return;
+		}
+	}
 
 	CURL *curl = curl_easy_init();
 	if (curl) {
 		for (auto &model_download_file : this->model_info.files) {
 			obs_log(LOG_INFO, "Model URL: %s", model_download_file.url.c_str());
 
-            const std::string model_filename = get_filename_from_url(model_download_file.url);
+			const std::string model_filename =
+				get_filename_from_url(model_download_file.url);
 			const std::string model_file_save_path =
-				(std::filesystem::path(model_local_config_path) / model_filename).string();
+				(std::filesystem::path(model_local_config_path) / model_filename)
+					.string();
 			if (std::filesystem::exists(model_file_save_path)) {
 				obs_log(LOG_INFO, "Model file already exists: %s",
 					model_file_save_path.c_str());
diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp
index f8b5293..ae3e8b1 100644
--- a/src/model-utils/model-downloader.cpp
+++ b/src/model-utils/model-downloader.cpp
@@ -12,45 +12,51 @@
 
 #include <curl/curl.h>
 
-std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name) {
-    for (const auto &entry : std::filesystem::directory_iterator(folder_path)) {
-        if (entry.path().filename() == file_name) {
-            return entry.path().string();
-        }
-    }
-    return "";
+std::string find_file_in_folder_by_name(const std::string &folder_path,
+					const std::string &file_name)
+{
+	for (const auto &entry : std::filesystem::directory_iterator(folder_path)) {
+		if (entry.path().filename() == file_name) {
+			return entry.path().string();
+		}
+	}
+	return "";
 }
 
-std::string find_bin_file_in_folder(const std::string &model_local_folder_path) {
-    // find .bin file in folder
-    for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) {
-        if (entry.path().extension() == ".bin") {
-            const std::string bin_file_path = entry.path().string();
-            obs_log(LOG_INFO, "Model bin file found in folder: %s", bin_file_path.c_str());
-            return bin_file_path;
-        }
-    }
-    obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_local_folder_path.c_str());
-    return "";
+std::string find_bin_file_in_folder(const std::string &model_local_folder_path)
+{
+	// find .bin file in folder
+	for (const auto &entry : std::filesystem::directory_iterator(model_local_folder_path)) {
+		if (entry.path().extension() == ".bin") {
+			const std::string bin_file_path = entry.path().string();
+			obs_log(LOG_INFO, "Model bin file found in folder: %s",
+				bin_file_path.c_str());
+			return bin_file_path;
+		}
+	}
+	obs_log(LOG_ERROR, "Model bin file not found in folder: %s",
+		model_local_folder_path.c_str());
+	return "";
 }
 
 std::string find_model_folder(const ModelInfo &model_info)
 {
-    char* data_folder_models = obs_module_file("models");
+	char *data_folder_models = obs_module_file("models");
 	const std::filesystem::path module_data_models_folder =
 		std::filesystem::absolute(data_folder_models);
-    bfree(data_folder_models);
+	bfree(data_folder_models);
 
 	const std::string model_local_data_path =
 		(module_data_models_folder / model_info.local_folder_name).string();
 
-	obs_log(LOG_INFO, "Checking if model '%s' exists in data...", model_info.friendly_name.c_str());
+	obs_log(LOG_INFO, "Checking if model '%s' exists in data...",
+		model_info.friendly_name.c_str());
 
 	if (!std::filesystem::exists(model_local_data_path)) {
 		obs_log(LOG_INFO, "Model not found in data: %s", model_local_data_path.c_str());
 	} else {
 		obs_log(LOG_INFO, "Model folder found in data: %s", model_local_data_path.c_str());
-        return model_local_data_path;
+		return model_local_data_path;
 	}
 
 	// Check if model exists in the config folder
@@ -59,7 +65,8 @@ std::string find_model_folder(const ModelInfo &model_info)
 		std::filesystem::absolute(config_folder);
 	bfree(config_folder);
 
-    obs_log(LOG_INFO, "Checking if model '%s' exists in config...", model_info.friendly_name.c_str());
+	obs_log(LOG_INFO, "Checking if model '%s' exists in config...",
+		model_info.friendly_name.c_str());
 
 	const std::string model_local_config_path =
 		(module_config_models_folder / model_info.local_folder_name).string();
@@ -75,13 +82,14 @@ std::string find_model_folder(const ModelInfo &model_info)
 	return "";
 }
 
-std::string find_model_bin_file(const ModelInfo &model_info) {
-    const std::string model_local_folder_path = find_model_folder(model_info);
-    if (model_local_folder_path.empty()) {
-        return "";
-    }
+std::string find_model_bin_file(const ModelInfo &model_info)
+{
+	const std::string model_local_folder_path = find_model_folder(model_info);
+	if (model_local_folder_path.empty()) {
+		return "";
+	}
 
-    return find_bin_file_in_folder(model_local_folder_path);
+	return find_bin_file_in_folder(model_local_folder_path);
 }
 
 void download_model_with_ui_dialog(const ModelInfo &model_info,
diff --git a/src/model-utils/model-downloader.h b/src/model-utils/model-downloader.h
index fca3337..b075d39 100644
--- a/src/model-utils/model-downloader.h
+++ b/src/model-utils/model-downloader.h
@@ -6,7 +6,8 @@
 
 #include "model-downloader-types.h"
 
-std::string find_file_in_folder_by_name(const std::string& folder_path, const std::string& file_name);
+std::string find_file_in_folder_by_name(const std::string &folder_path,
+					const std::string &file_name);
 std::string find_bin_file_in_folder(const std::string &path);
 std::string find_model_folder(const ModelInfo &model_info);
 std::string find_model_bin_file(const ModelInfo &model_info);
diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h
index 3e12c2a..90621fb 100644
--- a/src/transcription-filter-data.h
+++ b/src/transcription-filter-data.h
@@ -86,8 +86,8 @@ struct transcription_filter_data {
 	std::string source_lang;
 	std::string target_lang;
 
-    // Last transcription result
-    std::string last_text;
+	// Last transcription result
+	std::string last_text;
 
 	// Text source to output the subtitles
 	obs_weak_source_t *text_source;
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index 4627c50..909aa37 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -276,27 +276,31 @@ void set_text_callback(struct transcription_filter_data *gf,
 	std::string str_copy = result.text;
 #endif
 
-    // remove trailing spaces, newlines, tabs or punctuation
-    str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(),
-                                [](unsigned char ch) { return !std::isspace(ch) || !std::ispunct(ch); })
-                       .base(),
-                   str_copy.end());
+	// remove trailing characters that are whitespace (space, newline, tab) or punctuation
+	str_copy.erase(std::find_if(str_copy.rbegin(), str_copy.rend(),
+				    [](unsigned char ch) {
+					    return !std::isspace(ch) && !std::ispunct(ch);
+				    })
+			       .base(),
+		       str_copy.end());
 
 	if (gf->translate) {
-        obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(), gf->target_lang.c_str());
-        std::string translated_text;
+		obs_log(gf->log_level, "Translating text. %s -> %s", gf->source_lang.c_str(),
+			gf->target_lang.c_str());
+		std::string translated_text;
 		if (translate(gf->translation_ctx, str_copy, gf->source_lang, gf->target_lang,
 			      translated_text) == OBS_POLYGLOT_TRANSLATION_SUCCESS) {
-            if (gf->log_words) {
-                obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(), translated_text.c_str());
-            }
-            str_copy = translated_text;
+			if (gf->log_words) {
+				obs_log(LOG_INFO, "Translation: '%s' -> '%s'", str_copy.c_str(),
+					translated_text.c_str());
+			}
+			str_copy = translated_text;
 		} else {
 			obs_log(gf->log_level, "Failed to translate text");
 		}
 	}
 
-    gf->last_text = str_copy;
+	gf->last_text = str_copy;
 
 	if (gf->caption_to_stream) {
 		obs_output_t *streaming_output = obs_frontend_get_streaming_output();
@@ -565,8 +569,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 
 	gf->overlap_ms = (int)obs_data_get_int(settings, "overlap_size_msec");
 	gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms));
-	obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d",
-		(int)gf->channels, (int)gf->frames, gf->sample_rate);
+	obs_log(gf->log_level, "channels %d, frames %d, sample_rate %d", (int)gf->channels,
+		(int)gf->frames, gf->sample_rate);
 
 	obs_log(gf->log_level, "setup audio resampler");
 	struct resample_info src, dst;
@@ -781,8 +785,8 @@ obs_properties_t *transcription_filter_properties(void *data)
 	obs_property_t *prop_src = obs_properties_add_list(
 		translation_group, "translate_source_language", MT_("source_language"),
 		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
-    obs_property_t *prop_add_context = obs_properties_add_bool(
-                translation_group, "translate_add_context", MT_("translate_add_context"));
+	obs_property_t *prop_add_context = obs_properties_add_bool(
+		translation_group, "translate_add_context", MT_("translate_add_context"));
 
 	// Populate the dropdown with the language codes
 	for (const auto &language : language_codes) {
@@ -799,9 +803,11 @@ obs_properties_t *transcription_filter_properties(void *data)
 		UNUSED_PARAMETER(property);
 		// Show/Hide the translation group
 		const bool translate_enabled = obs_data_get_bool(settings, "translate");
-        for (const auto& prop : { "translate_target_language", "translate_source_language", "translate_add_context" }) {
-            obs_property_set_visible(obs_properties_get(props, prop), translate_enabled);
-        }
+		for (const auto &prop : {"translate_target_language", "translate_source_language",
+					 "translate_add_context"}) {
+			obs_property_set_visible(obs_properties_get(props, prop),
+						 translate_enabled);
+		}
 		return true;
 	});
 
diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp
index 662d58f..56874c8 100644
--- a/src/translation/translation.cpp
+++ b/src/translation/translation.cpp
@@ -8,49 +8,52 @@
 #include <obs-module.h>
 #include <regex>
 
-void build_and_enable_translation(struct transcription_filter_data* gf, const std::string& model_file_path)
+void build_and_enable_translation(struct transcription_filter_data *gf,
+				  const std::string &model_file_path)
 {
-    gf->translation_ctx.local_model_folder_path = model_file_path;
-    if (build_translation_context(gf->translation_ctx) == OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) {
-        obs_log(LOG_INFO, "Enable translation");
-        gf->translate = true;
-    } else {
-        obs_log(LOG_ERROR, "Failed to load CT2 model");
-        gf->translate = false;
-    }
+	gf->translation_ctx.local_model_folder_path = model_file_path;
+	if (build_translation_context(gf->translation_ctx) ==
+	    OBS_POLYGLOT_TRANSLATION_INIT_SUCCESS) {
+		obs_log(LOG_INFO, "Enable translation");
+		gf->translate = true;
+	} else {
+		obs_log(LOG_ERROR, "Failed to load CT2 model");
+		gf->translate = false;
+	}
 }
 
-void start_translation(struct transcription_filter_data* gf)
+void start_translation(struct transcription_filter_data *gf)
 {
-    obs_log(LOG_INFO, "Starting translation...");
-
-    const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"];
-    std::string model_file_found = find_model_folder(translation_model_info);
-    if (model_file_found == "") {
-        obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading...");
-        download_model_with_ui_dialog(
-            translation_model_info,
-            [gf, model_file_found](int download_status, const std::string &path) {
-                if (download_status == 0) {
-                    obs_log(LOG_INFO, "CT2 model download complete");
-                    build_and_enable_translation(gf, path);
-                } else {
-                    obs_log(LOG_ERROR, "Model download failed");
-                    gf->translate = false;
-                }
-            });
-    } else {
-        // Model exists, just load it
-        build_and_enable_translation(gf, model_file_found);
-    }
+	obs_log(LOG_INFO, "Starting translation...");
+	// Look for the CT2 model folder on disk; when missing, download it before enabling
+	const ModelInfo &translation_model_info = models_info["M2M-100 418M (495Mb)"];
+	const std::string model_file_found = find_model_folder(translation_model_info);
+	if (model_file_found.empty()) {
+		obs_log(LOG_INFO, "Translation CT2 model does not exist. Downloading...");
+		download_model_with_ui_dialog(
+			translation_model_info,
+			[gf](int download_status, const std::string &path) {
+				if (download_status == 0) {
+					obs_log(LOG_INFO, "CT2 model download complete");
+					build_and_enable_translation(gf, path);
+				} else {
+					obs_log(LOG_ERROR, "Model download failed");
+					gf->translate = false;
+				}
+			});
+	} else {
+		// Model exists, just load it
+		build_and_enable_translation(gf, model_file_found);
+	}
 }
 
 int build_translation_context(struct translation_context &translation_ctx)
 {
 	std::string local_model_path = translation_ctx.local_model_folder_path;
 	obs_log(LOG_INFO, "Building translation context from '%s'...", local_model_path.c_str());
-    // find the SPM file in the model folder
-    std::string local_spm_path = find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model");
+	// find the SPM file in the model folder
+	std::string local_spm_path =
+		find_file_in_folder_by_name(local_model_path, "sentencepiece.bpe.model");
 
 	try {
 		obs_log(LOG_INFO, "Loading SPM from %s", local_spm_path.c_str());
@@ -107,36 +110,42 @@ int translate(struct translation_context &translation_ctx, const std::string &te
 	try {
 		// set input tokens
 		std::vector<std::string> input_tokens = {source_lang, "<s>"};
-        if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) {
-            input_tokens.insert(input_tokens.end(), translation_ctx.last_input_tokens.begin(), translation_ctx.last_input_tokens.end());
-        }
-        std::vector<std::string> new_input_tokens = translation_ctx.tokenizer(text);
-        input_tokens.insert(input_tokens.end(), new_input_tokens.begin(), new_input_tokens.end());
+		if (translation_ctx.add_context && translation_ctx.last_input_tokens.size() > 0) {
+			input_tokens.insert(input_tokens.end(),
+					    translation_ctx.last_input_tokens.begin(),
+					    translation_ctx.last_input_tokens.end());
+		}
+		std::vector<std::string> new_input_tokens = translation_ctx.tokenizer(text);
+		input_tokens.insert(input_tokens.end(), new_input_tokens.begin(),
+				    new_input_tokens.end());
 		input_tokens.push_back("</s>");
 
-        translation_ctx.last_input_tokens = new_input_tokens;
+		translation_ctx.last_input_tokens = new_input_tokens;
 
 		const std::vector<std::vector<std::string>> batch = {input_tokens};
 
-        // get target prefix
-        std::vector<std::string> target_prefix = {target_lang};
-        if (translation_ctx.add_context && translation_ctx.last_translation_tokens.size() > 0) {
-            target_prefix.insert(target_prefix.end(), translation_ctx.last_translation_tokens.begin(), translation_ctx.last_translation_tokens.end());
-        }
-
-        const std::vector<std::vector<std::string>> target_prefix_batch = {target_prefix};
-        const std::vector<ctranslate2::TranslationResult> results =
-            translation_ctx.translator->translate_batch(batch, target_prefix_batch,
-                                    *translation_ctx.options);
-
-        const auto &tokens_result = results[0].output();
-        // take the tokens from the target_prefix length to the end
-        std::vector<std::string> translation_tokens(tokens_result.begin() + target_prefix.size(),
-                                                     tokens_result.end());
-
-        translation_ctx.last_translation_tokens = translation_tokens;
-        // detokenize
-        result = translation_ctx.detokenizer(translation_tokens);
+		// get target prefix
+		std::vector<std::string> target_prefix = {target_lang};
+		if (translation_ctx.add_context &&
+		    translation_ctx.last_translation_tokens.size() > 0) {
+			target_prefix.insert(target_prefix.end(),
+					     translation_ctx.last_translation_tokens.begin(),
+					     translation_ctx.last_translation_tokens.end());
+		}
+
+		const std::vector<std::vector<std::string>> target_prefix_batch = {target_prefix};
+		const std::vector<ctranslate2::TranslationResult> results =
+			translation_ctx.translator->translate_batch(batch, target_prefix_batch,
+								    *translation_ctx.options);
+
+		const auto &tokens_result = results[0].output();
+		// take the tokens from the target_prefix length to the end
+		std::vector<std::string> translation_tokens(
+			tokens_result.begin() + target_prefix.size(), tokens_result.end());
+
+		translation_ctx.last_translation_tokens = translation_tokens;
+		// detokenize
+		result = translation_ctx.detokenizer(translation_tokens);
 	} catch (std::exception &e) {
 		obs_log(LOG_ERROR, "Error: %s", e.what());
 		return OBS_POLYGLOT_TRANSLATION_FAIL;
diff --git a/src/translation/translation.h b/src/translation/translation.h
index bfa0a18..d79fd9d 100644
--- a/src/translation/translation.h
+++ b/src/translation/translation.h
@@ -7,19 +7,19 @@
 #include <functional>
 
 struct translation_context {
-    std::string local_model_folder_path;
+	std::string local_model_folder_path;
 	std::unique_ptr<sentencepiece::SentencePieceProcessor> processor;
 	std::unique_ptr<ctranslate2::Translator> translator;
 	std::unique_ptr<ctranslate2::TranslationOptions> options;
 	std::function<std::vector<std::string>(const std::string &)> tokenizer;
 	std::function<std::string(const std::vector<std::string> &)> detokenizer;
-    std::vector<std::string> last_input_tokens;
-    std::vector<std::string> last_translation_tokens;
-    // Use the last translation as context for the next translation
-    bool add_context;
+	std::vector<std::string> last_input_tokens;
+	std::vector<std::string> last_translation_tokens;
+	// Use the last translation as context for the next translation
+	bool add_context;
 };
 
-void start_translation(struct transcription_filter_data* gf);
+void start_translation(struct transcription_filter_data *gf);
 int build_translation_context(struct translation_context &translation_ctx);
 
 int translate(struct translation_context &translation_ctx, const std::string &text,
diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
index ab58320..485f28a 100644
--- a/src/whisper-utils/whisper-processing.cpp
+++ b/src/whisper-utils/whisper-processing.cpp
@@ -111,20 +111,22 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v
 
 struct whisper_context *init_whisper_context(const std::string &model_path_in)
 {
-    std::string model_path = model_path_in;
+	std::string model_path = model_path_in;
 
 	obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str());
 
-    if (std::filesystem::is_directory(model_path)) {
-        obs_log(LOG_INFO, "Model path is a directory, not a file, looking for .bin file in folder");
-        // look for .bin file
-        const std::string model_bin_file = find_bin_file_in_folder(model_path);
-        if (model_bin_file.empty()) {
-            obs_log(LOG_ERROR, "Model bin file not found in folder: %s", model_path.c_str());
-            return nullptr;
-        }
-        model_path = model_bin_file;
-    }
+	if (std::filesystem::is_directory(model_path)) {
+		obs_log(LOG_INFO,
+			"Model path is a directory, not a file, looking for .bin file in folder");
+		// look for .bin file
+		const std::string model_bin_file = find_bin_file_in_folder(model_path);
+		if (model_bin_file.empty()) {
+			obs_log(LOG_ERROR, "Model bin file not found in folder: %s",
+				model_path.c_str());
+			return nullptr;
+		}
+		model_path = model_bin_file;
+	}
 
 	struct whisper_context_params cparams = whisper_context_default_params();
 #ifdef LOCALVOCAL_WITH_CUDA
diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp
index 8d7de1f..73e1bc8 100644
--- a/src/whisper-utils/whisper-utils.cpp
+++ b/src/whisper-utils/whisper-utils.cpp
@@ -12,8 +12,8 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 	if (gf->whisper_model_path.empty() || gf->whisper_model_path != new_model_path ||
 	    is_external_model) {
 		// model path changed, reload the model
-		obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path.c_str(),
-			new_model_path.c_str());
+		obs_log(gf->log_level, "model path changed from %s to %s",
+			gf->whisper_model_path.c_str(), new_model_path.c_str());
 
 		// check if the new model is external file
 		if (!is_external_model) {
@@ -33,12 +33,12 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 			if (model_file_found == "") {
 				obs_log(LOG_WARNING, "Whisper model does not exist");
 				download_model_with_ui_dialog(
-					model_info,
-					[gf, new_model_path](int download_status, const std::string &path) {
+					model_info, [gf, new_model_path](int download_status,
+									 const std::string &path) {
 						if (download_status == 0) {
 							obs_log(LOG_INFO,
 								"Model download complete");
-                            gf->whisper_model_path = new_model_path;
+							gf->whisper_model_path = new_model_path;
 							start_whisper_thread_with_path(gf, path);
 						} else {
 							obs_log(LOG_ERROR, "Model download failed");
@@ -46,7 +46,7 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 					});
 			} else {
 				// Model exists, just load it
-                gf->whisper_model_path = new_model_path;
+				gf->whisper_model_path = new_model_path;
 				start_whisper_thread_with_path(gf, model_file_found);
 			}
 		} else {

From 16ca8fb873be6d44eabb8b2ff38826c5455df2ec Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:22:34 -0400
Subject: [PATCH 04/10] Update build plugin and version, fix translation and
 whisper-utils

---
 .github/actions/build-plugin/action.yaml | 2 ++
 buildspec.json                           | 2 +-
 src/translation/translation.cpp          | 2 +-
 src/whisper-utils/whisper-utils.cpp      | 6 +++---
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/actions/build-plugin/action.yaml b/.github/actions/build-plugin/action.yaml
index 03e0e20..ac5950d 100644
--- a/.github/actions/build-plugin/action.yaml
+++ b/.github/actions/build-plugin/action.yaml
@@ -86,6 +86,8 @@ runs:
         }
 
         .github/scripts/Build-Windows.ps1 @BuildArgs
+      env:
+        CPU_OR_CUDA: ${{ inputs.cublas }}
 
     - name: Create Summary 📊
       if: contains(fromJSON('["Linux", "macOS"]'),runner.os)
diff --git a/buildspec.json b/buildspec.json
index acb2be7..4aac87c 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -45,7 +45,7 @@
         }
     },
     "name": "obs-localvocal",
-    "version": "0.2.0",
+    "version": "0.2.1",
     "author": "Roy Shilkrot",
     "website": "https://github.com/occ-ai/obs-localvocal",
     "email": "roy.shil@gmail.com",
diff --git a/src/translation/translation.cpp b/src/translation/translation.cpp
index 56874c8..95e58ae 100644
--- a/src/translation/translation.cpp
+++ b/src/translation/translation.cpp
@@ -95,7 +95,7 @@ int build_translation_context(struct translation_context &translation_ctx)
 		translation_ctx.options->max_decoding_length = 40;
 		translation_ctx.options->use_vmap = true;
 		translation_ctx.options->return_scores = false;
-		translation_ctx.options->repetition_penalty = 1.1;
+		translation_ctx.options->repetition_penalty = 1.1f;
 		translation_ctx.options->no_repeat_ngram_size = 2;
 	} catch (std::exception &e) {
 		obs_log(LOG_ERROR, "Failed to load CT2 model: %s", e.what());
diff --git a/src/whisper-utils/whisper-utils.cpp b/src/whisper-utils/whisper-utils.cpp
index 73e1bc8..57dc954 100644
--- a/src/whisper-utils/whisper-utils.cpp
+++ b/src/whisper-utils/whisper-utils.cpp
@@ -71,8 +71,8 @@ void update_whsiper_model_path(struct transcription_filter_data *gf, obs_data_t
 		}
 	} else {
 		// model path did not change
-		obs_log(LOG_DEBUG, "model path did not change: %s == %s", gf->whisper_model_path,
-			new_model_path.c_str());
+		obs_log(LOG_DEBUG, "model path did not change: %s == %s",
+			gf->whisper_model_path.c_str(), new_model_path.c_str());
 	}
 }
 
@@ -94,7 +94,7 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf)
 		gf->whisper_thread.join();
 	}
 	if (!gf->whisper_model_path.empty()) {
-		gf->whisper_model_path.clear();
+		gf->whisper_model_path = "";
 	}
 }
 

From eaa2040058782810d885f985feaa72a7d05c9f35 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:31:19 -0400
Subject: [PATCH 05/10] Fix compiler warning and simplify code in
 transcription-filter.cpp

---
 cmake/common/compiler_common.cmake | 2 +-
 src/transcription-filter.cpp       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake
index 8ac423f..9b86272 100644
--- a/cmake/common/compiler_common.cmake
+++ b/cmake/common/compiler_common.cmake
@@ -34,7 +34,7 @@ set(_obs_clang_c_options
     -Wfour-char-constants
     -Winfinite-recursion
     -Wint-conversion
-    -Wnewline-eof
+    -Wno-newline-eof
     -Wno-conversion
     -Wno-float-conversion
     -Wno-implicit-fallthrough
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index 909aa37..b3df64f 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -785,8 +785,8 @@ obs_properties_t *transcription_filter_properties(void *data)
 	obs_property_t *prop_src = obs_properties_add_list(
 		translation_group, "translate_source_language", MT_("source_language"),
 		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
-	obs_property_t *prop_add_context = obs_properties_add_bool(
-		translation_group, "translate_add_context", MT_("translate_add_context"));
+	obs_properties_add_bool(translation_group, "translate_add_context",
+				MT_("translate_add_context"));
 
 	// Populate the dropdown with the language codes
 	for (const auto &language : language_codes) {

From 1135ca83d7b1d1cfacfb95e17c2f67fa2924bc80 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:37:19 -0400
Subject: [PATCH 06/10] Update CMakePresets.json and buildspec.json

---
 CMakePresets.json | 36 ++++++++++++++++++++----------------
 buildspec.json    | 20 ++++++++++----------
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/CMakePresets.json b/CMakePresets.json
index bc80925..6c429e1 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -6,10 +6,19 @@
     "patch": 0
   },
   "configurePresets": [
+    {
+      "name": "template",
+      "hidden": true,
+      "cacheVariables": {
+        "ENABLE_FRONTEND_API": true,
+        "ENABLE_QT": false
+      }
+    },
     {
       "name": "macos",
       "displayName": "macOS Universal",
       "description": "Build for macOS 11.0+ (Universal binary)",
+      "inherits": ["template"],
       "binaryDir": "${sourceDir}/build_macos",
       "condition": {
         "type": "equals",
@@ -17,14 +26,12 @@
         "rhs": "Darwin"
       },
       "generator": "Xcode",
-      "warnings": {"dev": true, "deprecated": true},
+      "warnings": { "dev": true, "deprecated": true },
       "cacheVariables": {
         "QT_VERSION": "6",
         "CMAKE_OSX_DEPLOYMENT_TARGET": "11.0",
         "CODESIGN_IDENTITY": "$penv{CODESIGN_IDENT}",
-        "CODESIGN_TEAM": "$penv{CODESIGN_TEAM}",
-        "ENABLE_FRONTEND_API": true,
-        "ENABLE_QT": true
+        "CODESIGN_TEAM": "$penv{CODESIGN_TEAM}"
       }
     },
     {
@@ -41,6 +48,7 @@
       "name": "windows-x64",
       "displayName": "Windows x64",
       "description": "Build for Windows x64",
+      "inherits": ["template"],
       "binaryDir": "${sourceDir}/build_x64",
       "condition": {
         "type": "equals",
@@ -49,12 +57,10 @@
       },
       "generator": "Visual Studio 17 2022",
       "architecture": "x64",
-      "warnings": {"dev": true, "deprecated": true},
+      "warnings": { "dev": true, "deprecated": true },
       "cacheVariables": {
         "QT_VERSION": "6",
-        "CMAKE_SYSTEM_VERSION": "10.0.18363.657",
-        "ENABLE_FRONTEND_API": true,
-        "ENABLE_QT": true
+        "CMAKE_SYSTEM_VERSION": "10.0.18363.657"
       }
     },
     {
@@ -70,6 +76,7 @@
       "name": "linux-x86_64",
       "displayName": "Linux x86_64",
       "description": "Build for Linux x86_64",
+      "inherits": ["template"],
       "binaryDir": "${sourceDir}/build_x86_64",
       "condition": {
         "type": "equals",
@@ -77,12 +84,10 @@
         "rhs": "Linux"
       },
       "generator": "Ninja",
-      "warnings": {"dev": true, "deprecated": true},
+      "warnings": { "dev": true, "deprecated": true },
       "cacheVariables": {
         "QT_VERSION": "6",
-        "CMAKE_BUILD_TYPE": "RelWithDebInfo",
-        "ENABLE_FRONTEND_API": true,
-        "ENABLE_QT": true
+        "CMAKE_BUILD_TYPE": "RelWithDebInfo"
       }
     },
     {
@@ -99,6 +104,7 @@
       "name": "linux-aarch64",
       "displayName": "Linux aarch64",
       "description": "Build for Linux aarch64",
+      "inherits": ["template"],
       "binaryDir": "${sourceDir}/build_aarch64",
       "condition": {
         "type": "equals",
@@ -106,12 +112,10 @@
         "rhs": "Linux"
       },
       "generator": "Ninja",
-      "warnings": {"dev": true, "deprecated": true},
+      "warnings": { "dev": true, "deprecated": true },
       "cacheVariables": {
         "QT_VERSION": "6",
-        "CMAKE_BUILD_TYPE": "RelWithDebInfo",
-        "ENABLE_FRONTEND_API": true,
-        "ENABLE_QT": true
+        "CMAKE_BUILD_TYPE": "RelWithDebInfo"
       }
     },
     {
diff --git a/buildspec.json b/buildspec.json
index 4aac87c..1c349f5 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -1,33 +1,33 @@
 {
     "dependencies": {
         "obs-studio": {
-            "version": "29.1.2",
+            "version": "30.0.2",
             "baseUrl": "https://github.com/obsproject/obs-studio/archive/refs/tags",
             "label": "OBS sources",
             "hashes": {
-                "macos": "215f1fa5772c5dd9f3d6e35b0cb573912b00320149666a77864f9d305525504b",
-                "windows-x64": "46d451f3f42b9d2c59339ec268165849c7b7904cdf1cc2a8d44c015815a9e37d"
+                "macos": "be12c3ad0a85713750d8325e4b1db75086223402d7080d0e3c2833d7c5e83c27",
+                "windows-x64": "970058c49322cfa9cd6d620abb393fed89743ba7e74bd9dbb6ebe0ea8141d9c7"
             }
         },
         "prebuilt": {
-            "version": "2023-04-12",
+            "version": "2023-11-03",
             "baseUrl": "https://github.com/obsproject/obs-deps/releases/download",
             "label": "Pre-Built obs-deps",
             "hashes": {
-                "macos": "9535c6e1ad96f7d49960251e85a245774088d48da1d602bb82f734b10219125a",
-                "windows-x64": "c13a14a1acc4224b21304d97b63da4121de1ed6981297e50496fbc474abc0503"
+                "macos": "90c2fc069847ec2768dcc867c1c63b112c615ed845a907dc44acab7a97181974",
+                "windows-x64": "d0825a6fb65822c993a3059edfba70d72d2e632ef74893588cf12b1f0d329ce6"
             }
         },
         "qt6": {
-            "version": "2023-04-12",
+            "version": "2023-11-03",
             "baseUrl": "https://github.com/obsproject/obs-deps/releases/download",
             "label": "Pre-Built Qt6",
             "hashes": {
-                "macos": "eb7614544ab4f3d2c6052c797635602280ca5b028a6b987523d8484222ce45d1",
-                "windows-x64": "4d39364b8a8dee5aa24fcebd8440d5c22bb4551c6b440ffeacce7d61f2ed1add"
+                "macos": "ba4a7152848da0053f63427a2a2cb0a199af3992997c0db08564df6f48c9db98",
+                "windows-x64": "bc57dedf76b47119a6dce0435a2f21b35b08c8f2948b1cb34a157320f77732d1"
             },
             "debugSymbols": {
-                "windows-x64": "f34ee5067be19ed370268b15c53684b7b8aaa867dc800b68931df905d679e31f"
+                "windows-x64": "fd8ecd1d8cd2ef049d9f4d7fb5c134f784836d6020758094855dfa98bd025036"
             }
         }
     },

From 2aa0be736e5c072ab706f1ea7aeaee9bb736e57d Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:47:46 -0400
Subject: [PATCH 07/10] Fix Clang compiler warnings

---
 cmake/common/compiler_common.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake
index 9b86272..de86046 100644
--- a/cmake/common/compiler_common.cmake
+++ b/cmake/common/compiler_common.cmake
@@ -34,13 +34,14 @@ set(_obs_clang_c_options
     -Wfour-char-constants
     -Winfinite-recursion
     -Wint-conversion
-    -Wno-newline-eof
     -Wno-conversion
+    -Wno-error=newline-eof
     -Wno-float-conversion
     -Wno-implicit-fallthrough
     -Wno-missing-braces
     -Wno-missing-field-initializers
     -Wno-missing-prototypes
+    -Wno-newline-eof
     -Wno-semicolon-before-method-body
     -Wno-shadow
     -Wno-sign-conversion

From 0d7442275cd56c26215a24e212d141b33b1dce08 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 12:47:59 -0400
Subject: [PATCH 08/10] Enable QT in CMakePresets.json

---
 CMakePresets.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakePresets.json b/CMakePresets.json
index 6c429e1..053671c 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -11,7 +11,7 @@
       "hidden": true,
       "cacheVariables": {
         "ENABLE_FRONTEND_API": true,
-        "ENABLE_QT": false
+        "ENABLE_QT": true
       }
     },
     {

From 69ce2cf81804e9889da24257398ce482979a687f Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 13:49:58 -0400
Subject: [PATCH 09/10] Fix compiler warnings and create missing config folder

---
 cmake/common/compiler_common.cmake      | 3 +--
 cmake/macos/compilerconfig.cmake        | 1 +
 src/model-utils/model-downloader-ui.cpp | 4 ++--
 src/transcription-filter.cpp            | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/cmake/common/compiler_common.cmake b/cmake/common/compiler_common.cmake
index de86046..8ac423f 100644
--- a/cmake/common/compiler_common.cmake
+++ b/cmake/common/compiler_common.cmake
@@ -34,14 +34,13 @@ set(_obs_clang_c_options
     -Wfour-char-constants
     -Winfinite-recursion
     -Wint-conversion
+    -Wnewline-eof
     -Wno-conversion
-    -Wno-error=newline-eof
     -Wno-float-conversion
     -Wno-implicit-fallthrough
     -Wno-missing-braces
     -Wno-missing-field-initializers
     -Wno-missing-prototypes
-    -Wno-newline-eof
     -Wno-semicolon-before-method-body
     -Wno-shadow
     -Wno-sign-conversion
diff --git a/cmake/macos/compilerconfig.cmake b/cmake/macos/compilerconfig.cmake
index c40a532..524aab5 100644
--- a/cmake/macos/compilerconfig.cmake
+++ b/cmake/macos/compilerconfig.cmake
@@ -55,3 +55,4 @@ else()
 endif()
 
 add_compile_definitions($<$<CONFIG:DEBUG>:DEBUG> $<$<CONFIG:DEBUG>:_DEBUG> SIMDE_ENABLE_OPENMP)
+add_compile_options(-Wno-error=newline-eof)
diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp
index b978379..023ccb6 100644
--- a/src/model-utils/model-downloader-ui.cpp
+++ b/src/model-utils/model-downloader-ui.cpp
@@ -133,11 +133,11 @@ void ModelDownloadWorker::download_model()
 	// Check if the config folder exists
 	if (!std::filesystem::exists(module_config_models_folder)) {
 		obs_log(LOG_WARNING, "Config folder does not exist: %s",
-			module_config_models_folder);
+			module_config_models_folder.string().c_str());
 		// Create the config folder
 		if (!std::filesystem::create_directories(module_config_models_folder)) {
 			obs_log(LOG_ERROR, "Failed to create config folder: %s",
-				module_config_models_folder);
+				module_config_models_folder.string().c_str());
 			emit download_error("Failed to create config folder.");
 			return;
 		}
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index b3df64f..ebd457c 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -186,8 +186,8 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
 	}
 }
 
-#define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0)
-#define is_trail_byte(c) (((c)&0xc0) == 0x80)
+#define is_lead_byte(c) (((c) & 0xe0) == 0xc0 || ((c) & 0xf0) == 0xe0 || ((c) & 0xf8) == 0xf0)
+#define is_trail_byte(c) (((c) & 0xc0) == 0x80)
 
 inline int lead_byte_length(const uint8_t c)
 {

From 256451482be6f2f20f806758be33377654784eea Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 1 Apr 2024 13:53:34 -0400
Subject: [PATCH 10/10] Fix formatting of is_lead_byte and is_trail_byte macros

---
 src/transcription-filter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index ebd457c..b3df64f 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -186,8 +186,8 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
 	}
 }
 
-#define is_lead_byte(c) (((c) & 0xe0) == 0xc0 || ((c) & 0xf0) == 0xe0 || ((c) & 0xf8) == 0xf0)
-#define is_trail_byte(c) (((c) & 0xc0) == 0x80)
+#define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0)
+#define is_trail_byte(c) (((c)&0xc0) == 0x80)
 
 inline int lead_byte_length(const uint8_t c)
 {