diff --git a/src/transcription-filter-utils.cpp b/src/transcription-filter-utils.cpp index 72f313c..3727550 100644 --- a/src/transcription-filter-utils.cpp +++ b/src/transcription-filter-utils.cpp @@ -53,3 +53,22 @@ void create_obs_text_source() } obs_source_release(scene_as_source); } + +// Adds `source` to the list property passed through `list_property`, but only when +// its id is "text_ft2_source_v2" or "text_gdiplus_v2"; the source name is used as +// both the displayed text and the stored value. Always returns true. +// NOTE(review): presumably used as a libobs source-enumeration callback - confirm. +bool add_sources_to_list(void *list_property, obs_source_t *source) +{ + const char *source_id = obs_source_get_id(source); + // a source without an id cannot match; skip it instead of handing NULL to strcmp + if (!source_id || (strcmp(source_id, "text_ft2_source_v2") != 0 && + strcmp(source_id, "text_gdiplus_v2") != 0)) { + return true; + } + + obs_property_t *sources = static_cast<obs_property_t *>(list_property); + const char *name = obs_source_get_name(source); + obs_property_list_add_string(sources, name, name); + return true; +} diff --git a/src/transcription-filter-utils.h b/src/transcription-filter-utils.h index 9f24d55..4fac8ef 100644 --- a/src/transcription-filter-utils.h +++ b/src/transcription-filter-utils.h @@ -2,6 +2,7 @@ #define TRANSCRIPTION_FILTER_UTILS_H #include +#include // Convert channels number to a speaker layout inline enum speaker_layout convert_speaker_layout(uint8_t channels) @@ -30,4 +31,6 @@ inline enum speaker_layout convert_speaker_layout(uint8_t channels) void create_obs_text_source(); +bool add_sources_to_list(void *list_property, obs_source_t *source); + #endif // TRANSCRIPTION_FILTER_UTILS_H diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 2fc25d3..9ab2d55 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -30,20 +30,6 @@ #include "translation/translation.h" #include "translation/translation-includes.h" -bool add_sources_to_list(void *list_property, obs_source_t *source) -{ - auto source_id = obs_source_get_id(source); - if (strcmp(source_id, "text_ft2_source_v2") != 0 && - strcmp(source_id, "text_gdiplus_v2") != 0) { - return true; - } - - obs_property_t *sources = (obs_property_t *)list_property; - const char *name = obs_source_get_name(source); - obs_property_list_add_string(sources, name, name); - 
return true; -} - void set_source_signals(transcription_filter_data *gf, obs_source_t *parent_source) { signal_handler_t *sh = obs_source_get_signal_handler(parent_source); @@ -618,6 +604,26 @@ obs_properties_t *transcription_filter_properties(void *data) } else { obs_property_set_visible( obs_properties_get(props, "whisper_model_path_external"), false); + const std::string model_name = new_model_path; + // if the model is english-only -> hide all the languages but english + const bool is_english_only = + (model_name.find("English") != std::string::npos); + // clear the language selection list ("whisper_language_select") + obs_property_t *prop_lang = + obs_properties_get(props, "whisper_language_select"); + obs_property_list_clear(prop_lang); + if (is_english_only) { + // add only the english language + obs_property_list_add_string(prop_lang, "English", "en"); + // set the language to english + obs_data_set_string(settings, "whisper_language_select", "en"); + } else { + // add all the languages, ordered by display name like the initial list + for (const auto &lang : whisper_available_lang_reverse) { + obs_property_list_add_string(prop_lang, lang.first.c_str(), + lang.second.c_str()); + } + } } return true; }); @@ -759,7 +765,7 @@ obs_properties_t *transcription_filter_properties(void *data) {"whisper_params_group", "log_words", "caption_to_stream", "buffer_size_msec", "overlap_size_msec", "step_by_step_processing", "min_sub_duration", "process_while_muted", "buffered_output", "vad_enabled", "log_level", - "suppress_sentences", "sentence_psum_accept_thresh"}) { + "suppress_sentences", "sentence_psum_accept_thresh", "vad_threshold"}) { obs_property_set_visible(obs_properties_get(props, prop_name.c_str()), show_hide); } @@ -825,18 +831,9 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_t *whisper_language_select_list = obs_properties_add_list( whisper_params_group, "whisper_language_select", MT_("language"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); - // sort the languages by flipping the 
map - std::map whisper_available_lang_flip; - for (auto const &pair : whisper_available_lang) { - whisper_available_lang_flip[pair.second] = pair.first; - } // iterate over all available languages and add them to the list - for (auto const &pair : whisper_available_lang_flip) { - // Capitalize the language name - std::string language_name = pair.first; - language_name[0] = (char)toupper(language_name[0]); - - obs_property_list_add_string(whisper_language_select_list, language_name.c_str(), + for (auto const &pair : whisper_available_lang_reverse) { + obs_property_list_add_string(whisper_language_select_list, pair.first.c_str(), pair.second.c_str()); } diff --git a/src/transcription-utils.cpp b/src/transcription-utils.cpp index 321d2fb..727d3df 100644 --- a/src/transcription-utils.cpp +++ b/src/transcription-utils.cpp @@ -4,7 +4,9 @@ #include #include +// clang-format off #define is_lead_byte(c) (((c)&0xe0) == 0xc0 || ((c)&0xf0) == 0xe0 || ((c)&0xf8) == 0xf0) +// clang-format off #define is_trail_byte(c) (((c)&0xc0) == 0x80) inline int lead_byte_length(const uint8_t c) diff --git a/src/whisper-utils/silero-vad-onnx.cpp b/src/whisper-utils/silero-vad-onnx.cpp index 4200520..857649d 100644 --- a/src/whisper-utils/silero-vad-onnx.cpp +++ b/src/whisper-utils/silero-vad-onnx.cpp @@ -15,6 +15,8 @@ // #define __DEBUG_SPEECH_PROB___ +// prevent clang-format from reformatting the code +// clang-format off timestamp_t::timestamp_t(int start_, int end_) : start(start_), end(end_){}; // assignment operator modifies object, therefore non-const diff --git a/src/whisper-utils/whisper-language.h b/src/whisper-utils/whisper-language.h index f1bfb99..f9f349c 100644 --- a/src/whisper-utils/whisper-language.h +++ b/src/whisper-utils/whisper-language.h @@ -4,7 +4,7 @@ #include #include -static const std::map whisper_available_lang = { +static const std::map whisper_available_lang{ { "auto", "Auto detect", @@ -407,4 +407,408 @@ static const std::map whisper_available_lang = { }, }; +// 
the reverse map of whisper_available_lang +static const std::map whisper_available_lang_reverse{ + { + "Auto detect", + "auto", + }, + { + "English", + "en", + }, + { + "Chinese", + "zh", + }, + { + "German", + "de", + }, + { + "Spanish", + "es", + }, + { + "Russian", + "ru", + }, + { + "Korean", + "ko", + }, + { + "French", + "fr", + }, + { + "Japanese", + "ja", + }, + { + "Portuguese", + "pt", + }, + { + "Turkish", + "tr", + }, + { + "Polish", + "pl", + }, + { + "Catalan", + "ca", + }, + { + "Dutch", + "nl", + }, + { + "Arabic", + "ar", + }, + { + "Swedish", + "sv", + }, + { + "Italian", + "it", + }, + { + "Indonesian", + "id", + }, + { + "Hindi", + "hi", + }, + { + "Finnish", + "fi", + }, + { + "Vietnamese", + "vi", + }, + { + "Hebrew", + "he", + }, + { + "Ukrainian", + "uk", + }, + { + "Greek", + "el", + }, + { + "Malay", + "ms", + }, + { + "Czech", + "cs", + }, + { + "Romanian", + "ro", + }, + { + "Danish", + "da", + }, + { + "Hungarian", + "hu", + }, + { + "Tamil", + "ta", + }, + { + "Norwegian", + "no", + }, + { + "Thai", + "th", + }, + { + "Urdu", + "ur", + }, + { + "Croatian", + "hr", + }, + { + "Bulgarian", + "bg", + }, + { + "Lithuanian", + "lt", + }, + { + "Latin", + "la", + }, + { + "Maori", + "mi", + }, + { + "Malayalam", + "ml", + }, + { + "Welsh", + "cy", + }, + { + "Slovak", + "sk", + }, + { + "Telugu", + "te", + }, + { + "Persian", + "fa", + }, + { + "Latvian", + "lv", + }, + { + "Bengali", + "bn", + }, + { + "Serbian", + "sr", + }, + { + "Azerbaijani", + "az", + }, + { + "Slovenian", + "sl", + }, + { + "Kannada", + "kn", + }, + { + "Estonian", + "et", + }, + { + "Macedonian", + "mk", + }, + { + "Breton", + "br", + }, + { + "Basque", + "eu", + }, + { + "Icelandic", + "is", + }, + { + "Armenian", + "hy", + }, + { + "Nepali", + "ne", + }, + { + "Mongolian", + "mn", + }, + { + "Bosnian", + "bs", + }, + { + "Kazakh", + "kk", + }, + { + "Albanian", + "sq", + }, + { + "Swahili", + "sw", + }, + { + "Galician", + "gl", + }, + { + "Marathi", + "mr", + }, + 
{ + "Punjabi", + "pa", + }, + { + "Sinhala", + "si", + }, + { + "Khmer", + "km", + }, + { + "Shona", + "sn", + }, + { + "Yoruba", + "yo", + }, + { + "Somali", + "so", + }, + { + "Afrikaans", + "af", + }, + { + "Occitan", + "oc", + }, + { + "Georgian", + "ka", + }, + { + "Belarusian", + "be", + }, + { + "Tajik", + "tg", + }, + { + "Sindhi", + "sd", + }, + { + "Gujarati", + "gu", + }, + { + "Amharic", + "am", + }, + { + "Yiddish", + "yi", + }, + { + "Lao", + "lo", + }, + { + "Uzbek", + "uz", + }, + { + "Faroese", + "fo", + }, + { + "Haitian", + "ht", + }, + { + "Pashto", + "ps", + }, + { + "Turkmen", + "tk", + }, + { + "Nynorsk", + "nn", + }, + { + "Maltese", + "mt", + }, + { + "Sanskrit", + "sa", + }, + { + "Luxembourgish", + "lb", + }, + { + "Myanmar", + "my", + }, + { + "Tibetan", + "bo", + }, + { + "Tagalog", + "tl", + }, + { + "Malagasy", + "mg", + }, + { + "Assamese", + "as", + }, + { + "Tatar", + "tt", + }, + { + "Hawaiian", + "haw", + }, + { + "Lingala", + "ln", + }, + { + "Hausa", + "ha", + }, + { + "Bashkir", + "ba", + }, + { + "Javanese", + "jw", + }, + { + "Sundanese", + "su", + }, +}; + #endif // WHISPER_LANGUAGE_H