Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Update buffer_output_type translations in locale files #119

Merged
merged 5 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data/locale/ar-SA.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="الترجمة مع السياق"
whisper_translate="ترجمة إلى الإنجليزية (Whisper)"
buffer_size_msec="حجم الذاكرة المؤقتة (ملي ثانية)"
overlap_size_msec="حجم التداخل (ملي ثانية)"
buffer_output_type="نوع مخرجات الذاكرة المؤقتة"
1 change: 1 addition & 0 deletions data/locale/de-DE.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Mit Kontext übersetzen"
whisper_translate="Ins Englische übersetzen (Flüstern)"
buffer_size_msec="Puffergröße (ms)"
overlap_size_msec="Überlappungsgröße (ms)"
buffer_output_type="Pufferausgabetyp"
3 changes: 2 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,5 @@ translation_no_repeat_ngram_size="No-repeat ngram size"
translation_max_input_length="Max input length"
buffered_output_parameters="Buffered output parameters"
buffer_num_lines="Number of lines"
buffer_num_chars_per_line="Characters per line"
buffer_num_chars_per_line="Amount per line"
buffer_output_type="Output type"
1 change: 1 addition & 0 deletions data/locale/es-ES.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traducir con contexto"
whisper_translate="Traducir al inglés (Whisper)"
buffer_size_msec="Tamaño del búfer (ms)"
overlap_size_msec="Tamaño de superposición (ms)"
buffer_output_type="Tipo de salida de búfer"
1 change: 1 addition & 0 deletions data/locale/fr-FR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traduire avec contexte"
whisper_translate="Traduire en anglais (Whisper)"
buffer_size_msec="Taille du tampon (ms)"
overlap_size_msec="Taille de chevauchement (ms)"
buffer_output_type="Type de sortie du tampon"
1 change: 1 addition & 0 deletions data/locale/hi-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="संदर्भ के साथ अनुवाद
whisper_translate="अंग्रेजी में अनुवाद करें (व्हिस्पर)"
buffer_size_msec="बफ़र आकार (ms)"
overlap_size_msec="ओवरलैप आकार (ms)"
buffer_output_type="बफ़र आउटपुट प्रकार"
1 change: 1 addition & 0 deletions data/locale/ja-JP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="コンテキスト付きで翻訳"
whisper_translate="英語に翻訳(ウィスパー)"
buffer_size_msec="バッファサイズ(ms)"
overlap_size_msec="オーバーラップサイズ(ms)"
buffer_output_type="バッファ出力タイプ"
1 change: 1 addition & 0 deletions data/locale/ko-KR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="컨텍스트와 함께 번역"
whisper_translate="영어로 번역 (속삭임)"
buffer_size_msec="버퍼 크기 (ms)"
overlap_size_msec="오버랩 크기 (ms)"
buffer_output_type="버퍼 출력 유형"
1 change: 1 addition & 0 deletions data/locale/pl-PL.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Tłumacz z kontekstem"
whisper_translate="Tłumacz na angielski (Whisper)"
buffer_size_msec="Rozmiar bufora (ms)"
overlap_size_msec="Rozmiar nakładki (ms)"
buffer_output_type="Typ wyjścia bufora"
1 change: 1 addition & 0 deletions data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traduzir com contexto"
whisper_translate="Traduzir para inglês (Whisper)"
buffer_size_msec="Tamanho do buffer (ms)"
overlap_size_msec="Tamanho da sobreposição (ms)"
buffer_output_type="Tipo de saída do buffer"
1 change: 1 addition & 0 deletions data/locale/ru-RU.ini
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ translate_add_context="Перевести с контекстом"
whisper_translate="Перевести на английский (Whisper)"
buffer_size_msec="Размер буфера (мс)"
overlap_size_msec="Размер перекрытия (мс)"
buffer_output_type="Тип выходных данных буфера"
1 change: 1 addition & 0 deletions data/locale/zh-CN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="带上下文翻译"
whisper_translate="翻译为英语(Whisper)"
buffer_size_msec="缓冲区大小(毫秒)"
overlap_size_msec="重叠大小(毫秒)"
buffer_output_type="缓冲区输出类型"
2 changes: 2 additions & 0 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ struct transcription_filter_data {
TokenBufferThread captions_monitor;
int buffered_output_num_lines = 2;
int buffered_output_num_chars = 30;
TokenBufferSegmentation buffered_output_output_type =
TokenBufferSegmentation::SEGMENTATION_TOKEN;

// ctor
transcription_filter_data() : whisper_buf_mutex(), whisper_ctx_mutex(), wshiper_thread_cv()
Expand Down
78 changes: 52 additions & 26 deletions src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ void transcription_filter_destroy(void *data)

void transcription_filter_update(void *data, obs_data_t *s)
{
obs_log(LOG_INFO, "LocalVocal filter update");
struct transcription_filter_data *gf =
static_cast<struct transcription_filter_data *>(data);
obs_log(gf->log_level, "LocalVocal filter update");

gf->log_level = (int)obs_data_get_int(s, "log_level");
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
Expand All @@ -188,11 +188,13 @@ void transcription_filter_update(void *data, obs_data_t *s)
bool new_buffered_output = obs_data_get_bool(s, "buffered_output");
int new_buffer_num_lines = (int)obs_data_get_int(s, "buffer_num_lines");
int new_buffer_num_chars_per_line = (int)obs_data_get_int(s, "buffer_num_chars_per_line");
TokenBufferSegmentation new_buffer_output_type =
(TokenBufferSegmentation)obs_data_get_int(s, "buffer_output_type");

if (new_buffered_output) {
obs_log(LOG_INFO, "buffered_output enable");
obs_log(gf->log_level, "buffered_output enable");
if (!gf->buffered_output || !gf->captions_monitor.isEnabled()) {
obs_log(LOG_INFO, "buffered_output currently disabled, enabling");
obs_log(gf->log_level, "buffered_output currently disabled, enabling");
gf->buffered_output = true;
gf->captions_monitor.initialize(
gf,
Expand All @@ -203,18 +205,23 @@ void transcription_filter_update(void *data, obs_data_t *s)
}
},
new_buffer_num_lines, new_buffer_num_chars_per_line,
std::chrono::seconds(10));
std::chrono::seconds(3), new_buffer_output_type);
} else {
if (new_buffer_num_lines != gf->buffered_output_num_lines ||
new_buffer_num_chars_per_line != gf->buffered_output_num_chars) {
obs_log(LOG_INFO, "buffered_output parameters changed, updating");
new_buffer_num_chars_per_line != gf->buffered_output_num_chars ||
new_buffer_output_type != gf->buffered_output_output_type) {
obs_log(gf->log_level,
"buffered_output parameters changed, updating");
gf->captions_monitor.clear();
gf->captions_monitor.setNumSentences(new_buffer_num_lines);
gf->captions_monitor.setNumPerSentence(
new_buffer_num_chars_per_line);
gf->buffered_output_num_lines = new_buffer_num_lines;
gf->buffered_output_num_chars = new_buffer_num_chars_per_line;
gf->captions_monitor.setSegmentation(new_buffer_output_type);
}
}
gf->buffered_output_num_lines = new_buffer_num_lines;
gf->buffered_output_num_chars = new_buffer_num_chars_per_line;
gf->buffered_output_output_type = new_buffer_output_type;
} else {
obs_log(gf->log_level, "buffered_output disable");
if (gf->buffered_output) {
Expand Down Expand Up @@ -349,13 +356,23 @@ void transcription_filter_update(void *data, obs_data_t *s)
}
}

if (gf->initial_creation && gf->context != nullptr && obs_source_enabled(gf->context)) {
obs_log(LOG_INFO, "Initial filter creation and source enabled");
if (gf->context != nullptr && obs_source_enabled(gf->context)) {
if (gf->initial_creation) {
obs_log(LOG_INFO, "Initial filter creation and source enabled");

// source was enabled on creation
update_whisper_model(gf);
gf->active = true;
gf->initial_creation = false;
// source was enabled on creation
update_whisper_model(gf);
gf->active = true;
gf->initial_creation = false;
} else {
// check if the whisper model selection has changed
const std::string new_model_path =
obs_data_get_string(s, "whisper_model_path");
if (gf->whisper_model_path != new_model_path) {
obs_log(LOG_INFO, "New model selected: %s", new_model_path.c_str());
update_whisper_model(gf);
}
}
}
}

Expand Down Expand Up @@ -506,9 +523,11 @@ void transcription_filter_defaults(obs_data_t *s)
obs_data_set_default_bool(s, "buffered_output", false);
obs_data_set_default_int(s, "buffer_num_lines", 2);
obs_data_set_default_int(s, "buffer_num_chars_per_line", 30);
obs_data_set_default_int(s, "buffer_output_type",
(int)TokenBufferSegmentation::SEGMENTATION_TOKEN);

obs_data_set_default_bool(s, "vad_enabled", true);
obs_data_set_default_double(s, "vad_threshold", 0.5);
obs_data_set_default_double(s, "vad_threshold", 0.65);
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
obs_data_set_default_bool(s, "log_words", false);
obs_data_set_default_bool(s, "caption_to_stream", false);
Expand Down Expand Up @@ -669,6 +688,16 @@ obs_properties_t *transcription_filter_properties(void *data)
return true;
});

// Add language selector
obs_property_t *whisper_language_select_list =
obs_properties_add_list(ppts, "whisper_language_select", MT_("language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// iterate over all available languages and add them to the list
for (auto const &pair : whisper_available_lang_reverse) {
obs_property_list_add_string(whisper_language_select_list, pair.first.c_str(),
pair.second.c_str());
}

// add translation option group
obs_properties_t *translation_group = obs_properties_create();
obs_property_t *translation_group_prop = obs_properties_add_group(
Expand Down Expand Up @@ -806,7 +835,8 @@ obs_properties_t *transcription_filter_properties(void *data)
{"whisper_params_group", "log_words", "caption_to_stream", "buffer_size_msec",
"overlap_size_msec", "step_by_step_processing", "min_sub_duration",
"process_while_muted", "buffered_output", "vad_enabled", "log_level",
"suppress_sentences", "sentence_psum_accept_thresh", "vad_threshold"}) {
"suppress_sentences", "sentence_psum_accept_thresh", "vad_threshold",
"buffered_output_group"}) {
obs_property_set_visible(obs_properties_get(props, prop_name.c_str()),
show_hide);
}
Expand All @@ -820,6 +850,12 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_t *buffered_output_group = obs_properties_create();
obs_properties_add_group(ppts, "buffered_output_group", MT_("buffered_output_parameters"),
OBS_GROUP_NORMAL, buffered_output_group);
// add buffer "type" character or word
obs_property_t *buffer_type_list = obs_properties_add_list(
buffered_output_group, "buffer_output_type", MT_("buffer_output_type"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(buffer_type_list, "Character", SEGMENTATION_TOKEN);
obs_property_list_add_int(buffer_type_list, "Word", SEGMENTATION_WORD);
// add buffer lines parameter
obs_properties_add_int_slider(buffered_output_group, "buffer_num_lines",
MT_("buffer_num_lines"), 1, 5, 1);
Expand Down Expand Up @@ -868,16 +904,6 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_add_group(ppts, "whisper_params_group", MT_("whisper_parameters"),
OBS_GROUP_NORMAL, whisper_params_group);

// Add language selector
obs_property_t *whisper_language_select_list = obs_properties_add_list(
whisper_params_group, "whisper_language_select", MT_("language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// iterate over all available languages and add them to the list
for (auto const &pair : whisper_available_lang_reverse) {
obs_property_list_add_string(whisper_language_select_list, pair.first.c_str(),
pair.second.c_str());
}

obs_property_t *whisper_sampling_method_list = obs_properties_add_list(
whisper_params_group, "whisper_sampling_method", MT_("whisper_sampling_method"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
Expand Down
16 changes: 16 additions & 0 deletions src/transcription-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <string>
#include <vector>
#include <chrono>
#include <algorithm>
#include <cctype>

// Fix UTF8 string for Windows
std::string fix_utf8(const std::string &str);
Expand All @@ -25,4 +27,18 @@ inline uint64_t now_ms()
// Split a string into words based on spaces
std::vector<std::string> split_words(const std::string &str_copy);

// Return a copy of the input with whitespace stripped from both ends.
// "Whitespace" is decided by std::isspace on each element taken as unsigned char;
// whitespace in the interior of the string is left untouched.
template<typename StringLike> StringLike trim(const StringLike &str)
{
	const auto is_not_space = [](unsigned char c) { return std::isspace(c) == 0; };

	StringLike trimmed = str;

	// Cut the trailing run: the reverse search stops on the last non-space
	// element, and base() points one past it in forward order.
	const auto tail_start =
		std::find_if(trimmed.rbegin(), trimmed.rend(), is_not_space).base();
	trimmed.erase(tail_start, trimmed.end());

	// Cut the leading run up to the first non-space element.
	const auto head_end = std::find_if(trimmed.begin(), trimmed.end(), is_not_space);
	trimmed.erase(trimmed.begin(), head_end);

	return trimmed;
}

#endif // TRANSCRIPTION_UTILS_H
Loading
Loading