Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Add transcription-filter-properties.cpp for managing filter… #138

Merged
merged 3 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ target_sources(
src/transcription-filter.cpp
src/transcription-filter.c
src/transcription-filter-callbacks.cpp
src/transcription-filter-properties.cpp
src/transcription-filter-utils.cpp
src/transcription-utils.cpp
src/model-utils/model-downloader.cpp
Expand Down
3 changes: 2 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ whisper_translate="Translate to English (Whisper)"
buffer_size_msec="Buffer size (ms)"
overlap_size_msec="Overlap size (ms)"
suppress_sentences="Suppress sentences (each line)"
translate_output="Translation output"
translate_output="Output Destination"
dtw_token_timestamps="DTW token timestamps"
buffered_output="Buffered output (Experimental)"
translate_model="Model"
Expand Down Expand Up @@ -82,3 +82,4 @@ translate_explaination="Enabling translation will increase the processing load o
log_group="Logging"
advanced_group="Advanced Configuration"
buffered_output_parameters="Buffered Output Configuration"
file_output_info="Note: Translation output will be saved to a file in the same directory with the target language added to the name, e.g. 'output_es.srt'."
90 changes: 69 additions & 21 deletions src/transcription-filter-callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,28 +63,38 @@ std::string send_sentence_to_translation(const std::string &sentence,
obs_log(LOG_INFO, "Translation: '%s' -> '%s'", sentence.c_str(),
translated_text.c_str());
}
if (gf->translation_output == "none") {
// overwrite the original text with the translated text
return translated_text;
} else {
// send the translation to the selected source
send_caption_to_source(gf->translation_output, translated_text, gf);
}
return translated_text;
} else {
obs_log(gf->log_level, "Failed to translate text");
}
}
return sentence;
return "";
}

void send_sentence_to_file(struct transcription_filter_data *gf,
const DetectionResultWithText &result, const std::string &str_copy)
const DetectionResultWithText &result, const std::string &str_copy,
const std::string &translated_sentence)
{
// Check if we should save the sentence
if (gf->save_only_while_recording && !obs_frontend_recording_active()) {
// We are not recording, do not save the sentence to file
return;
}

std::string translated_file_path = "";
bool write_translations = gf->translate && !translated_sentence.empty();

// if translation is enabled, save the translated sentence to another file
if (write_translations) {
// add a postfix to the file name (without extension) with the translation target language
std::string output_file_path = gf->output_file_path;
std::string file_extension =
output_file_path.substr(output_file_path.find_last_of(".") + 1);
std::string file_name =
output_file_path.substr(0, output_file_path.find_last_of("."));
translated_file_path = file_name + "_" + gf->target_lang + "." + file_extension;
}

// should the file be truncated?
std::ios_base::openmode openmode = std::ios::out;
if (gf->truncate_output_file) {
Expand All @@ -97,6 +107,11 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
std::ofstream output_file(gf->output_file_path, openmode);
output_file << str_copy << std::endl;
output_file.close();
if (write_translations) {
std::ofstream translated_output_file(translated_file_path, openmode);
translated_output_file << translated_sentence << std::endl;
translated_output_file.close();
}
} else {
if (result.start_timestamp_ms == 0 && result.end_timestamp_ms == 0) {
// No timestamps, do not save the sentence to srt
Expand All @@ -109,27 +124,45 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
std::ofstream output_file(gf->output_file_path, openmode);
output_file << gf->sentence_number << std::endl;
// use the start and end timestamps to calculate the start and end time in srt format
auto format_ts_for_srt = [&output_file](uint64_t ts) {
auto format_ts_for_srt = [](std::ofstream &output_stream, uint64_t ts) {
uint64_t time_s = ts / 1000;
uint64_t time_m = time_s / 60;
uint64_t time_h = time_m / 60;
uint64_t time_ms_rem = ts % 1000;
uint64_t time_s_rem = time_s % 60;
uint64_t time_m_rem = time_m % 60;
uint64_t time_h_rem = time_h % 60;
output_file << std::setfill('0') << std::setw(2) << time_h_rem << ":"
<< std::setfill('0') << std::setw(2) << time_m_rem << ":"
<< std::setfill('0') << std::setw(2) << time_s_rem << ","
<< std::setfill('0') << std::setw(3) << time_ms_rem;
output_stream << std::setfill('0') << std::setw(2) << time_h_rem << ":"
<< std::setfill('0') << std::setw(2) << time_m_rem << ":"
<< std::setfill('0') << std::setw(2) << time_s_rem << ","
<< std::setfill('0') << std::setw(3) << time_ms_rem;
};
format_ts_for_srt(result.start_timestamp_ms);
format_ts_for_srt(output_file, result.start_timestamp_ms);
output_file << " --> ";
format_ts_for_srt(result.end_timestamp_ms);
format_ts_for_srt(output_file, result.end_timestamp_ms);
output_file << std::endl;

output_file << str_copy << std::endl;
output_file << std::endl;
output_file.close();

if (write_translations) {
obs_log(gf->log_level, "Saving translation to file %s, sentence #%d",
translated_file_path.c_str(), gf->sentence_number);

// Append translated sentence to file in .srt format
std::ofstream translated_output_file(translated_file_path, openmode);
translated_output_file << gf->sentence_number << std::endl;
format_ts_for_srt(translated_output_file, result.start_timestamp_ms);
translated_output_file << " --> ";
format_ts_for_srt(translated_output_file, result.end_timestamp_ms);
translated_output_file << std::endl;

translated_output_file << translated_sentence << std::endl;
translated_output_file << std::endl;
translated_output_file.close();
}

gf->sentence_number++;
}
}
Expand Down Expand Up @@ -185,13 +218,28 @@ void set_text_callback(struct transcription_filter_data *gf,
}
}

// send the sentence to translation (if enabled)
std::string translated_sentence = send_sentence_to_translation(str_copy, gf);

if (gf->translate) {
if (gf->translation_output == "none") {
// overwrite the original text with the translated text
str_copy = translated_sentence;
} else {
if (gf->buffered_output) {
gf->translation_monitor.addSentence(translated_sentence);
} else {
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->translation_output, translated_sentence,
gf);
}
}
}

if (gf->buffered_output) {
gf->captions_monitor.addSentence(str_copy);
} else {
// non-buffered output
// send the sentence to translation (if enabled)
str_copy = send_sentence_to_translation(str_copy, gf);
// send the sentence to the selected source
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->text_source_name, str_copy, gf);
}

Expand All @@ -200,7 +248,7 @@ void set_text_callback(struct transcription_filter_data *gf,
}

if (gf->save_to_file && gf->output_file_path != "") {
send_sentence_to_file(gf, result, str_copy);
send_sentence_to_file(gf, result, str_copy, translated_sentence);
}
};

Expand Down
1 change: 1 addition & 0 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ struct transcription_filter_data {

bool buffered_output = false;
TokenBufferThread captions_monitor;
TokenBufferThread translation_monitor;
int buffered_output_num_lines = 2;
int buffered_output_num_chars = 30;
TokenBufferSegmentation buffered_output_output_type =
Expand Down
Loading
Loading