Skip to content

Commit

Permalink
Refactor buffer size calculation and formatting in transcription filter
Browse files Browse the repository at this point in the history
This commit reformats the buffer size calculation in the transcription filter code to improve readability and maintainability: the long expression that converts the `buffer_size_msec` setting into a frame count is now wrapped across two lines, with no change to the computed value. Additionally, the commit tidies the line wrapping of the slider property setup in the transcription filter and of the `find_tail_word_cutoff` declaration and call in the whisper-processing file. These changes are formatting-only and enhance the overall code quality and maintainability.
  • Loading branch information
royshil committed Jan 26, 2024
1 parent a46a13e commit d3f2362
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
12 changes: 7 additions & 5 deletions src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,8 @@ void transcription_filter_update(void *data, obs_data_t *s)

gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
gf->log_words = obs_data_get_bool(s, "log_words");
gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)obs_data_get_int(s, "buffer_size_msec")));
gf->frames = (size_t)((float)gf->sample_rate /
(1000.0f / (float)obs_data_get_int(s, "buffer_size_msec")));
gf->caption_to_stream = obs_data_get_bool(s, "caption_to_stream");
bool step_by_step_processing = obs_data_get_bool(s, "step_by_step_processing");
gf->step_size_msec = step_by_step_processing ? (int)obs_data_get_int(s, "step_size_msec")
Expand Down Expand Up @@ -494,7 +495,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
// Get the number of channels for the input source
gf->channels = audio_output_get_channels(obs_get_audio());
gf->sample_rate = audio_output_get_sample_rate(obs_get_audio());
gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)obs_data_get_int(settings, "buffer_size_msec")));
gf->frames = (size_t)((float)gf->sample_rate /
(1000.0f / (float)obs_data_get_int(settings, "buffer_size_msec")));
gf->last_num_frames = 0;
bool step_by_step_processing = obs_data_get_bool(settings, "step_by_step_processing");
gf->step_size_msec = step_by_step_processing
Expand Down Expand Up @@ -690,9 +692,9 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_add_bool(ppts, "caption_to_stream", MT_("caption_to_stream"));

obs_properties_add_int_slider(ppts, "buffer_size_msec", MT_("buffer_size_msec"), 1000,
DEFAULT_BUFFER_SIZE_MSEC, 50);
obs_properties_add_int_slider(ppts, "overlap_size_msec", MT_("overlap_size_msec"), 50,
300, 50);
DEFAULT_BUFFER_SIZE_MSEC, 50);
obs_properties_add_int_slider(ppts, "overlap_size_msec", MT_("overlap_size_msec"), 50, 300,
50);

obs_property_t *step_by_step_processing = obs_properties_add_bool(
ppts, "step_by_step_processing", MT_("step_by_step_processing"));
Expand Down
7 changes: 4 additions & 3 deletions src/whisper-utils/whisper-processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ float calculate_segment_energy(const float *pcmf32, size_t pcm32f_size)
return energy / (float)pcm32f_size;
}

size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, size_t overlap_ms, uint32_t sample_rate_hz)
size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, size_t overlap_ms,
uint32_t sample_rate_hz)
{
// segment size: 10ms worth of samples
const size_t segment_size = 10 * sample_rate_hz / 1000;
Expand Down Expand Up @@ -320,8 +321,8 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)

if (!skipped_inference) {
// find the tail word cutoff
const size_t tail_word_cutoff =
find_tail_word_cutoff(output[0], out_frames, gf->overlap_ms, WHISPER_SAMPLE_RATE);
const size_t tail_word_cutoff = find_tail_word_cutoff(
output[0], out_frames, gf->overlap_ms, WHISPER_SAMPLE_RATE);
if (tail_word_cutoff < out_frames)
obs_log(gf->log_level, "tail word cutoff: %d frames",
(int)tail_word_cutoff);
Expand Down

0 comments on commit d3f2362

Please sign in to comment.