Refactor buffer size calculation and formatting in transcription filter

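This commit reformats code in the transcription filter and the whisper-processing file to improve readability and maintainability; it is a formatting-only change and the logic is unchanged. In `transcription-filter.cpp`, the `gf->frames` calculation, which converts the `buffer_size_msec` setting into a frame count at the current sample rate, is wrapped across two lines in both `transcription_filter_update` and `transcription_filter_create`, and the `buffer_size_msec` and `overlap_size_msec` slider calls in `transcription_filter_properties` are re-indented. In `whisper-processing.cpp`, the `find_tail_word_cutoff` signature and its call site in `process_audio_from_buffer` are re-wrapped to fit the line-length limit.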
locaal-ai · Jan 26, 2024 · d3f2362 · d3f2362
1 parent a46a13e
commit d3f2362
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 8 deletions.
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
@@ -370,7 +370,8 @@ void transcription_filter_update(void *data, obs_data_t *s)
 
 	gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
 	gf->log_words = obs_data_get_bool(s, "log_words");
-	gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)obs_data_get_int(s, "buffer_size_msec")));
+	gf->frames = (size_t)((float)gf->sample_rate /
+			      (1000.0f / (float)obs_data_get_int(s, "buffer_size_msec")));
 	gf->caption_to_stream = obs_data_get_bool(s, "caption_to_stream");
 	bool step_by_step_processing = obs_data_get_bool(s, "step_by_step_processing");
 	gf->step_size_msec = step_by_step_processing ? (int)obs_data_get_int(s, "step_size_msec")
@@ -494,7 +495,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 	// Get the number of channels for the input source
 	gf->channels = audio_output_get_channels(obs_get_audio());
 	gf->sample_rate = audio_output_get_sample_rate(obs_get_audio());
-	gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)obs_data_get_int(settings, "buffer_size_msec")));
+	gf->frames = (size_t)((float)gf->sample_rate /
+			      (1000.0f / (float)obs_data_get_int(settings, "buffer_size_msec")));
 	gf->last_num_frames = 0;
 	bool step_by_step_processing = obs_data_get_bool(settings, "step_by_step_processing");
 	gf->step_size_msec = step_by_step_processing
@@ -690,9 +692,9 @@ obs_properties_t *transcription_filter_properties(void *data)
 	obs_properties_add_bool(ppts, "caption_to_stream", MT_("caption_to_stream"));
 
 	obs_properties_add_int_slider(ppts, "buffer_size_msec", MT_("buffer_size_msec"), 1000,
-						DEFAULT_BUFFER_SIZE_MSEC, 50);
-	obs_properties_add_int_slider(ppts, "overlap_size_msec", MT_("overlap_size_msec"), 50,
-						300, 50);
+				      DEFAULT_BUFFER_SIZE_MSEC, 50);
+	obs_properties_add_int_slider(ppts, "overlap_size_msec", MT_("overlap_size_msec"), 50, 300,
+				      50);
 
 	obs_property_t *step_by_step_processing = obs_properties_add_bool(
 		ppts, "step_by_step_processing", MT_("step_by_step_processing"));

diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
@@ -55,7 +55,8 @@ float calculate_segment_energy(const float *pcmf32, size_t pcm32f_size)
 	return energy / (float)pcm32f_size;
 }
 
-size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, size_t overlap_ms, uint32_t sample_rate_hz)
+size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, size_t overlap_ms,
+			     uint32_t sample_rate_hz)
 {
 	// segment size: 10ms worth of samples
 	const size_t segment_size = 10 * sample_rate_hz / 1000;
@@ -320,8 +321,8 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
 
 	if (!skipped_inference) {
 		// find the tail word cutoff
-		const size_t tail_word_cutoff =
-			find_tail_word_cutoff(output[0], out_frames, gf->overlap_ms, WHISPER_SAMPLE_RATE);
+		const size_t tail_word_cutoff = find_tail_word_cutoff(
+			output[0], out_frames, gf->overlap_ms, WHISPER_SAMPLE_RATE);
 		if (tail_word_cutoff < out_frames)
 			obs_log(gf->log_level, "tail word cutoff: %d frames",
 				(int)tail_word_cutoff);