diff --git a/buildspec.json b/buildspec.json
index 7dd0fa6..c7455fe 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -45,7 +45,7 @@
     }
   },
   "name": "obs-localvocal",
-  "version": "0.0.6",
+  "version": "0.0.7",
   "author": "Roy Shilkrot",
   "website": "https://github.com/obs-ai/obs-localvocal",
   "email": "roy.shil@gmail.com",
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index d2b2d44..4eaefd8 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -167,7 +167,7 @@ void transcription_filter_destroy(void *data)
 	delete gf->wshiper_thread_cv;
 	delete gf->text_source_mutex;
 
-	bfree(gf);
+	delete gf;
 }
 
 void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp
index 844aff5..75be45c 100644
--- a/src/whisper-processing.cpp
+++ b/src/whisper-processing.cpp
@@ -8,6 +8,7 @@
 #include <...>
 
 #include <...>
+#include <float.h>
 
 #ifdef _WIN32
 #include <...>
@@ -45,6 +46,37 @@
 	}
 }
 
+float calculate_segment_energy(const float *pcmf32, size_t pcm32f_size)
+{
+	float energy = 0.0f;
+	for (size_t i = 0; i < pcm32f_size; i++) {
+		energy += fabsf(pcmf32[i]);
+	}
+	return energy / (float)pcm32f_size;
+}
+
+size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, uint32_t sample_rate_hz)
+{
+	// segment size: 10ms worth of samples
+	const size_t segment_size = 10 * sample_rate_hz / 1000;
+	// overlap size in samples
+	const size_t overlap_size = OVERLAP_SIZE_MSEC * sample_rate_hz / 1000;
+	// tail lookup window starting point
+	const size_t tail_lookup_start = pcm32f_size - overlap_size;
+
+	size_t tail_word_cutoff = pcm32f_size;
+	float lowest_energy = FLT_MAX;
+	for (size_t i = tail_lookup_start; i < pcm32f_size - segment_size; i += segment_size / 2) {
+		const float energy = calculate_segment_energy(pcmf32 + i, segment_size);
+		if (energy < 0.0001 && energy < lowest_energy) {
+			tail_word_cutoff = i;
+			lowest_energy = energy;
+		}
+	}
+
+	return tail_word_cutoff;
+}
+
 // VAD (voice activity detection), return true if speech detected
 bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float vad_thold,
 		float freq_thold, bool verbose)
@@ -278,9 +310,16 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
 	}
 
 	if (!skipped_inference) {
+		// find the tail word cutoff
+		const size_t tail_word_cutoff =
+			find_tail_word_cutoff(output[0], out_frames, WHISPER_SAMPLE_RATE);
+		if (tail_word_cutoff < out_frames)
+			obs_log(gf->log_level, "tail word cutoff: %d frames",
+				(int)tail_word_cutoff);
+
 		// run inference
 		const struct DetectionResultWithText inference_result =
-			run_whisper_inference(gf, output[0], out_frames);
+			run_whisper_inference(gf, output[0], tail_word_cutoff);
 
 		if (inference_result.result == DETECTION_RESULT_SPEECH) {
 			// output inference result to a text source