Skip to content

Commit

Permalink
Fix destroy crash (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
royshil authored Nov 15, 2023
1 parent 9920fda commit ba8bd4d
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
2 changes: 1 addition & 1 deletion buildspec.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
}
},
"name": "obs-localvocal",
"version": "0.0.6",
"version": "0.0.7",
"author": "Roy Shilkrot",
"website": "https://github.com/obs-ai/obs-localvocal",
"email": "[email protected]",
Expand Down
2 changes: 1 addition & 1 deletion src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ void transcription_filter_destroy(void *data)
delete gf->wshiper_thread_cv;
delete gf->text_source_mutex;

bfree(gf);
delete gf;
}

void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
Expand Down
41 changes: 40 additions & 1 deletion src/whisper-processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <algorithm>
#include <cctype>
#include <cfloat>

#ifdef _WIN32
#include <fstream>
Expand Down Expand Up @@ -45,6 +46,37 @@ void high_pass_filter(float *pcmf32, size_t pcm32f_size, float cutoff, uint32_t
}
}

/**
 * Compute the mean absolute amplitude of a run of PCM samples.
 *
 * Despite the name this is not a sum of squares: it is the sum of |sample|
 * divided by the number of samples.
 *
 * @param pcmf32       Pointer to the first sample; samples are only read.
 * @param pcm32f_size  Number of samples to read starting at pcmf32.
 * @return The mean of |pcmf32[i]| over the given samples, or 0.0f when
 *         pcm32f_size is 0.
 */
float calculate_segment_energy(const float *pcmf32, size_t pcm32f_size)
{
	// An empty segment would otherwise evaluate 0.0f / 0.0f and yield NaN,
	// which compares false against any threshold the caller applies.
	if (pcm32f_size == 0) {
		return 0.0f;
	}

	float energy = 0.0f;
	for (size_t i = 0; i < pcm32f_size; i++) {
		energy += fabsf(pcmf32[i]);
	}
	return energy / (float)pcm32f_size;
}

/**
 * Look for a quiet spot near the end of the buffer where the audio can be cut.
 *
 * The last OVERLAP_SIZE_MSEC milliseconds of the buffer are scanned in 10 ms
 * windows that advance by half a window. Among the windows whose
 * calculate_segment_energy() value is below 0.0001, the one with the lowest
 * value is selected.
 *
 * @param pcmf32          Pointer to the first sample; samples are only read.
 * @param pcm32f_size     Number of samples available at pcmf32.
 * @param sample_rate_hz  Sample rate used to convert milliseconds to samples.
 * @return Index of the first sample of the selected window, or pcm32f_size
 *         when no window qualifies or the buffer is too short to scan.
 */
size_t find_tail_word_cutoff(const float *pcmf32, size_t pcm32f_size, uint32_t sample_rate_hz)
{
	// segment size: 10ms worth of samples. The product is formed in size_t so
	// that it cannot wrap in 32-bit arithmetic where size_t is wider.
	const size_t segment_size = (size_t)10 * sample_rate_hz / 1000;
	// overlap size in samples
	const size_t overlap_size = (size_t)(OVERLAP_SIZE_MSEC) * sample_rate_hz / 1000;

	// The subtractions below are unsigned: a buffer shorter than the overlap
	// or than one segment would wrap around and send the scan out of bounds.
	// A zero-length segment carries no information, so there is nothing to scan.
	if (segment_size == 0 || pcm32f_size < segment_size || pcm32f_size < overlap_size) {
		return pcm32f_size;
	}

	// tail lookup window starting point
	const size_t tail_lookup_start = pcm32f_size - overlap_size;
	// exclusive upper bound on window starts, so each window fits in the buffer
	const size_t tail_lookup_end = pcm32f_size - segment_size;
	// half-window hop; never 0, otherwise the loop would not advance
	const size_t step = segment_size > 1 ? segment_size / 2 : 1;

	size_t tail_word_cutoff = pcm32f_size;
	float lowest_energy = FLT_MAX;
	for (size_t i = tail_lookup_start; i < tail_lookup_end; i += step) {
		const float energy = calculate_segment_energy(pcmf32 + i, segment_size);
		if (energy < 0.0001 && energy < lowest_energy) {
			tail_word_cutoff = i;
			lowest_energy = energy;
		}
	}

	return tail_word_cutoff;
}

// VAD (voice activity detection), return true if speech detected
bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float vad_thold,
float freq_thold, bool verbose)
Expand Down Expand Up @@ -278,9 +310,16 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
}

if (!skipped_inference) {
// find the tail word cutoff
const size_t tail_word_cutoff =
find_tail_word_cutoff(output[0], out_frames, WHISPER_SAMPLE_RATE);
if (tail_word_cutoff < out_frames)
obs_log(gf->log_level, "tail word cutoff: %d frames",
(int)tail_word_cutoff);

// run inference
const struct DetectionResultWithText inference_result =
run_whisper_inference(gf, output[0], out_frames);
run_whisper_inference(gf, output[0], tail_word_cutoff);

if (inference_result.result == DETECTION_RESULT_SPEECH) {
// output inference result to a text source
Expand Down

0 comments on commit ba8bd4d

Please sign in to comment.