From b992db47f29a07f28434aa75d41420d6e391fbe9 Mon Sep 17 00:00:00 2001 From: Jan Wijffels Date: Tue, 23 Jan 2024 00:50:37 +0100 Subject: [PATCH] add relevant webrtc c code --- .../signal_processing/complex_bit_reverse.c | 108 +++ .../signal_processing/complex_fft.c | 299 ++++++++ .../signal_processing/cross_correlation.c | 30 + .../signal_processing/downsample_fast.c | 65 ++ .../common_audio/signal_processing/energy.c | 39 + .../signal_processing/get_scaling_square.c | 46 ++ .../signal_processing/min_max_operations.c | 258 +++++++ .../resample_by_2_internal.c | 689 ++++++++++++++++++ .../signal_processing/resample_fractional.c | 239 ++++++ .../common_audio/signal_processing/spl_init.c | 69 ++ .../vector_scaling_operations.c | 165 +++++ .../spl_sqrt_floor/spl_sqrt_floor.c | 77 ++ src/webrtc/common_audio/vad/vad_core.c | 685 +++++++++++++++++ src/webrtc/common_audio/vad/vad_filterbank.c | 329 +++++++++ src/webrtc/common_audio/vad/vad_gmm.c | 82 +++ src/webrtc/common_audio/vad/vad_sp.c | 176 +++++ src/webrtc/common_audio/vad/webrtc_vad.c | 114 +++ 17 files changed, 3470 insertions(+) create mode 100644 src/webrtc/common_audio/signal_processing/complex_bit_reverse.c create mode 100644 src/webrtc/common_audio/signal_processing/complex_fft.c create mode 100644 src/webrtc/common_audio/signal_processing/cross_correlation.c create mode 100644 src/webrtc/common_audio/signal_processing/downsample_fast.c create mode 100644 src/webrtc/common_audio/signal_processing/energy.c create mode 100644 src/webrtc/common_audio/signal_processing/get_scaling_square.c create mode 100644 src/webrtc/common_audio/signal_processing/min_max_operations.c create mode 100644 src/webrtc/common_audio/signal_processing/resample_by_2_internal.c create mode 100644 src/webrtc/common_audio/signal_processing/resample_fractional.c create mode 100644 src/webrtc/common_audio/signal_processing/spl_init.c create mode 100644 src/webrtc/common_audio/signal_processing/vector_scaling_operations.c create mode 100644 src/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c create mode 100644 src/webrtc/common_audio/vad/vad_core.c create mode 100644 src/webrtc/common_audio/vad/vad_filterbank.c create mode 100644 src/webrtc/common_audio/vad/vad_gmm.c create mode 100644 src/webrtc/common_audio/vad/vad_sp.c create mode 100644 src/webrtc/common_audio/vad/webrtc_vad.c diff --git a/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c b/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c new file mode 100644 index 0000000..1c82cff --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* Tables for data buffer indexes that are bit reversed and thus need to be + * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap + * operations, while index_7[{1, 3, 5, ...}] are for the right side of the + * operation. Same for index_8. + */ + +/* Indexes for the case of stages == 7. */ +static const int16_t index_7[112] = { + 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, + 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, + 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, + 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, + 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, + 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, + 103, 115, 111, 123 +}; + +/* Indexes for the case of stages == 8. */ +static const int16_t index_8[240] = { + 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, + 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, + 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, + 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, + 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, + 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, + 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, + 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, + 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, + 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, + 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, + 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, + 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, + 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, + 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { + /* For any specific value of stages, we know exactly the indexes that are + * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of + * stages are 7 and 8, so we use tables to save unnecessary iterations and + * calculations for these two cases. + */ + if (stages == 7 || stages == 8) { + int m = 0; + int length = 112; + const int16_t* index = index_7; + + if (stages == 8) { + length = 240; + index = index_8; + } + + /* Decimation in time. Swap the elements with bit-reversed indexes. */ + for (m = 0; m < length; m += 2) { + /* We declare a int32_t* type pointer, to load both the 16-bit real + * and imaginary elements from complex_data in one instruction, reducing + * complexity. + */ + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + temp = complex_data_ptr[index[m]]; /* Real and imaginary */ + complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; + complex_data_ptr[index[m + 1]] = temp; + } + } + else { + int m = 0, mr = 0, l = 0; + int n = 1 << stages; + int nn = n - 1; + + /* Decimation in time - re-order data */ + for (m = 1; m <= nn; ++m) { + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + /* Find out indexes that are bit-reversed. */ + l = n; + do { + l >>= 1; + } while (l > nn - mr); + mr = (mr & (l - 1)) + l; + + if (mr <= m) { + continue; + } + + /* Swap the elements with bit-reversed indexes. + * This is similar to the loop in the stages == 7 or 8 cases. + */ + temp = complex_data_ptr[m]; /* Real and imaginary */ + complex_data_ptr[m] = complex_data_ptr[mr]; + complex_data_ptr[mr] = temp; + } + } +} diff --git a/src/webrtc/common_audio/signal_processing/complex_fft.c b/src/webrtc/common_audio/signal_processing/complex_fft.c new file mode 100644 index 0000000..ddc9a97 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/complex_fft.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ComplexFFT(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/complex_fft_tables.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/system/arch.h" + +#define CFFTSFT 14 +#define CFFTRND 1 +#define CFFTRND2 16384 + +#define CIFFTSFT 14 +#define CIFFTRND 1 + + +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) +{ + int i, j, l, k, istep, n, m; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = 1 << stages; + if (n > 1024) + return -1; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); + } + } + + --k; + l = istep; + + } + + } else + { + // mode==1: High-complexity and High-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," + " lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cfftrnd]"r"(CFFTRND)); +#else + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; +#endif + + tr32 >>= 15 - CFFTSFT; + ti32 >>= 15 - CFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); + } + } + + --k; + l = istep; + } + } + return 0; +} + +int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) +{ + size_t i, j, l, istep, n, m; + int k, scale, shift; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + int32_t tmp32, round2; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = ((size_t)1) << stages; + if (n > 1024) + return -1; + + scale = 0; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + while (l < n) + { + // variable scaling, depending upon data + shift = 0; + round2 = 8192; + + tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); + if (tmp32 > 13573) + { + shift++; + scale++; + round2 <<= 1; + } + if (tmp32 > 27146) + { + shift++; + scale++; + round2 <<= 1; + } + + istep = l << 1; + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); + } + } + } else + { + // mode==1: High-complexity and High-accuracy mode + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cifftrnd]"r"(CIFFTRND) + ); +#else + + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; +#endif + tr32 >>= 15 - CIFFTSFT; + ti32 >>= 15 - CIFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); + } + } + + } + --k; + l = istep; + } + return scale; +} diff --git a/src/webrtc/common_audio/signal_processing/cross_correlation.c b/src/webrtc/common_audio/signal_processing/cross_correlation.c new file mode 100644 index 0000000..c6267c9 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/cross_correlation.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2) { + size_t i = 0, j = 0; + + for (i = 0; i < dim_cross_correlation; i++) { + int32_t corr = 0; + for (j = 0; j < dim_seq; j++) + corr += (seq1[j] * seq2[j]) >> right_shifts; + seq2 += step_seq2; + *cross_correlation++ = corr; + } +} diff --git a/src/webrtc/common_audio/signal_processing/downsample_fast.c b/src/webrtc/common_audio/signal_processing/downsample_fast.c new file mode 100644 index 0000000..80fdc58 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/downsample_fast.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include "rtc_base/checks.h" +#include "rtc_base/sanitizer.h" + +// TODO(Bjornv): Change the function parameter order to WebRTC code style. +// C version of WebRtcSpl_DownsampleFast() for generic platforms. +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay) { + int16_t* const original_data_out = data_out; + size_t i = 0; + size_t j = 0; + int32_t out_s32 = 0; + size_t endpos = delay + factor * (data_out_length - 1) + 1; + + // Return error if any of the running conditions doesn't meet. + if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } + + rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]), + coefficients_length); + + for (i = delay; i < endpos; i += factor) { + out_s32 = 2048; // Round value, 0.5 in Q12. + + for (j = 0; j < coefficients_length; j++) { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], + sizeof(data_in[0]), 1); + // out_s32 is in Q12 domain. + out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; + } + + out_s32 >>= 12; // Q0. + + // Saturate and store the output. + *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); + } + + RTC_DCHECK_EQ(original_data_out + data_out_length, data_out); + rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]), + data_out_length); + + return 0; +} diff --git a/src/webrtc/common_audio/signal_processing/energy.c b/src/webrtc/common_audio/signal_processing/energy.c new file mode 100644 index 0000000..5cce6b8 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/energy.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Energy(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor) +{ + int32_t en = 0; + size_t i; + int scaling = + WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); + size_t looptimes = vector_length; + int16_t *vectorptr = vector; + + for (i = 0; i < looptimes; i++) + { + en += (*vectorptr * *vectorptr) >> scaling; + vectorptr++; + } + *scale_factor = scaling; + + return en; +} diff --git a/src/webrtc/common_audio/signal_processing/get_scaling_square.c b/src/webrtc/common_audio/signal_processing/get_scaling_square.c new file mode 100644 index 0000000..4eb1269 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/get_scaling_square.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_GetScalingSquare(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times) +{ + int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); + size_t i; + int16_t smax = -1; + int16_t sabs; + int16_t *sptr = in_vector; + int16_t t; + size_t looptimes = in_vector_length; + + for (i = looptimes; i > 0; i--) + { + sabs = (*sptr > 0 ? *sptr++ : -*sptr++); + smax = (sabs > smax ? sabs : smax); + } + t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + + if (smax == 0) + { + return 0; // Since norm(0) returns 0 + } else + { + return (t > nbits) ? 0 : nbits - t; + } +} diff --git a/src/webrtc/common_audio/signal_processing/min_max_operations.c b/src/webrtc/common_audio/signal_processing/min_max_operations.c new file mode 100644 index 0000000..6acf882 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/min_max_operations.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of functions + * WebRtcSpl_MaxAbsValueW16C() + * WebRtcSpl_MaxAbsValueW32C() + * WebRtcSpl_MaxValueW16C() + * WebRtcSpl_MaxValueW32C() + * WebRtcSpl_MinValueW16C() + * WebRtcSpl_MinValueW32C() + * WebRtcSpl_MaxAbsIndexW16() + * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinIndexW16() + * WebRtcSpl_MinIndexW32() + * + */ + +#include +#include + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) +// TODO(kma): Move the next six functions into min_max_operations_c.c. + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { + size_t i = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + } + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = + (vector[i] != INT_MIN) ? abs((int)vector[i]) : INT_MAX + (uint32_t)1; + if (absolute > maximum) { + maximum = absolute; + } + } + + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Maximum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Minimum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Minimum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Index of maximum absolute value in a word16 vector. +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { + // Use type int for local variables, to accomodate the value of abs(-32768). + + size_t i = 0, index = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + index = i; + } + } + + return index; +} + +int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length) { + int16_t min_val, max_val; + WebRtcSpl_MinMaxW16(vector, length, &min_val, &max_val); + if (min_val == max_val || min_val < -max_val) { + return min_val; + } + return max_val; +} + +// Index of maximum value in a word16 vector. +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word32 vector. +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word16 vector. +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word32 vector. +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Finds both the minimum and maximum elements in an array of 16-bit integers. +void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length, + int16_t* min_val, int16_t* max_val) { +#if defined(WEBRTC_HAS_NEON) + return WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val); +#else + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + if (vector[i] > maximum) + maximum = vector[i]; + } + *min_val = minimum; + *max_val = maximum; +#endif +} diff --git a/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c b/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c new file mode 100644 index 0000000..99592b2 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file contains some internal resampling functions. + * + */ + +#include "common_audio/signal_processing/resample_by_2_internal.h" +#include "rtc_base/sanitizer.h" + +// allpass filter coefficients. +static const int16_t kResampleAllpass[2][3] = { + {821, 6110, 12382}, + {3050, 9368, 15063} +}; + +// +// decimator +// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! +// output: int16_t (saturated) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[1]; + // UBSan: -1771017321 - 999586185 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[7] >> 1); + } + + in--; + + // combine allpass outputs + for (i = 0; i < len; i += 2) + { + // divide by two, add both allpass outputs and round + tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; + tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; + if (tmp0 > (int32_t)0x00007FFF) + tmp0 = 0x00007FFF; + if (tmp0 < (int32_t)0xFFFF8000) + tmp0 = 0xFFFF8000; + out[i] = (int16_t)tmp0; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i + 1] = (int16_t)tmp1; + } +} + +// +// decimator +// input: int16_t +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2ShortToInt(const int16_t *in, + int32_t len, + int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // UBSan: -1379909682 - 834099714 cannot be represented in type 'int' + + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + out[i] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + out[i] += (state[7] >> 1); + } + + in--; +} + +// +// interpolator +// input: int16_t +// output: int32_t (normalized, not saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7] >> 15; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 15; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7]; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3]; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int16_t (saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, saturate and store + tmp1 = state[7] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, saturate and store + tmp1 = state[3] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } +} + +// lowpass filter +// input: int16_t +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} + +// lowpass filter +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = in[i << 1]; + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // UBSan: -794814117 - 1566149201 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} diff --git a/src/webrtc/common_audio/signal_processing/resample_fractional.c b/src/webrtc/common_audio/signal_processing/resample_fractional.c new file mode 100644 index 0000000..9ffe0ac --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/resample_fractional.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions between 48, 44, 32 and 24 kHz. + * The description headers can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// interpolation coefficients +static const int16_t kCoefficients48To32[2][8] = { + {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, + {222, 441, -3783, 12903, 23285, 1087, -2050, 778} +}; + +static const int16_t kCoefficients32To24[3][8] = { + {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, + {386, -381, -2646, 19062, 19062, -2646, -381, 386}, + {90, 721, -3838, 10620, 24406, 2434, -2362, 767} +}; + +static const int16_t kCoefficients44To32[4][9] = { + {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, + {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, + {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, + {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} +}; + +// Resampling ratio: 2/3 +// input: int32_t (normalized, not saturated) :: size 3 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K +// K: number of blocks + +void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (3 input samples -> 2 output samples); + // process in sub blocks of size 3 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients48To32[0][0] * In[0]; + tmp += kCoefficients48To32[0][1] * In[1]; + tmp += kCoefficients48To32[0][2] * In[2]; + tmp += kCoefficients48To32[0][3] * In[3]; + tmp += kCoefficients48To32[0][4] * In[4]; + tmp += kCoefficients48To32[0][5] * In[5]; + tmp += kCoefficients48To32[0][6] * In[6]; + tmp += kCoefficients48To32[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients48To32[1][0] * In[1]; + tmp += kCoefficients48To32[1][1] * In[2]; + tmp += kCoefficients48To32[1][2] * In[3]; + tmp += kCoefficients48To32[1][3] * In[4]; + tmp += kCoefficients48To32[1][4] * In[5]; + tmp += kCoefficients48To32[1][5] * In[6]; + tmp += kCoefficients48To32[1][6] * In[7]; + tmp += kCoefficients48To32[1][7] * In[8]; + Out[1] = tmp; + + // update pointers + In += 3; + Out += 2; + } +} + +// Resampling ratio: 3/4 +// input: int32_t (normalized, not saturated) :: size 4 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K +// K: number of blocks + +void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (4 input samples -> 3 output samples); + // process in sub blocks of size 4 samples. + size_t m; + int32_t tmp; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients32To24[0][0] * In[0]; + tmp += kCoefficients32To24[0][1] * In[1]; + tmp += kCoefficients32To24[0][2] * In[2]; + tmp += kCoefficients32To24[0][3] * In[3]; + tmp += kCoefficients32To24[0][4] * In[4]; + tmp += kCoefficients32To24[0][5] * In[5]; + tmp += kCoefficients32To24[0][6] * In[6]; + tmp += kCoefficients32To24[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[1][0] * In[1]; + tmp += kCoefficients32To24[1][1] * In[2]; + tmp += kCoefficients32To24[1][2] * In[3]; + tmp += kCoefficients32To24[1][3] * In[4]; + tmp += kCoefficients32To24[1][4] * In[5]; + tmp += kCoefficients32To24[1][5] * In[6]; + tmp += kCoefficients32To24[1][6] * In[7]; + tmp += kCoefficients32To24[1][7] * In[8]; + Out[1] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[2][0] * In[2]; + tmp += kCoefficients32To24[2][1] * In[3]; + tmp += kCoefficients32To24[2][2] * In[4]; + tmp += kCoefficients32To24[2][3] * In[5]; + tmp += kCoefficients32To24[2][4] * In[6]; + tmp += kCoefficients32To24[2][5] * In[7]; + tmp += kCoefficients32To24[2][6] * In[8]; + tmp += kCoefficients32To24[2][7] * In[9]; + Out[2] = tmp; + + // update pointers + In += 4; + Out += 3; + } +} + +// +// fractional resampling filters +// Fout = 11/16 * Fin +// Fout = 8/11 * Fin +// + +// compute two inner-products and store them to output array +static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, + const int16_t *coef_ptr, int32_t *out1, + int32_t *out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; +} + +// Resampling ratio: 8/11 +// input: int32_t (normalized, not saturated) :: size 11 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K +// K: number of blocks + +void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (11 input samples -> 8 output samples); + // process in sub blocks of size 11 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + + // first output sample + Out[0] = ((int32_t)In[3] << 15) + tmp; + + // sum and accumulate filter coefficients and input samples + tmp += kCoefficients44To32[3][0] * In[5]; + tmp += kCoefficients44To32[3][1] * In[6]; + tmp += kCoefficients44To32[3][2] * In[7]; + tmp += kCoefficients44To32[3][3] * In[8]; + tmp += kCoefficients44To32[3][4] * In[9]; + tmp += kCoefficients44To32[3][5] * In[10]; + tmp += kCoefficients44To32[3][6] * In[11]; + tmp += kCoefficients44To32[3][7] * In[12]; + tmp += kCoefficients44To32[3][8] * In[13]; + Out[4] = tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); + + // update pointers + In += 11; + Out += 8; + } +} diff --git a/src/webrtc/common_audio/signal_processing/spl_init.c b/src/webrtc/common_audio/signal_processing/spl_init.c new file mode 100644 index 0000000..cf37d47 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/spl_init.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Some code came from common/rtcd.c in the WebM project. + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bugs.webrtc.org/9553): These function pointers are useless. Refactor +// things so that we simply have a bunch of regular functions with different +// implementations for different platforms. + +#if defined(WEBRTC_HAS_NEON) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; +const CrossCorrelation WebRtcSpl_CrossCorrelation = + WebRtcSpl_CrossCorrelationNeon; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; + +#elif defined(MIPS32_LE) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = +#ifdef MIPS_DSP_R1_LE + WebRtcSpl_MaxAbsValueW32_mips; +#else + WebRtcSpl_MaxAbsValueW32C; +#endif +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; +const CrossCorrelation WebRtcSpl_CrossCorrelation = + WebRtcSpl_CrossCorrelation_mips; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = +#ifdef MIPS_DSP_R1_LE + WebRtcSpl_ScaleAndAddVectorsWithRound_mips; +#else + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +#endif + +#else + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; +const CrossCorrelation WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; + +#endif diff --git a/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c b/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c new file mode 100644 index 0000000..7307dc7 --- /dev/null +++ b/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_VectorBitShiftW16() + * WebRtcSpl_VectorBitShiftW32() + * WebRtcSpl_VectorBitShiftW32ToW16() + * WebRtcSpl_ScaleVector() + * WebRtcSpl_ScaleVectorWithSat() + * WebRtcSpl_ScaleAndAddVectors() + * WebRtcSpl_ScaleAndAddVectorsWithRoundC() + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, + const int16_t *in, int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) >> right_shifts); + } + } else + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) * (1 << (-right_shifts))); + } + } +} + +void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, + size_t vector_length, + const int32_t *in_vector, + int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) >> right_shifts); + } + } else + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) << (-right_shifts)); + } + } +} + +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, + const int32_t* in, int right_shifts) { + size_t i; + int32_t tmp_w32; + + if (right_shifts >= 0) { + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) >> right_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } else { + int left_shifts = -right_shifts; + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) << left_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } +} + +void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) + { + *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) { + *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, + const int16_t *in2, int16_t gain2, int shift2, + int16_t *out, size_t vector_length) +{ + // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 + size_t i; + const int16_t *in1ptr; + const int16_t *in2ptr; + int16_t *outptr; + + in1ptr = in1; + in2ptr = in2; + outptr = out; + + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + + (int16_t)((gain2 * *in2ptr++) >> shift2); + } +} + +// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. +int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length) { + size_t i = 0; + int round_value = (1 << right_shifts) >> 1; + + if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || + length == 0 || right_shifts < 0) { + return -1; + } + + for (i = 0; i < length; i++) { + out_vector[i] = (int16_t)(( + in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + + round_value) >> right_shifts); + } + + return 0; +} diff --git a/src/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c b/src/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c new file mode 100644 index 0000000..b478a41 --- /dev/null +++ b/src/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c @@ -0,0 +1,77 @@ +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" + * To: + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ + +// Minor modifications in code style for WebRTC, 2012. + +#include "common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" + +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note for large input values (close to + * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) + * contains the MSB information (a non-sign value). Do with caution + * if you need to cast the output to int16_t type. + * + * If the input value is negative, it returns 0. + */ + +#define WEBRTC_SPL_SQRT_ITER(N) \ + try1 = root + (1 << (N)); \ + if (value >= try1 << (N)) \ + { \ + value -= try1 << (N); \ + root |= 2 << (N); \ + } + +int32_t WebRtcSpl_SqrtFloor(int32_t value) +{ + int32_t root = 0, try1; + + WEBRTC_SPL_SQRT_ITER (15); + WEBRTC_SPL_SQRT_ITER (14); + WEBRTC_SPL_SQRT_ITER (13); + WEBRTC_SPL_SQRT_ITER (12); + WEBRTC_SPL_SQRT_ITER (11); + WEBRTC_SPL_SQRT_ITER (10); + WEBRTC_SPL_SQRT_ITER ( 9); + WEBRTC_SPL_SQRT_ITER ( 8); + WEBRTC_SPL_SQRT_ITER ( 7); + WEBRTC_SPL_SQRT_ITER ( 6); + WEBRTC_SPL_SQRT_ITER ( 5); + WEBRTC_SPL_SQRT_ITER ( 4); + WEBRTC_SPL_SQRT_ITER ( 3); + WEBRTC_SPL_SQRT_ITER ( 2); + WEBRTC_SPL_SQRT_ITER ( 1); + WEBRTC_SPL_SQRT_ITER ( 0); + + return root >> 1; +} diff --git a/src/webrtc/common_audio/vad/vad_core.c b/src/webrtc/common_audio/vad/vad_core.c new file mode 100644 index 0000000..0872449 --- /dev/null +++ b/src/webrtc/common_audio/vad/vad_core.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/vad/vad_core.h" + +#include "rtc_base/sanitizer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/vad/vad_filterbank.h" +#include "common_audio/vad/vad_gmm.h" +#include "common_audio/vad/vad_sp.h" + +// Spectrum Weighting +static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 }; +static const int16_t kNoiseUpdateConst = 655; // Q15 +static const int16_t kSpeechUpdateConst = 6554; // Q15 +static const int16_t kBackEta = 154; // Q8 +// Minimum difference between the two models, Q5 +static const int16_t kMinimumDifference[kNumChannels] = { + 544, 544, 576, 576, 576, 576 }; +// Upper limit of mean value for speech model, Q7 +static const int16_t kMaximumSpeech[kNumChannels] = { + 11392, 11392, 11520, 11520, 11520, 11520 }; +// Minimum value for mean value +static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 }; +// Upper limit of mean value for noise model, Q7 +static const int16_t kMaximumNoise[kNumChannels] = { + 9216, 9088, 8960, 8832, 8704, 8576 }; +// Start values for the Gaussian models, Q7 +// Weights for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataWeights[kTableSize] = { + 34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 }; +// Weights for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataWeights[kTableSize] = { + 48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 }; +// Means for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataMeans[kTableSize] = { + 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 }; +// Means for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataMeans[kTableSize] = { + 8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483 +}; +// Stds for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataStds[kTableSize] = { + 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 }; +// Stds for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataStds[kTableSize] = { + 555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 }; + +// Constants used in GmmProbability(). +// +// Maximum number of counted speech (VAD = 1) frames in a row. +static const int16_t kMaxSpeechFrames = 6; +// Minimum standard deviation for both speech and noise. +static const int16_t kMinStd = 384; + +// Constants in WebRtcVad_InitCore(). +// Default aggressiveness mode. +static const short kDefaultMode = 0; +static const int kInitCheck = 42; + +// Constants used in WebRtcVad_set_mode_core(). +// +// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms). +// +// Mode 0, Quality. +static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 }; +static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 }; +static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 }; +static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 }; +// Mode 1, Low bitrate. +static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 }; +static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 }; +static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 }; +static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 }; +// Mode 2, Aggressive. +static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 }; +static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 }; +static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 }; +static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 }; +// Mode 3, Very aggressive. +static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 }; +static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 }; +static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 }; +static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 }; + +// Calculates the weighted average w.r.t. number of Gaussians. The `data` are +// updated with an `offset` before averaging. +// +// - data [i/o] : Data to average. +// - offset [i] : An offset added to `data`. +// - weights [i] : Weights used for averaging. +// +// returns : The weighted average. +static int32_t WeightedAverage(int16_t* data, int16_t offset, + const int16_t* weights) { + int k; + int32_t weighted_average = 0; + + for (k = 0; k < kNumGaussians; k++) { + data[k * kNumChannels] += offset; + weighted_average += data[k * kNumChannels] * weights[k * kNumChannels]; + } + return weighted_average; +} + +// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still +// undefined behavior, so not a good idea; this just makes UBSan ignore the +// violation, so that our old code can continue to do what it's always been +// doing.) +static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow") + OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) { + return a * b; +} + +// Calculates the probabilities for both speech and background noise using +// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which +// type of signal is most probable. +// +// - self [i/o] : Pointer to VAD instance +// - features [i] : Feature vector of length `kNumChannels` +// = log10(energy in frequency band) +// - total_power [i] : Total power in audio frame. +// - frame_length [i] : Number of input samples +// +// - returns : the VAD decision (0 - noise, 1 - speech). +static int16_t GmmProbability(VadInstT* self, int16_t* features, + int16_t total_power, size_t frame_length) { + int channel, k; + int16_t feature_minimum; + int16_t h0, h1; + int16_t log_likelihood_ratio; + int16_t vadflag = 0; + int16_t shifts_h0, shifts_h1; + int16_t tmp_s16, tmp1_s16, tmp2_s16; + int16_t diff; + int gaussian; + int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; + int16_t delt, ndelt; + int16_t maxspe, maxmu; + int16_t deltaN[kTableSize], deltaS[kTableSize]; + int16_t ngprvec[kTableSize] = { 0 }; // Conditional probability = 0. + int16_t sgprvec[kTableSize] = { 0 }; // Conditional probability = 0. + int32_t h0_test, h1_test; + int32_t tmp1_s32, tmp2_s32; + int32_t sum_log_likelihood_ratios = 0; + int32_t noise_global_mean, speech_global_mean; + int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians]; + int16_t overhead1, overhead2, individualTest, totalTest; + + // Set various thresholds based on frame lengths (80, 160 or 240 samples). + if (frame_length == 80) { + overhead1 = self->over_hang_max_1[0]; + overhead2 = self->over_hang_max_2[0]; + individualTest = self->individual[0]; + totalTest = self->total[0]; + } else if (frame_length == 160) { + overhead1 = self->over_hang_max_1[1]; + overhead2 = self->over_hang_max_2[1]; + individualTest = self->individual[1]; + totalTest = self->total[1]; + } else { + overhead1 = self->over_hang_max_1[2]; + overhead2 = self->over_hang_max_2[2]; + individualTest = self->individual[2]; + totalTest = self->total[2]; + } + + if (total_power > kMinEnergy) { + // The signal power of current frame is large enough for processing. The + // processing consists of two parts: + // 1) Calculating the likelihood of speech and thereby a VAD decision. + // 2) Updating the underlying model, w.r.t., the decision made. + + // The detection scheme is an LRT with hypothesis + // H0: Noise + // H1: Speech + // + // We combine a global LRT with local tests, for each frequency sub-band, + // here defined as `channel`. + for (channel = 0; channel < kNumChannels; channel++) { + // For each channel we model the probability with a GMM consisting of + // `kNumGaussians`, with different means and standard deviations depending + // on H0 or H1. + h0_test = 0; + h1_test = 0; + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + // Probability under H0, that is, probability of frame being noise. + // Value given in Q27 = Q7 * Q20. + tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->noise_means[gaussian], + self->noise_stds[gaussian], + &deltaN[gaussian]); + noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; + h0_test += noise_probability[k]; // Q27 + + // Probability under H1, that is, probability of frame being speech. + // Value given in Q27 = Q7 * Q20. + tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->speech_means[gaussian], + self->speech_stds[gaussian], + &deltaS[gaussian]); + speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; + h1_test += speech_probability[k]; // Q27 + } + + // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H1}). + // Approximation: + // log2(Pr{X|H1} / Pr{X|H1}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H1}*2^Q) + // = log2(h1_test) - log2(h0_test) + // = log2(2^(31-shifts_h1)*(1+b1)) + // - log2(2^(31-shifts_h0)*(1+b0)) + // = shifts_h0 - shifts_h1 + // + log2(1+b1) - log2(1+b0) + // ~= shifts_h0 - shifts_h1 + // + // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1. + // Further, b0 and b1 are independent and on the average the two terms + // cancel. + shifts_h0 = WebRtcSpl_NormW32(h0_test); + shifts_h1 = WebRtcSpl_NormW32(h1_test); + if (h0_test == 0) { + shifts_h0 = 31; + } + if (h1_test == 0) { + shifts_h1 = 31; + } + log_likelihood_ratio = shifts_h0 - shifts_h1; + + // Update `sum_log_likelihood_ratios` with spectrum weighting. This is + // used for the global VAD decision. + sum_log_likelihood_ratios += + (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]); + + // Local VAD decision. + if ((log_likelihood_ratio * 4) > individualTest) { + vadflag = 1; + } + + // TODO(bjornv): The conditional probabilities below are applied on the + // hard coded number of Gaussians set to two. Find a way to generalize. + // Calculate local noise probabilities used later when updating the GMM. + h0 = (int16_t) (h0_test >> 12); // Q15 + if (h0 > 0) { + // High probability of noise. Assign conditional probabilities for each + // Gaussian in the GMM. + tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 + ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 + ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; + } else { + // Low noise probability. Assign conditional probability 1 to the first + // Gaussian and 0 to the rest (which is already set at initialization). + ngprvec[channel] = 16384; + } + + // Calculate local speech probabilities used later when updating the GMM. + h1 = (int16_t) (h1_test >> 12); // Q15 + if (h1 > 0) { + // High probability of speech. Assign conditional probabilities for each + // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. + tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 + sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 + sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; + } + } + + // Make a global VAD decision. + vadflag |= (sum_log_likelihood_ratios >= totalTest); + + // Update the model parameters. + maxspe = 12800; + for (channel = 0; channel < kNumChannels; channel++) { + + // Get minimum value in past which is used for long term correction in Q4. + feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); + + // Compute the "global" mean, that is the sum of the two means weighted. + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8 + + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + + nmk = self->noise_means[gaussian]; + smk = self->speech_means[gaussian]; + nsk = self->noise_stds[gaussian]; + ssk = self->speech_stds[gaussian]; + + // Update noise mean vector if the frame consists of noise only. + nmk2 = nmk; + if (!vadflag) { + // deltaN = (x-mu)/sigma^2 + // ngprvec[k] = `noise_probability[k]` / + // (`noise_probability[0]` + `noise_probability[1]`) + + // (Q14 * Q11 >> 11) = Q14. + delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11); + // Q7 + (Q14 * Q15 >> 22) = Q7. + nmk2 = nmk + (int16_t)((delt * kNoiseUpdateConst) >> 22); + } + + // Long term correction of the noise mean. + // Q8 - Q8 = Q8. + ndelt = (feature_minimum << 4) - tmp1_s16; + // Q7 + (Q8 * Q8) >> 9 = Q7. + nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9); + + // Control that the noise mean does not drift to much. + tmp_s16 = (int16_t) ((k + 5) << 7); + if (nmk3 < tmp_s16) { + nmk3 = tmp_s16; + } + tmp_s16 = (int16_t) ((72 + k - channel) << 7); + if (nmk3 > tmp_s16) { + nmk3 = tmp_s16; + } + self->noise_means[gaussian] = nmk3; + + if (vadflag) { + // Update speech mean vector: + // `deltaS` = (x-mu)/sigma^2 + // sgprvec[k] = `speech_probability[k]` / + // (`speech_probability[0]` + `speech_probability[1]`) + + // (Q14 * Q11) >> 11 = Q14. + delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11); + // Q14 * Q15 >> 21 = Q8. + tmp_s16 = (int16_t)((delt * kSpeechUpdateConst) >> 21); + // Q7 + (Q8 >> 1) = Q7. With rounding. + smk2 = smk + ((tmp_s16 + 1) >> 1); + + // Control that the speech mean does not drift to much. + maxmu = maxspe + 640; + if (smk2 < kMinimumMean[k]) { + smk2 = kMinimumMean[k]; + } + if (smk2 > maxmu) { + smk2 = maxmu; + } + self->speech_means[gaussian] = smk2; // Q7. + + // (Q7 >> 3) = Q4. With rounding. + tmp_s16 = ((smk + 4) >> 3); + + tmp_s16 = features[channel] - tmp_s16; // Q4 + // (Q11 * Q4 >> 3) = Q12. + tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3; + tmp2_s32 = tmp1_s32 - 4096; + tmp_s16 = sgprvec[gaussian] >> 2; + // (Q14 >> 2) * Q12 = Q24. + tmp1_s32 = tmp_s16 * tmp2_s32; + + tmp2_s32 = tmp1_s32 >> 4; // Q20 + + // 0.1 * Q20 / Q7 = Q13. + if (tmp2_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); + tmp_s16 = -tmp_s16; + } + // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). + // Note that division by 4 equals shift by 2, hence, + // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7. + tmp_s16 += 128; // Rounding. + ssk += (tmp_s16 >> 8); + if (ssk < kMinStd) { + ssk = kMinStd; + } + self->speech_stds[gaussian] = ssk; + } else { + // Update GMM variance vectors. + // deltaN * (features[channel] - nmk) - 1 + // Q4 - (Q7 >> 3) = Q4. + tmp_s16 = features[channel] - (nmk >> 3); + // (Q11 * Q4 >> 3) = Q12. + tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3; + tmp1_s32 -= 4096; + + // (Q14 >> 2) * Q12 = Q24. + tmp_s16 = (ngprvec[gaussian] + 2) >> 2; + tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32); + // Q20 * approx 0.001 (2^-10=0.0009766), hence, + // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20. + tmp1_s32 = tmp2_s32 >> 14; + + // Q20 / Q7 = Q13. + if (tmp1_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk); + tmp_s16 = -tmp_s16; + } + tmp_s16 += 32; // Rounding + nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7. + if (nsk < kMinStd) { + nsk = kMinStd; + } + self->noise_stds[gaussian] = nsk; + } + } + + // Separate models if they are too close. + // `noise_global_mean` in Q14 (= Q7 * Q7). + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + + // `speech_global_mean` in Q14 (= Q7 * Q7). + speech_global_mean = WeightedAverage(&self->speech_means[channel], 0, + &kSpeechDataWeights[channel]); + + // `diff` = "global" speech mean - "global" noise mean. + // (Q14 >> 9) - (Q14 >> 9) = Q5. + diff = (int16_t) (speech_global_mean >> 9) - + (int16_t) (noise_global_mean >> 9); + if (diff < kMinimumDifference[channel]) { + tmp_s16 = kMinimumDifference[channel] - diff; + + // `tmp1_s16` = ~0.8 * (kMinimumDifference - diff) in Q7. + // `tmp2_s16` = ~0.2 * (kMinimumDifference - diff) in Q7. + tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2); + tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2); + + // Move Gaussian means for speech model by `tmp1_s16` and update + // `speech_global_mean`. Note that `self->speech_means[channel]` is + // changed after the call. + speech_global_mean = WeightedAverage(&self->speech_means[channel], + tmp1_s16, + &kSpeechDataWeights[channel]); + + // Move Gaussian means for noise model by -`tmp2_s16` and update + // `noise_global_mean`. Note that `self->noise_means[channel]` is + // changed after the call. + noise_global_mean = WeightedAverage(&self->noise_means[channel], + -tmp2_s16, + &kNoiseDataWeights[channel]); + } + + // Control that the speech & noise means do not drift to much. + maxspe = kMaximumSpeech[channel]; + tmp2_s16 = (int16_t) (speech_global_mean >> 7); + if (tmp2_s16 > maxspe) { + // Upper limit of speech model. + tmp2_s16 -= maxspe; + + for (k = 0; k < kNumGaussians; k++) { + self->speech_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + + tmp2_s16 = (int16_t) (noise_global_mean >> 7); + if (tmp2_s16 > kMaximumNoise[channel]) { + tmp2_s16 -= kMaximumNoise[channel]; + + for (k = 0; k < kNumGaussians; k++) { + self->noise_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + } + self->frame_counter++; + } + + // Smooth with respect to transition hysteresis. + if (!vadflag) { + if (self->over_hang > 0) { + vadflag = 2 + self->over_hang; + self->over_hang--; + } + self->num_of_speech = 0; + } else { + self->num_of_speech++; + if (self->num_of_speech > kMaxSpeechFrames) { + self->num_of_speech = kMaxSpeechFrames; + self->over_hang = overhead2; + } else { + self->over_hang = overhead1; + } + } + return vadflag; +} + +// Initialize the VAD. Set aggressiveness mode to default value. +int WebRtcVad_InitCore(VadInstT* self) { + int i; + + if (self == NULL) { + return -1; + } + + // Initialization of general struct variables. + self->vad = 1; // Speech active (=1). + self->frame_counter = 0; + self->over_hang = 0; + self->num_of_speech = 0; + + // Initialization of downsampling filter state. + memset(self->downsampling_filter_states, 0, + sizeof(self->downsampling_filter_states)); + + // Initialization of 48 to 8 kHz downsampling. + WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8); + + // Read initial PDF parameters. + for (i = 0; i < kTableSize; i++) { + self->noise_means[i] = kNoiseDataMeans[i]; + self->speech_means[i] = kSpeechDataMeans[i]; + self->noise_stds[i] = kNoiseDataStds[i]; + self->speech_stds[i] = kSpeechDataStds[i]; + } + + // Initialize Index and Minimum value vectors. + for (i = 0; i < 16 * kNumChannels; i++) { + self->low_value_vector[i] = 10000; + self->index_vector[i] = 0; + } + + // Initialize splitting filter states. + memset(self->upper_state, 0, sizeof(self->upper_state)); + memset(self->lower_state, 0, sizeof(self->lower_state)); + + // Initialize high pass filter states. + memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state)); + + // Initialize mean value memory, for WebRtcVad_FindMinimum(). + for (i = 0; i < kNumChannels; i++) { + self->mean_value[i] = 1600; + } + + // Set aggressiveness mode to default (=`kDefaultMode`). + if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) { + return -1; + } + + self->init_flag = kInitCheck; + + return 0; +} + +// Set aggressiveness mode +int WebRtcVad_set_mode_core(VadInstT* self, int mode) { + int return_value = 0; + + switch (mode) { + case 0: + // Quality mode. + memcpy(self->over_hang_max_1, kOverHangMax1Q, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2Q, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdQ, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdQ, + sizeof(self->total)); + break; + case 1: + // Low bitrate mode. + memcpy(self->over_hang_max_1, kOverHangMax1LBR, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2LBR, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdLBR, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdLBR, + sizeof(self->total)); + break; + case 2: + // Aggressive mode. + memcpy(self->over_hang_max_1, kOverHangMax1AGG, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2AGG, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdAGG, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdAGG, + sizeof(self->total)); + break; + case 3: + // Very aggressive mode. + memcpy(self->over_hang_max_1, kOverHangMax1VAG, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2VAG, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdVAG, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdVAG, + sizeof(self->total)); + break; + default: + return_value = -1; + break; + } + + return return_value; +} + +// Calculate VAD decision by first extracting feature values and then calculate +// probability for both speech and background noise. + +int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) { + int vad; + size_t i; + int16_t speech_nb[240]; // 30 ms in 8 kHz. + // `tmp_mem` is a temporary memory used by resample function, length is + // frame length in 10 ms (480 samples) + 256 extra. + int32_t tmp_mem[480 + 256] = { 0 }; + const size_t kFrameLen10ms48khz = 480; + const size_t kFrameLen10ms8khz = 80; + size_t num_10ms_frames = frame_length / kFrameLen10ms48khz; + + for (i = 0; i < num_10ms_frames; i++) { + WebRtcSpl_Resample48khzTo8khz(speech_frame, + &speech_nb[i * kFrameLen10ms8khz], + &inst->state_48_to_8, + tmp_mem); + } + + // Do VAD on an 8 kHz signal + vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); + + return vad; +} + +int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + size_t len; + int vad; + int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) + + + // Downsample signal 32->16->8 before doing VAD + WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), + frame_length); + len = frame_length / 2; + + WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); + len /= 2; + + // Do VAD on an 8 kHz signal + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); + + return vad; +} + +int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + size_t len; + int vad; + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) + + // Wideband: Downsample signal before doing VAD + WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, + frame_length); + + len = frame_length / 2; + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); + + return vad; +} + +int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + int16_t feature_vector[kNumChannels], total_power; + + // Get power in the bands + total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, + feature_vector); + + // Make a VAD + inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); + + return inst->vad; +} diff --git a/src/webrtc/common_audio/vad/vad_filterbank.c b/src/webrtc/common_audio/vad/vad_filterbank.c new file mode 100644 index 0000000..aff63f7 --- /dev/null +++ b/src/webrtc/common_audio/vad/vad_filterbank.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/vad/vad_filterbank.h" + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Constants used in LogOfEnergy(). +static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. +static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10 + +// Coefficients used by HighPassFilter, Q14. +static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; +static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; + +// Allpass filter coefficients, upper and lower, in Q15. +// Upper: 0.64, Lower: 0.17 +static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; + +// Adjustment for division with two in SplitFilter. +static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; + +// High pass filtering, with a cut-off frequency at 80 Hz, if the `data_in` is +// sampled at 500 Hz. +// +// - data_in [i] : Input audio data sampled at 500 Hz. +// - data_length [i] : Length of input and output data. +// - filter_state [i/o] : State of the filter. +// - data_out [o] : Output audio data in the frequency interval +// 80 - 250 Hz. +static void HighPassFilter(const int16_t* data_in, size_t data_length, + int16_t* filter_state, int16_t* data_out) { + size_t i; + const int16_t* in_ptr = data_in; + int16_t* out_ptr = data_out; + int32_t tmp32 = 0; + + + // The sum of the absolute values of the impulse response: + // The zero/pole-filter has a max amplification of a single sample of: 1.4546 + // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 + // The all-zero section has a max amplification of a single sample of: 1.6189 + // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 + // The all-pole section has a max amplification of a single sample of: 1.9931 + // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 + + for (i = 0; i < data_length; i++) { + // All-zero section (filter coefficients in Q14). + tmp32 = kHpZeroCoefs[0] * *in_ptr; + tmp32 += kHpZeroCoefs[1] * filter_state[0]; + tmp32 += kHpZeroCoefs[2] * filter_state[1]; + filter_state[1] = filter_state[0]; + filter_state[0] = *in_ptr++; + + // All-pole section (filter coefficients in Q14). + tmp32 -= kHpPoleCoefs[1] * filter_state[2]; + tmp32 -= kHpPoleCoefs[2] * filter_state[3]; + filter_state[3] = filter_state[2]; + filter_state[2] = (int16_t) (tmp32 >> 14); + *out_ptr++ = filter_state[2]; + } +} + +// All pass filtering of `data_in`, used before splitting the signal into two +// frequency bands (low pass vs high pass). +// Note that `data_in` and `data_out` can NOT correspond to the same address. +// +// - data_in [i] : Input audio signal given in Q0. +// - data_length [i] : Length of input and output data. +// - filter_coefficient [i] : Given in Q15. +// - filter_state [i/o] : State of the filter given in Q(-1). +// - data_out [o] : Output audio signal given in Q(-1). +static void AllPassFilter(const int16_t* data_in, size_t data_length, + int16_t filter_coefficient, int16_t* filter_state, + int16_t* data_out) { + // The filter can only cause overflow (in the w16 output variable) + // if more than 4 consecutive input numbers are of maximum value and + // has the the same sign as the impulse responses first taps. + // First 6 taps of the impulse response: + // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990 + + size_t i; + int16_t tmp16 = 0; + int32_t tmp32 = 0; + int32_t state32 = ((int32_t) (*filter_state) * (1 << 16)); // Q15 + + for (i = 0; i < data_length; i++) { + tmp32 = state32 + filter_coefficient * *data_in; + tmp16 = (int16_t) (tmp32 >> 16); // Q(-1) + *data_out++ = tmp16; + state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16; // Q14 + state32 *= 2; // Q15. + data_in += 2; + } + + *filter_state = (int16_t) (state32 >> 16); // Q(-1) +} + +// Splits `data_in` into `hp_data_out` and `lp_data_out` corresponding to +// an upper (high pass) part and a lower (low pass) part respectively. +// +// - data_in [i] : Input audio data to be split into two frequency bands. +// - data_length [i] : Length of `data_in`. +// - upper_state [i/o] : State of the upper filter, given in Q(-1). +// - lower_state [i/o] : State of the lower filter, given in Q(-1). +// - hp_data_out [o] : Output audio data of the upper half of the spectrum. +// The length is `data_length` / 2. +// - lp_data_out [o] : Output audio data of the lower half of the spectrum. +// The length is `data_length` / 2. +static void SplitFilter(const int16_t* data_in, size_t data_length, + int16_t* upper_state, int16_t* lower_state, + int16_t* hp_data_out, int16_t* lp_data_out) { + size_t i; + size_t half_length = data_length >> 1; // Downsampling by 2. + int16_t tmp_out; + + // All-pass filtering upper branch. + AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, + hp_data_out); + + // All-pass filtering lower branch. + AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, + lp_data_out); + + // Make LP and HP signals. + for (i = 0; i < half_length; i++) { + tmp_out = *hp_data_out; + *hp_data_out++ -= *lp_data_out; + *lp_data_out++ += tmp_out; + } +} + +// Calculates the energy of `data_in` in dB, and also updates an overall +// `total_energy` if necessary. +// +// - data_in [i] : Input audio data for energy calculation. +// - data_length [i] : Length of input data. +// - offset [i] : Offset value added to `log_energy`. +// - total_energy [i/o] : An external energy updated with the energy of +// `data_in`. +// NOTE: `total_energy` is only updated if +// `total_energy` <= `kMinEnergy`. +// - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4. +static void LogOfEnergy(const int16_t* data_in, size_t data_length, + int16_t offset, int16_t* total_energy, + int16_t* log_energy) { + // `tot_rshifts` accumulates the number of right shifts performed on `energy`. + int tot_rshifts = 0; + // The `energy` will be normalized to 15 bits. We use unsigned integer because + // we eventually will mask out the fractional part. + uint32_t energy = 0; + + RTC_DCHECK(data_in); + RTC_DCHECK_GT(data_length, 0); + + energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, + &tot_rshifts); + + if (energy != 0) { + // By construction, normalizing to 15 bits is equivalent with 17 leading + // zeros of an unsigned 32 bit value. + int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy); + // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is + // (14 << 10), which is what we initialize `log2_energy` with. For a more + // detailed derivations, see below. + int16_t log2_energy = kLogEnergyIntPart; + + tot_rshifts += normalizing_rshifts; + // Normalize `energy` to 15 bits. + // `tot_rshifts` is now the total number of right shifts performed on + // `energy` after normalization. This means that `energy` is in + // Q(-tot_rshifts). + if (normalizing_rshifts < 0) { + energy <<= -normalizing_rshifts; + } else { + energy >>= normalizing_rshifts; + } + + // Calculate the energy of `data_in` in dB, in Q4. + // + // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") = + // 160 * log10(`energy` * 2^`tot_rshifts`) = + // 160 * log10(2) * log2(`energy` * 2^`tot_rshifts`) = + // 160 * log10(2) * (log2(`energy`) + log2(2^`tot_rshifts`)) = + // (160 * log10(2)) * (log2(`energy`) + `tot_rshifts`) = + // `kLogConst` * (`log2_energy` + `tot_rshifts`) + // + // We know by construction that `energy` is normalized to 15 bits. Hence, + // `energy` = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15. + // Further, we'd like `log2_energy` in Q10 + // log2(`energy`) in Q10 = 2^10 * log2(2^14 + frac_Q15) = + // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) = + // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~= + // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) = + // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4) + // + // Note that frac_Q15 = (`energy` & 0x00003FFF) + + // Calculate and add the fractional part to `log2_energy`. + log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4); + + // `kLogConst` is in Q9, `log2_energy` in Q10 and `tot_rshifts` in Q0. + // Note that we in our derivation above have accounted for an output in Q4. + *log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) + + ((tot_rshifts * kLogConst) >> 9)); + + if (*log_energy < 0) { + *log_energy = 0; + } + } else { + *log_energy = offset; + return; + } + + *log_energy += offset; + + // Update the approximate `total_energy` with the energy of `data_in`, if + // `total_energy` has not exceeded `kMinEnergy`. `total_energy` is used as an + // energy indicator in WebRtcVad_GmmProbability() in vad_core.c. + if (*total_energy <= kMinEnergy) { + if (tot_rshifts >= 0) { + // We know by construction that the `energy` > `kMinEnergy` in Q0, so add + // an arbitrary value such that `total_energy` exceeds `kMinEnergy`. + *total_energy += kMinEnergy + 1; + } else { + // By construction `energy` is represented by 15 bits, hence any number of + // right shifted `energy` will fit in an int16_t. In addition, adding the + // value to `total_energy` is wrap around safe as long as + // `kMinEnergy` < 8192. + *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. + } + } +} + +int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, + size_t data_length, int16_t* features) { + int16_t total_energy = 0; + // We expect `data_length` to be 80, 160 or 240 samples, which corresponds to + // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will + // have at most 120 samples after the first split and at most 60 samples after + // the second split. + int16_t hp_120[120], lp_120[120]; + int16_t hp_60[60], lp_60[60]; + const size_t half_data_length = data_length >> 1; + size_t length = half_data_length; // `data_length` / 2, corresponds to + // bandwidth = 2000 Hz after downsampling. + + // Initialize variables for the first SplitFilter(). + int frequency_band = 0; + const int16_t* in_ptr = data_in; // [0 - 4000] Hz. + int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. + int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. + + RTC_DCHECK_LE(data_length, 240); + RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum `frequency_band`. + + // Split at 2000 Hz and downsample. + SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. + frequency_band = 1; + in_ptr = hp_120; // [2000 - 4000] Hz. + hp_out_ptr = hp_60; // [3000 - 4000] Hz. + lp_out_ptr = lp_60; // [2000 - 3000] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 3000 Hz - 4000 Hz. + length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz. + + LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]); + + // Energy in 2000 Hz - 3000 Hz. + LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]); + + // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample. + frequency_band = 2; + in_ptr = lp_120; // [0 - 2000] Hz. + hp_out_ptr = hp_60; // [1000 - 2000] Hz. + lp_out_ptr = lp_60; // [0 - 1000] Hz. + length = half_data_length; // `data_length` / 2 <=> bandwidth = 2000 Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 1000 Hz - 2000 Hz. + length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz. + LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]); + + // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample. + frequency_band = 3; + in_ptr = lp_60; // [0 - 1000] Hz. + hp_out_ptr = hp_120; // [500 - 1000] Hz. + lp_out_ptr = lp_120; // [0 - 500] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 500 Hz - 1000 Hz. + length >>= 1; // `data_length` / 8 <=> bandwidth = 500 Hz. + LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]); + + // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample. + frequency_band = 4; + in_ptr = lp_120; // [0 - 500] Hz. + hp_out_ptr = hp_60; // [250 - 500] Hz. + lp_out_ptr = lp_60; // [0 - 250] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 250 Hz - 500 Hz. + length >>= 1; // `data_length` / 16 <=> bandwidth = 250 Hz. + LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); + + // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. + HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); + + // Energy in 80 Hz - 250 Hz. + LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); + + return total_energy; +} diff --git a/src/webrtc/common_audio/vad/vad_gmm.c b/src/webrtc/common_audio/vad/vad_gmm.c new file mode 100644 index 0000000..4a7fe67 --- /dev/null +++ b/src/webrtc/common_audio/vad/vad_gmm.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/vad/vad_gmm.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +static const int32_t kCompVar = 22005; +static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12. + +// For a normal distribution, the probability of `input` is calculated and +// returned (in Q20). The formula for normal distributed probability is +// +// 1 / s * exp(-(x - m)^2 / (2 * s^2)) +// +// where the parameters are given in the following Q domains: +// m = `mean` (Q7) +// s = `std` (Q7) +// x = `input` (Q4) +// in addition to the probability we output `delta` (in Q11) used when updating +// the noise/speech model. +int32_t WebRtcVad_GaussianProbability(int16_t input, + int16_t mean, + int16_t std, + int16_t* delta) { + int16_t tmp16, inv_std, inv_std2, exp_value = 0; + int32_t tmp32; + + // Calculate `inv_std` = 1 / s, in Q10. + // 131072 = 1 in Q17, and (`std` >> 1) is for rounding instead of truncation. + // Q-domain: Q17 / Q7 = Q10. + tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); + inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); + + // Calculate `inv_std2` = 1 / s^2, in Q14. + tmp16 = (inv_std >> 2); // Q10 -> Q8. + // Q-domain: (Q8 * Q8) >> 2 = Q14. + inv_std2 = (int16_t)((tmp16 * tmp16) >> 2); + // TODO(bjornv): Investigate if changing to + // inv_std2 = (int16_t)((inv_std * inv_std) >> 6); + // gives better accuracy. + + tmp16 = (input << 3); // Q4 -> Q7 + tmp16 = tmp16 - mean; // Q7 - Q7 = Q7 + + // To be used later, when updating noise/speech model. + // `delta` = (x - m) / s^2, in Q11. + // Q-domain: (Q14 * Q7) >> 10 = Q11. + *delta = (int16_t)((inv_std2 * tmp16) >> 10); + + // Calculate the exponent `tmp32` = (x - m)^2 / (2 * s^2), in Q10. Replacing + // division by two with one shift. + // Q-domain: (Q11 * Q7) >> 8 = Q10. + tmp32 = (*delta * tmp16) >> 9; + + // If the exponent is small enough to give a non-zero probability we calculate + // `exp_value` ~= exp(-(x - m)^2 / (2 * s^2)) + // ~= exp2(-log2(exp(1)) * `tmp32`). + if (tmp32 < kCompVar) { + // Calculate `tmp16` = log2(exp(1)) * `tmp32`, in Q10. + // Q-domain: (Q12 * Q10) >> 12 = Q10. + tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12); + tmp16 = -tmp16; + exp_value = (0x0400 | (tmp16 & 0x03FF)); + tmp16 ^= 0xFFFF; + tmp16 >>= 10; + tmp16 += 1; + // Get `exp_value` = exp(-`tmp32`) in Q10. + exp_value >>= tmp16; + } + + // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20. + // Q-domain: Q10 * Q10 = Q20. + return inv_std * exp_value; +} diff --git a/src/webrtc/common_audio/vad/vad_sp.c b/src/webrtc/common_audio/vad/vad_sp.c new file mode 100644 index 0000000..3d24cf6 --- /dev/null +++ b/src/webrtc/common_audio/vad/vad_sp.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/vad/vad_sp.h" + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/vad/vad_core.h" + +// Allpass filter coefficients, upper and lower, in Q13. +// Upper: 0.64, Lower: 0.17. +static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. +static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. +static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. + +// TODO(bjornv): Move this function to vad_filterbank.c. +// Downsampling filter based on splitting filter and allpass functions. +void WebRtcVad_Downsampling(const int16_t* signal_in, + int16_t* signal_out, + int32_t* filter_state, + size_t in_length) { + int16_t tmp16_1 = 0, tmp16_2 = 0; + int32_t tmp32_1 = filter_state[0]; + int32_t tmp32_2 = filter_state[1]; + size_t n = 0; + // Downsampling by 2 gives half length. + size_t half_length = (in_length >> 1); + + // Filter coefficients in Q13, filter state in Q0. + for (n = 0; n < half_length; n++) { + // All-pass filtering upper branch. + tmp16_1 = (int16_t) ((tmp32_1 >> 1) + + ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); + *signal_out = tmp16_1; + tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); + + // All-pass filtering lower branch. + tmp16_2 = (int16_t) ((tmp32_2 >> 1) + + ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); + *signal_out++ += tmp16_2; + tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); + } + // Store the filter states. + filter_state[0] = tmp32_1; + filter_state[1] = tmp32_2; +} + +// Inserts `feature_value` into `low_value_vector`, if it is one of the 16 +// smallest values the last 100 frames. Then calculates and returns the median +// of the five smallest values. +int16_t WebRtcVad_FindMinimum(VadInstT* self, + int16_t feature_value, + int channel) { + int i = 0, j = 0; + int position = -1; + // Offset to beginning of the 16 minimum values in memory. + const int offset = (channel << 4); + int16_t current_median = 1600; + int16_t alpha = 0; + int32_t tmp32 = 0; + // Pointer to memory for the 16 minimum values and the age of each value of + // the `channel`. + int16_t* age = &self->index_vector[offset]; + int16_t* smallest_values = &self->low_value_vector[offset]; + + RTC_DCHECK_LT(channel, kNumChannels); + + // Each value in `smallest_values` is getting 1 loop older. Update `age`, and + // remove old values. + for (i = 0; i < 16; i++) { + if (age[i] != 100) { + age[i]++; + } else { + // Too old value. Remove from memory and shift larger values downwards. + for (j = i; j < 15; j++) { + smallest_values[j] = smallest_values[j + 1]; + age[j] = age[j + 1]; + } + age[15] = 101; + smallest_values[15] = 10000; + } + } + + // Check if `feature_value` is smaller than any of the values in + // `smallest_values`. If so, find the `position` where to insert the new value + // (`feature_value`). + if (feature_value < smallest_values[7]) { + if (feature_value < smallest_values[3]) { + if (feature_value < smallest_values[1]) { + if (feature_value < smallest_values[0]) { + position = 0; + } else { + position = 1; + } + } else if (feature_value < smallest_values[2]) { + position = 2; + } else { + position = 3; + } + } else if (feature_value < smallest_values[5]) { + if (feature_value < smallest_values[4]) { + position = 4; + } else { + position = 5; + } + } else if (feature_value < smallest_values[6]) { + position = 6; + } else { + position = 7; + } + } else if (feature_value < smallest_values[15]) { + if (feature_value < smallest_values[11]) { + if (feature_value < smallest_values[9]) { + if (feature_value < smallest_values[8]) { + position = 8; + } else { + position = 9; + } + } else if (feature_value < smallest_values[10]) { + position = 10; + } else { + position = 11; + } + } else if (feature_value < smallest_values[13]) { + if (feature_value < smallest_values[12]) { + position = 12; + } else { + position = 13; + } + } else if (feature_value < smallest_values[14]) { + position = 14; + } else { + position = 15; + } + } + + // If we have detected a new small value, insert it at the correct position + // and shift larger values up. + if (position > -1) { + for (i = 15; i > position; i--) { + smallest_values[i] = smallest_values[i - 1]; + age[i] = age[i - 1]; + } + smallest_values[position] = feature_value; + age[position] = 1; + } + + // Get `current_median`. + if (self->frame_counter > 2) { + current_median = smallest_values[2]; + } else if (self->frame_counter > 0) { + current_median = smallest_values[0]; + } + + // Smooth the median value. + if (self->frame_counter > 0) { + if (current_median < self->mean_value[channel]) { + alpha = kSmoothingDown; // 0.2 in Q15. + } else { + alpha = kSmoothingUp; // 0.99 in Q15. + } + } + tmp32 = (alpha + 1) * self->mean_value[channel]; + tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; + tmp32 += 16384; + self->mean_value[channel] = (int16_t) (tmp32 >> 15); + + return self->mean_value[channel]; +} diff --git a/src/webrtc/common_audio/vad/webrtc_vad.c b/src/webrtc/common_audio/vad/webrtc_vad.c new file mode 100644 index 0000000..6dd14d8 --- /dev/null +++ b/src/webrtc/common_audio/vad/webrtc_vad.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/vad/include/webrtc_vad.h" + +#include +#include + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/vad/vad_core.h" + +static const int kInitCheck = 42; +static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; +static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); +static const int kMaxFrameLengthMs = 30; + +VadInst* WebRtcVad_Create(void) { + VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT)); + + self->init_flag = 0; + + return (VadInst*)self; +} + +void WebRtcVad_Free(VadInst* handle) { + free(handle); +} + +// TODO(bjornv): Move WebRtcVad_InitCore() code here. +int WebRtcVad_Init(VadInst* handle) { + // Initialize the core VAD component. + return WebRtcVad_InitCore((VadInstT*) handle); +} + +// TODO(bjornv): Move WebRtcVad_set_mode_core() code here. +int WebRtcVad_set_mode(VadInst* handle, int mode) { + VadInstT* self = (VadInstT*) handle; + + if (handle == NULL) { + return -1; + } + if (self->init_flag != kInitCheck) { + return -1; + } + + return WebRtcVad_set_mode_core(self, mode); +} + +int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, + size_t frame_length) { + int vad = -1; + VadInstT* self = (VadInstT*) handle; + + if (handle == NULL) { + return -1; + } + + if (self->init_flag != kInitCheck) { + return -1; + } + if (audio_frame == NULL) { + return -1; + } + if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { + return -1; + } + + if (fs == 48000) { + vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); + } else if (fs == 32000) { + vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); + } else if (fs == 16000) { + vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); + } else if (fs == 8000) { + vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); + } + + if (vad > 0) { + vad = 1; + } + return vad; +} + +int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { + int return_value = -1; + size_t i; + int valid_length_ms; + size_t valid_length; + + // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and + // see if we have a matching pair. + for (i = 0; i < kRatesSize; i++) { + if (kValidRates[i] == rate) { + for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; + valid_length_ms += 10) { + valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); + if (frame_length == valid_length) { + return_value = 0; + break; + } + } + break; + } + } + + return return_value; +}