diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ad80b72 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "eigen"] + path = eigen + url = https://gitlab.com/libeigen/eigen.git +[submodule "kissfft"] + path = kissfft + url = https://github.com/mborgerding/kissfft.git diff --git a/eigen b/eigen new file mode 160000 index 0000000..d49ede4 --- /dev/null +++ b/eigen @@ -0,0 +1 @@ +Subproject commit d49ede4dc4203942c2e01a7b39f23f2d9710765e diff --git a/kissfft b/kissfft new file mode 160000 index 0000000..8f47a67 --- /dev/null +++ b/kissfft @@ -0,0 +1 @@ +Subproject commit 8f47a67f595a6641c566087bf5277034be64f24d diff --git a/librosapp.hpp b/librosapp.hpp index 3259b57..7861617 100644 --- a/librosapp.hpp +++ b/librosapp.hpp @@ -1,11 +1,21 @@ #pragma once #include +#include #include #include +#include "eigen/Eigen/Core" +#include "eigen/Eigen/Dense" +#include "eigen/unsupported/Eigen/FFT" #include "kiss_fft.h" +using Eigen::Dynamic; +using Eigen::Matrix; +using Eigen::MatrixXd; +using Eigen::VectorXd; + +using std::complex; using std::string; using std::vector; @@ -47,54 +57,69 @@ vector> transpose(vector> v) { return res; } -vector> stft(stft_arg* arg) { - if (arg->hop_length <= 0) { - arg->hop_length = arg->n_fft / 4; +vector>> convert_complex_matrix( + Matrix, Dynamic, Dynamic> m) { + auto res_vec = vector>>(m.rows()); + for (int i = 0; i < m.rows(); i++) { + for (int j = 0; j < m.cols(); j++) { + std::complex a(m(i, j).real(), m(i, j).imag()); + res_vec[i].push_back(a); + } } + return res_vec; +} + +vector> convert_matrix(Matrix m) { + auto res_vec = vector>(m.rows()); + for (int i = 0; i < m.rows(); i++) { + for (int j = 0; j < m.cols(); j++) { + res_vec[i].push_back(m(i, j)); + } + } + return res_vec; +} +Matrix, Dynamic, Dynamic> stft(stft_arg* arg) { auto padded_len = arg->y.size() + arg->n_fft / 2 * 2; - auto cx_in = vector(padded_len); - for (int i = 0; i < cx_in.size(); i++) { - cx_in[i] = kiss_fft_cpx(); + auto input = Matrix(padded_len); + for (int i = 0; i < padded_len; i++) { if (i < arg->n_fft / 2 || i >= arg->y.size() + arg->n_fft / 2) { - cx_in[i].r = 0.0f; - cx_in[i].i = 0.0f; + input(i) = 0.0; } else { - cx_in[i].r = arg->y[i - arg->n_fft / 2]; - cx_in[i].i = 0.0f; + input(i) = arg->y[i - arg->n_fft / 2]; } } - auto hanning = vector(arg->n_fft); + auto hanning = Matrix(1, arg->n_fft); for (int i = 0; i < arg->n_fft; i++) { auto n = 1 - arg->n_fft + 2 * i; - hanning[i] = + hanning(i) = 0.5 + 0.5 * cosf(LIBROSA_PI * float(n) / float(arg->n_fft - 1)); } - kiss_fft_cfg cfg = kiss_fft_alloc(arg->n_fft, false, 0, 0); - - auto cols = (cx_in.size() - arg->n_fft) / arg->hop_length + 1; - auto result = vector>( - cols, vector(arg->n_fft / 2 + 1)); - auto fft_input = vector(arg->n_fft); - auto fft_result = vector(arg->n_fft); + auto cols = (padded_len - arg->n_fft) / arg->hop_length + 1; + auto result = + Matrix, Dynamic, Dynamic>(cols, arg->n_fft / 2 + 1); + auto fft_input = vector(arg->n_fft); + auto fft_result = vector>(arg->n_fft); for (int col = 0; col < cols; col++) { auto start = col * arg->hop_length; + auto fft_input_m = + input(0, Eigen::seqN(start, arg->n_fft)).cwiseProduct(hanning); + for (int i = 0; i < arg->n_fft; i++) { - fft_input[i].r = cx_in[start + i].r * hanning[i]; - fft_input[i].i = 0.0f; + fft_input[i] = fft_input_m(0, i); } - kiss_fft(cfg, fft_input.data(), fft_result.data()); + + Eigen::FFT fft; + fft.fwd(fft_result, fft_input); + for (int j = 0; j < arg->n_fft / 2 + 1; j++) { - result[col][j].r = fft_result[j].r; - result[col][j].i = fft_result[j].i; + result(col, j) = fft_result[j]; } } - auto res = librosa::transpose(result); - kiss_fft_free(cfg); - return res; + return result.transpose(); } namespace core { @@ -128,11 +153,11 @@ vector hz_to_mel(vector freqs, bool htk = false) { return mels; } -vector mel_to_hz(vector mels, bool htk = false) { - vector freqs(mels.size()); +VectorXd mel_to_hz(VectorXd mels, bool htk = false) { + VectorXd freqs(mels.size()); if (htk) { for (int i = 0; i < mels.size(); i++) { - freqs[i] = 700.0 * (powf(10.0, mels[i] / 2595.0) - 1.0); + freqs(i) = 700.0 * (powf(10.0, mels(i) / 2595.0) - 1.0); } return freqs; } @@ -140,17 +165,15 @@ vector mel_to_hz(vector mels, bool htk = false) { float f_min = 0.0; float f_sp = 200.0 / 3; - for (int i = 0; i < mels.size(); i++) { - freqs[i] = f_min + f_sp * mels[i]; - } + freqs = f_sp * mels + VectorXd::Constant(mels.size(), f_min); float min_log_hz = 1000.0; float min_log_mel = (min_log_hz - f_min) / f_sp; float logstep = logf(6.4) / 27.0; for (int i = 0; i < mels.size(); i++) { - if (mels[i] >= min_log_mel) { - freqs[i] = min_log_hz * expf(logstep * (mels[i] - min_log_mel)); + if (mels(i) >= min_log_mel) { + freqs(i) = min_log_hz * expf(logstep * (mels(i) - min_log_mel)); } } @@ -171,17 +194,13 @@ struct mel_frequencies_arg { } }; -vector mel_frequencies(mel_frequencies_arg* arg) { +VectorXd mel_frequencies(mel_frequencies_arg* arg) { auto fmin_v = vector(1, arg->fmin); auto fmax_v = vector(1, arg->fmax); float min_mel = hz_to_mel(fmin_v, arg->htk)[0]; float max_mel = hz_to_mel(fmax_v, arg->htk)[0]; - auto step = (max_mel - min_mel) / float(arg->n_mels - 1); - vector mels = vector(arg->n_mels); - for (int i = 0; i < arg->n_mels; i++) { - mels[i] = min_mel + step * float(i); - } + auto mels = VectorXd::LinSpaced(arg->n_mels, min_mel, max_mel); return mel_to_hz(mels, arg->htk); } @@ -206,28 +225,19 @@ struct _spectrogram_arg { } }; -vector> _spectrogram(_spectrogram_arg* arg) { +Matrix _spectrogram(_spectrogram_arg* arg) { stft_arg s_arg; s_arg.y = arg->y; s_arg.n_fft = arg->n_fft; s_arg.hop_length = arg->hop_length; auto stft_result = stft(&s_arg); - if (stft_result.size() == 0) { - return vector>(); + if (stft_result.rows() == 0) { + return Matrix(0, 0); } - auto spec = vector>(stft_result.size(), - vector(stft_result[0].size())); - for (int i = 0; i < stft_result.size(); i++) { - for (int j = 0; j < stft_result[i].size(); j++) { - auto norm = sqrtf(powf(stft_result[i][j].i, 2.0) + - powf(stft_result[i][j].r, 2.0)); - - spec[i][j] = powf(norm, arg->power); - } - } - return spec; + auto a = stft_result.cwiseAbs(); + return a.pow(arg->power); } } // namespace spectrum } // namespace core @@ -253,18 +263,16 @@ struct mel_arg { } }; -vector> mel(mel_arg* arg) { +Matrix mel(mel_arg* arg) { int length = 1 + arg->n_fft / 2; if (arg->fmax < 0.0) { arg->fmax = float(arg->sr) / 2.0; } - vector> weights(arg->n_mels, vector(length)); + Matrix weights(arg->n_mels, length); - vector fft_freqs(length); - for (int i = 0; i < length; i++) { - fft_freqs[i] = float(arg->sr) / float(arg->n_fft) * float(i); - } + auto fft_freqs = VectorXd::LinSpaced(length, 0.0, float(length - 1)) * + float(arg->sr) / float(arg->n_fft); librosa::core::convert::mel_frequencies_arg mel_freq_arg; mel_freq_arg.fmin = arg->fmin; @@ -273,51 +281,39 @@ vector> mel(mel_arg* arg) { mel_freq_arg.htk = arg->htk; auto mel_f = librosa::core::convert::mel_frequencies(&mel_freq_arg); - vector fdiff(mel_f.size() - 1); + VectorXd fdiff(mel_f.size() - 1); for (int i = 0; i < fdiff.size(); i++) { - fdiff[i] = mel_f[i + 1] - mel_f[i]; + fdiff(i) = mel_f(i + 1) - mel_f(i); } - vector> ramps(mel_f.size(), vector(fft_freqs.size())); + // TODO: 流石にどうにかできそう + Matrix ramps(mel_f.size(), fft_freqs.size()); for (int i = 0; i < mel_f.size(); i++) { for (int j = 0; j < fft_freqs.size(); j++) { - ramps[i][j] = mel_f[i] - fft_freqs[j]; + ramps(i, j) = mel_f(i) - fft_freqs(j); } } - auto lower = vector(fft_freqs.size()); - auto upper = vector(fft_freqs.size()); + auto lower = VectorXd(fft_freqs.size()); + auto upper = VectorXd(fft_freqs.size()); for (int i = 0; i < arg->n_mels; i++) { - for (int j = 0; j < lower.size(); j++) { - lower[j] = -1 * ramps[i][j] / fdiff[i]; - } + auto lower = -1.0 * ramps(i, Eigen::placeholders::all) / fdiff(i); + // for (int j = 0; j < lower.size(); j++) { + // lower[j] = -1 * ramps[i][j] / fdiff[i]; + // } - for (int j = 0; j < lower.size(); j++) { - upper[j] = ramps[i + 2][j] / fdiff[i + 1]; - } + auto upper = ramps(i + 2, Eigen::placeholders::all) / fdiff(i + 1); + // for (int j = 0; j < lower.size(); j++) { + // upper[j] = ramps[i + 2][j] / fdiff[i + 1]; + // } // weights[i] = np.maximum(0, np.minimum(lower, upper)); - for (int j = 0; j < lower.size(); j++) { - auto lower_upper_minimum = 0.0; - if (lower[j] > upper[j]) { - lower_upper_minimum = upper[j]; - } else { - lower_upper_minimum = lower[j]; - } - - if (lower_upper_minimum > 0.0) { - weights[i][j] = lower_upper_minimum; - } else { - weights[i][j] = 0.0; - } - } + weights(i, Eigen::placeholders::all) = lower.cwiseMin(upper).cwiseMax(0.0); } for (int i = 0; i < arg->n_mels; i++) { - auto enorm = 2.0 / (mel_f[2 + i] - mel_f[i]); - for (int j = 0; j < length; j++) { - weights[i][j] = enorm * weights[i][j]; - } + auto enorm = 2.0 / (mel_f(2 + i) - mel_f(i)); + weights(i, Eigen::placeholders::all) *= enorm; } return weights; @@ -349,7 +345,7 @@ struct melspectrogram_arg { } }; -vector> melspectrogram(melspectrogram_arg* arg) { +Matrix melspectrogram(melspectrogram_arg* arg) { librosa::core::spectrum::_spectrogram_arg spec_arg; spec_arg.y = arg->y; spec_arg.n_fft = arg->n_fft; @@ -358,8 +354,8 @@ vector> melspectrogram(melspectrogram_arg* arg) { spec_arg.power = arg->power; auto S = librosa::core::spectrum::_spectrogram(&spec_arg); - if (S.size() == 0) { - return vector>(); + if (S.rows() == 0) { + return Matrix(0, 0); } librosa::filters::mel_arg mel_arg; @@ -371,15 +367,14 @@ vector> melspectrogram(melspectrogram_arg* arg) { auto mel_basis = librosa::filters::mel(&mel_arg); // return np.einsum("...ft,mf->...mt", S, mel_basis, optimize=True) - auto melspec = - vector>(mel_basis.size(), vector(S[0].size())); - for (int m = 0; m < mel_basis.size(); m++) { - for (int t = 0; t < S[0].size(); t++) { + Matrix melspec(mel_basis.rows(), S.cols()); + for (int m = 0; m < mel_basis.rows(); m++) { + for (int t = 0; t < S.cols(); t++) { float val = 0.0; - for (int f = 0; f < S.size(); f++) { - val += S[f][t] * mel_basis[m][f]; + for (int f = 0; f < S.rows(); f++) { + val += S(f, t) * mel_basis(m, f); } - melspec[m][t] = val; + melspec(m, t) = val; } } diff --git a/test.cpp b/test.cpp index f548bde..72cb08e 100644 --- a/test.cpp +++ b/test.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -33,17 +34,19 @@ int main() { // stft test { + auto start = std::chrono::system_clock::now(); librosa::stft_arg stft_arg; stft_arg.y = audio; stft_arg.n_fft = 512; stft_arg.hop_length = 128; - auto actual_stft = librosa::stft(&stft_arg); + auto actual_stft_complex = librosa::stft(&stft_arg); + auto actual_complex = librosa::convert_complex_matrix(actual_stft_complex); auto actual = vector>(); - for (int i = 0; i < actual_stft.size(); i++) { + for (int i = 0; i < actual_complex.size(); i++) { actual.push_back(vector()); - for (int j = 0; j < actual_stft[i].size(); j++) { - actual[i].push_back(actual_stft[i][j].r); - actual[i].push_back(actual_stft[i][j].i); + for (int j = 0; j < actual_complex[i].size(); j++) { + actual[i].push_back(actual_complex[i][j].real()); + actual[i].push_back(actual_complex[i][j].imag()); } } auto expected = test_json["stft_512_128"].get>>(); @@ -52,16 +55,22 @@ int main() { assert(is_equal_matrix(actual, expected)); std::cout << "[SUCCESS] stft test has passed" << std::endl; + auto dur = std::chrono::system_clock::now() - start; + auto msec = + std::chrono::duration_cast(dur).count(); + std::cout << msec << " milli sec \n"; } // _spectrum test { + auto start = std::chrono::system_clock::now(); librosa::core::spectrum::_spectrogram_arg spec_arg; spec_arg.y = audio; spec_arg.n_fft = 512; spec_arg.hop_length = 128; spec_arg.power = 1.0; - auto actual = librosa::core::spectrum::_spectrogram(&spec_arg); + auto actual_mat = librosa::core::spectrum::_spectrogram(&spec_arg); + auto actual = librosa::convert_matrix(actual_mat); auto expected = test_json["_spectrum_512_128_1.0"].get>>(); @@ -70,16 +79,22 @@ int main() { assert(is_equal_matrix(actual, expected)); std::cout << "[SUCCESS] _spectrum test has passed" << std::endl; + auto dur = std::chrono::system_clock::now() - start; + auto msec = + std::chrono::duration_cast(dur).count(); + std::cout << msec << " milli sec \n"; } // filters::mel test { + auto start = std::chrono::system_clock::now(); librosa::filters::mel_arg mel_arg; mel_arg.sr = 16000; mel_arg.n_fft = 512; mel_arg.n_mels = 60; - auto actual = librosa::filters::mel(&mel_arg); + auto actual_mat = librosa::filters::mel(&mel_arg); + auto actual = librosa::convert_matrix(actual_mat); auto expected = test_json["mel_16000_512_60"].get>>(); assert(actual.size() == expected.size()); @@ -87,10 +102,15 @@ int main() { assert(is_equal_matrix(actual, expected)); std::cout << "[SUCCESS] filters::mel test has passed" << std::endl; + auto dur = std::chrono::system_clock::now() - start; + auto msec = + std::chrono::duration_cast(dur).count(); + std::cout << msec << " milli sec \n"; } // melspectrogram test { + auto start = std::chrono::system_clock::now(); librosa::feature::melspectrogram_arg melspec_arg; melspec_arg.y = audio; melspec_arg.sr = 16000; @@ -98,7 +118,8 @@ int main() { melspec_arg.hop_length = 128; melspec_arg.n_mels = 60; - auto actual = librosa::feature::melspectrogram(&melspec_arg); + auto actual_mat = librosa::feature::melspectrogram(&melspec_arg); + auto actual = librosa::convert_matrix(actual_mat); auto expected = test_json["melspectrogram_16000_512_60_128_2.0"] .get>>(); @@ -107,6 +128,10 @@ int main() { assert(is_equal_matrix(actual, expected)); std::cout << "[SUCCESS] melspectrogram test has passed" << std::endl; + auto dur = std::chrono::system_clock::now() - start; + auto msec = + std::chrono::duration_cast(dur).count(); + std::cout << msec << " milli sec \n"; } return 0;