diff --git a/cmake/libs/libfaiss.cmake b/cmake/libs/libfaiss.cmake index f4022bb4a..e9d175f59 100644 --- a/cmake/libs/libfaiss.cmake +++ b/cmake/libs/libfaiss.cmake @@ -34,12 +34,7 @@ if(__X86_64) add_library(utils_avx OBJECT ${UTILS_AVX_SRC}) add_library(utils_avx512 OBJECT ${UTILS_AVX512_SRC}) - check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORTS_F16C) - if(COMPILER_SUPPORTS_F16C) - target_compile_options(utils_sse PRIVATE -msse4.2 -mpopcnt -mf16c) - else() - target_compile_options(utils_sse PRIVATE -msse4.2 -mpopcnt) - endif() + target_compile_options(utils_sse PRIVATE -msse4.2 -mpopcnt) target_compile_options(utils_avx PRIVATE -mfma -mf16c -mavx2 -mpopcnt) target_compile_options(utils_avx512 PRIVATE -mfma -mf16c -mavx512f -mavx512dq -mavx512bw -mpopcnt -mavx512vl) diff --git a/src/simd/distances_sse.cc b/src/simd/distances_sse.cc index 22c6d4035..15ca437d0 100644 --- a/src/simd/distances_sse.cc +++ b/src/simd/distances_sse.cc @@ -65,24 +65,6 @@ fvec_norm_L2sqr_sse(const float* x, size_t d) { return _mm_cvtss_f32(msum1); } -float -fp16_vec_norm_L2sqr_sse(const knowhere::fp16* x, size_t d) { - __m128 m_res = _mm_setzero_ps(); - while (d >= 4) { - __m128 m_x = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i_u*)x)); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_x)); - x += 4; - d -= 4; - } - if (d > 0) { - __m128 m_x = _mm_cvtph_ps(mm_masked_read_short(d, (uint16_t*)x)); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_x)); - } - m_res = _mm_hadd_ps(m_res, m_res); - m_res = _mm_hadd_ps(m_res, m_res); - return _mm_cvtss_f32(m_res); -} - float bf16_vec_norm_L2sqr_sse(const knowhere::bf16* x, size_t d) { __m128 m_res = _mm_setzero_ps(); @@ -315,29 +297,6 @@ fvec_L2sqr_sse(const float* x, const float* y, size_t d) { return _mm_cvtss_f32(msum1); } -float -fp16_vec_L2sqr_sse(const knowhere::fp16* x, const knowhere::fp16* y, size_t d) { - __m128 m_res = _mm_setzero_ps(); - while (d >= 4) { - __m128 m_x = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i_u*)x)); - __m128 m_y = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i_u*)y)); - m_x = _mm_sub_ps(m_x, m_y); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_x)); - x += 4; - y += 4; - d -= 4; - } - if (d > 0) { - __m128 m_x = _mm_cvtph_ps(mm_masked_read_short(d, (uint16_t*)x)); - __m128 m_y = _mm_cvtph_ps(mm_masked_read_short(d, (uint16_t*)y)); - m_x = _mm_sub_ps(m_x, m_y); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_x)); - } - m_res = _mm_hadd_ps(m_res, m_res); - m_res = _mm_hadd_ps(m_res, m_res); - return _mm_cvtss_f32(m_res); -} - float bf16_vec_L2sqr_sse(const knowhere::bf16* x, const knowhere::bf16* y, size_t d) { __m128 m_res = _mm_setzero_ps(); @@ -387,27 +346,6 @@ fvec_inner_product_sse(const float* x, const float* y, size_t d) { return _mm_cvtss_f32(msum1); } -float -fp16_vec_inner_product_sse(const knowhere::fp16* x, const knowhere::fp16* y, size_t d) { - __m128 m_res = _mm_setzero_ps(); - while (d >= 4) { - __m128 m_x = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i*)x)); - __m128 m_y = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i*)y)); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_y)); - x += 4; - y += 4; - d -= 4; - } - if (d > 0) { - __m128 m_x = _mm_cvtph_ps(mm_masked_read_short(d, (uint16_t*)x)); - __m128 m_y = _mm_cvtph_ps(mm_masked_read_short(d, (uint16_t*)y)); - m_res = _mm_add_ps(m_res, _mm_mul_ps(m_x, m_y)); - } - m_res = _mm_hadd_ps(m_res, m_res); - m_res = _mm_hadd_ps(m_res, m_res); - return _mm_cvtss_f32(m_res); -} - float bf16_vec_inner_product_sse(const knowhere::bf16* x, const knowhere::bf16* y, size_t d) { __m128 m_res = _mm_setzero_ps(); diff --git a/src/simd/distances_sse.h b/src/simd/distances_sse.h index a2a1a0e4b..4e159a90e 100644 --- a/src/simd/distances_sse.h +++ b/src/simd/distances_sse.h @@ -22,9 +22,6 @@ namespace faiss { float fvec_L2sqr_sse(const float* x, const float* y, size_t d); -float -fp16_vec_L2sqr_sse(const knowhere::fp16* x, const knowhere::fp16* y, size_t d); - float bf16_vec_L2sqr_sse(const knowhere::bf16* x, const knowhere::bf16* y, size_t d); @@ -32,9 +29,6 @@ bf16_vec_L2sqr_sse(const knowhere::bf16* x, const knowhere::bf16* y, size_t d); float fvec_inner_product_sse(const float* x, const float* y, size_t d); -float -fp16_vec_inner_product_sse(const knowhere::fp16* x, const knowhere::fp16* y, size_t d); - float bf16_vec_inner_product_sse(const knowhere::bf16* x, const knowhere::bf16* y, size_t d); @@ -49,9 +43,6 @@ fvec_Linf_sse(const float* x, const float* y, size_t d); float fvec_norm_L2sqr_sse(const float* x, size_t d); -float -fp16_vec_norm_L2sqr_sse(const knowhere::fp16* x, size_t d); - float bf16_vec_norm_L2sqr_sse(const knowhere::bf16* x, size_t d); diff --git a/src/simd/hook.cc b/src/simd/hook.cc index a079eef06..81e0c7e2d 100644 --- a/src/simd/hook.cc +++ b/src/simd/hook.cc @@ -240,15 +240,10 @@ fvec_hook(std::string& simd_type) { bf16_vec_L2sqr = bf16_vec_L2sqr_sse; bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_sse; - if (cpu_support_f16c()) { - fp16_vec_inner_product = fp16_vec_inner_product_sse; - fp16_vec_L2sqr = fp16_vec_L2sqr_sse; - fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_sse; - } else { - fp16_vec_inner_product = fp16_vec_inner_product_ref; - fp16_vec_L2sqr = fp16_vec_L2sqr_ref; - fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_ref; - } + fp16_vec_inner_product = fp16_vec_inner_product_ref; + fp16_vec_L2sqr = fp16_vec_L2sqr_ref; + fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_ref; + simd_type = "SSE4_2"; support_pq_fast_scan = false; } else { diff --git a/tests/ut/test_simd.cc b/tests/ut/test_simd.cc index e978dc3c8..5e15227f4 100644 --- a/tests/ut/test_simd.cc +++ b/tests/ut/test_simd.cc @@ -153,13 +153,6 @@ TEST_CASE("Test fp16 distance", "[fp16]") { REQUIRE_THAT(faiss::fp16_vec_norm_L2sqr_neon(x.get(), dim), Catch::Matchers::WithinRel(ref_norm_l2_dist, 0.001f)); #endif #if defined(__x86_64__) - if (faiss::cpu_support_sse4_2()) { - REQUIRE_THAT(faiss::fp16_vec_L2sqr_sse(x.get(), y.get(), dim), Catch::Matchers::WithinRel(ref_l2_dist, 0.001f)); - REQUIRE_THAT(faiss::fp16_vec_inner_product_sse(x.get(), y.get(), dim), - Catch::Matchers::WithinRel(ref_ip_dist, 0.001f)); - REQUIRE_THAT(faiss::fp16_vec_norm_L2sqr_sse(x.get(), dim), - Catch::Matchers::WithinRel(ref_norm_l2_dist, 0.001f)); - } if (faiss::cpu_support_avx2()) { REQUIRE_THAT(faiss::fp16_vec_L2sqr_avx(x.get(), y.get(), dim), Catch::Matchers::WithinRel(ref_l2_dist, 0.001f)); REQUIRE_THAT(faiss::fp16_vec_inner_product_avx(x.get(), y.get(), dim),