From 0aaefc8d9f7843d0d286c6020d364fff3f3b5786 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Mon, 1 Jan 2024 12:04:33 +0800 Subject: [PATCH] feat: Add _mm_extract* --- sse2rvv.h | 36 +++++-- tests/impl.cpp | 255 ++++++++++++++++++++++++------------------------- 2 files changed, 157 insertions(+), 134 deletions(-) diff --git a/sse2rvv.h b/sse2rvv.h index dab9f3d..b999fe9 100644 --- a/sse2rvv.h +++ b/sse2rvv.h @@ -1294,17 +1294,41 @@ FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) { // FORCE_INLINE __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8) {} -// FORCE_INLINE int _mm_extract_epi16 (__m128i a, int imm8) {} +FORCE_INLINE int _mm_extract_epi16(__m128i a, int imm8) { + vint16m1_t _a = vreinterpretq_m128i_i16(a); + vint16m1_t a_s = __riscv_vslidedown_vx_i16m1(_a, imm8 & 0x7, 8); + return (int)__riscv_vmv_x_s_i16m1_i16(a_s) & UINT16_MAX; +} -// FORCE_INLINE int _mm_extract_epi32 (__m128i a, const int imm8) {} +FORCE_INLINE int _mm_extract_epi32(__m128i a, const int imm8) { + vint32m1_t _a = vreinterpretq_m128i_i32(a); + vint32m1_t a_s = __riscv_vslidedown_vx_i32m1(_a, imm8 & 0x3, 4); + return (int)__riscv_vmv_x_s_i32m1_i32(a_s); +} -// FORCE_INLINE __int64 _mm_extract_epi64 (__m128i a, const int imm8) {} +FORCE_INLINE __int64 _mm_extract_epi64(__m128i a, const int imm8) { + vint64m1_t _a = vreinterpretq_m128i_i64(a); + vint64m1_t a_s = __riscv_vslidedown_vx_i64m1(_a, imm8 & 0x1, 2); + return (__int64)__riscv_vmv_x_s_i64m1_i64(a_s); +} -// FORCE_INLINE int _mm_extract_epi8 (__m128i a, const int imm8) {} +FORCE_INLINE int _mm_extract_epi8(__m128i a, const int imm8) { + vint8m1_t _a = vreinterpretq_m128i_i8(a); + vint8m1_t a_s = __riscv_vslidedown_vx_i8m1(_a, imm8 & 0xf, 16); + return (int)__riscv_vmv_x_s_i8m1_i8(a_s) & UINT8_MAX; +} -// FORCE_INLINE int _mm_extract_pi16 (__m64 a, int imm8) {} +FORCE_INLINE int _mm_extract_pi16(__m64 a, int imm8) { + vint16m1_t _a = vreinterpretq_m64_i16(a); + vint16m1_t a_s = __riscv_vslidedown_vx_i16m1(_a, imm8 & 0x3, 8); + return (int)__riscv_vmv_x_s_i16m1_i16(a_s) & UINT16_MAX; +} -// FORCE_INLINE int _mm_extract_ps (__m128 a, const int imm8) {} +FORCE_INLINE int _mm_extract_ps(__m128 a, const int imm8) { + vint32m1_t _a = vreinterpretq_m128_i32(a); + vint32m1_t a_s = __riscv_vslidedown_vx_i32m1(_a, imm8 & 0x3, 4); + return (int)__riscv_vmv_x_s_i32m1_i32(a_s); +} // FORCE_INLINE __m128d _mm_floor_pd (__m128d a) {} diff --git a/tests/impl.cpp b/tests/impl.cpp index 48d1009..1c59046 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -2061,43 +2061,41 @@ result_t test_mm_div_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { } result_t test_mm_extract_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // FIXME GCC has bug on "_mm_extract_pi16" intrinsics. We will enable this - // test when GCC fix this bug. - // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98495 for more - // information - // #if defined(__clang__) || defined(_MSC_VER) - // uint64_t *_a = (uint64_t *)impl.test_cases_int_pointer1; - // const int idx = iter & 0x3; - // - // __m64 a = load_m64(_a); - // int c; - // switch (idx) { - // case 0: - // c = _mm_extract_pi16(a, 0); - // break; - // case 1: - // c = _mm_extract_pi16(a, 1); - // break; - // case 2: - // c = _mm_extract_pi16(a, 2); - // break; - // case 3: - // c = _mm_extract_pi16(a, 3); - // break; - // } - // - // ASSERT_RETURN((uint64_t)c == ((*_a >> (idx * 16)) & 0xFFFF)); - // ASSERT_RETURN(0 == ((uint64_t)c & 0xFFFF0000)); - // return TEST_SUCCESS; - // #else - // #else +#ifdef ENABLE_TEST_ALL +// FIXME GCC has bug on "_mm_extract_pi16" intrinsics. We will enable this +// test when GCC fix this bug. +// see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98495 for more +// information +#if defined(__clang__) || defined(_MSC_VER) + uint64_t *_a = (uint64_t *)impl.test_cases_int_pointer1; + const int idx = iter & 0x3; + + __m64 a = load_m64(_a); + int c; + switch (idx) { + case 0: + c = _mm_extract_pi16(a, 0); + break; + case 1: + c = _mm_extract_pi16(a, 1); + break; + case 2: + c = _mm_extract_pi16(a, 2); + break; + case 3: + c = _mm_extract_pi16(a, 3); + break; + } + + ASSERT_RETURN((uint64_t)c == ((*_a >> (idx * 16)) & 0xFFFF)); + ASSERT_RETURN(0 == ((uint64_t)c & 0xFFFF0000)); + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL - // #endif - // #else +#endif +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_malloc(const SSE2RVV_TEST_IMPL &impl, uint32_t iter); @@ -5552,43 +5550,43 @@ result_t test_mm_div_sd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { } result_t test_mm_extract_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1; - // const int idx = iter & 0x7; - // __m128i a = load_m128i(_a); - // int c; - // switch (idx) { - // case 0: - // c = _mm_extract_epi16(a, 0); - // break; - // case 1: - // c = _mm_extract_epi16(a, 1); - // break; - // case 2: - // c = _mm_extract_epi16(a, 2); - // break; - // case 3: - // c = _mm_extract_epi16(a, 3); - // break; - // case 4: - // c = _mm_extract_epi16(a, 4); - // break; - // case 5: - // c = _mm_extract_epi16(a, 5); - // break; - // case 6: - // c = _mm_extract_epi16(a, 6); - // break; - // case 7: - // c = _mm_extract_epi16(a, 7); - // break; - // } - // - // ASSERT_RETURN(c == *(_a + idx)); - // return TEST_SUCCESS; - // #else +#ifdef ENABLE_TEST_ALL + uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1; + const int idx = iter & 0x7; + __m128i a = load_m128i(_a); + int c; + switch (idx) { + case 0: + c = _mm_extract_epi16(a, 0); + break; + case 1: + c = _mm_extract_epi16(a, 1); + break; + case 2: + c = _mm_extract_epi16(a, 2); + break; + case 3: + c = _mm_extract_epi16(a, 3); + break; + case 4: + c = _mm_extract_epi16(a, 4); + break; + case 5: + c = _mm_extract_epi16(a, 5); + break; + case 6: + c = _mm_extract_epi16(a, 6); + break; + case 7: + c = _mm_extract_epi16(a, 7); + break; + } + + ASSERT_RETURN(c == *(_a + idx)); + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_insert_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { @@ -9792,76 +9790,77 @@ result_t test_mm_dp_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { } result_t test_mm_extract_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // int32_t *_a = (int32_t *)impl.test_cases_int_pointer1; - // __m128i a = load_m128i(_a); - // int c; - // - // #define TEST_IMPL(IDX) - // c = _mm_extract_epi32(a, IDX); - // ASSERT_RETURN(c == *(_a + IDX)); - // - // IMM_4_ITER - // #undef TEST_IMPL - // return TEST_SUCCESS; - // #else +#ifdef ENABLE_TEST_ALL + int32_t *_a = (int32_t *)impl.test_cases_int_pointer1; + __m128i a = load_m128i(_a); + int c; + +#define TEST_IMPL(IDX) \ + c = _mm_extract_epi32(a, IDX); \ + ASSERT_RETURN(c == *(_a + IDX)); + + IMM_4_ITER +#undef TEST_IMPL + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_extract_epi64(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // int64_t *_a = (int64_t *)impl.test_cases_int_pointer1; - // __m128i a = load_m128i(_a); - // __int64 c; - // - // #define TEST_IMPL(IDX) - // c = _mm_extract_epi64(a, IDX); - // ASSERT_RETURN(c == *(_a + IDX)); - // - // IMM_2_ITER - // #undef TEST_IMPL - // return TEST_SUCCESS; - // #else +#ifdef ENABLE_TEST_ALL + int64_t *_a = (int64_t *)impl.test_cases_int_pointer1; + __m128i a = load_m128i(_a); + __int64 c; + +#define TEST_IMPL(IDX) \ + c = _mm_extract_epi64(a, IDX); \ + ASSERT_RETURN(c == *(_a + IDX)); + + IMM_2_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_extract_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1; - // __m128i a = load_m128i(_a); - // int c; - // - // #define TEST_IMPL(IDX) - // c = _mm_extract_epi8(a, IDX); - // ASSERT_RETURN(c == *(_a + IDX)); - // - // IMM_8_ITER - // #undef TEST_IMPL - // return TEST_SUCCESS; - // #else +#ifdef ENABLE_TEST_ALL + uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1; + __m128i a = load_m128i(_a); + int c; + +#define TEST_IMPL(IDX) \ + c = _mm_extract_epi8(a, IDX); \ + ASSERT_RETURN(c == *(_a + IDX)); + + IMM_8_ITER +#undef TEST_IMPL + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_extract_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) { - // #ifdef ENABLE_TEST_ALL - // const float *_a = (const float *)impl.test_cases_float_pointer1; - // - // __m128 a = _mm_load_ps(_a); - // int32_t c; - // - // #define TEST_IMPL(IDX) - // c = _mm_extract_ps(a, IDX); - // ASSERT_RETURN(c == *(const int32_t *)(_a + IDX)); - // - // IMM_4_ITER - // #undef TEST_IMPL - // return TEST_SUCCESS; - // #else +#ifdef ENABLE_TEST_ALL + const float *_a = (const float *)impl.test_cases_float_pointer1; + + __m128 a = _mm_load_ps(_a); + int32_t c; + +#define TEST_IMPL(IDX) \ + c = _mm_extract_ps(a, IDX); \ + ASSERT_RETURN(c == *(const int32_t *)(_a + IDX)); + + IMM_4_ITER +#undef TEST_IMPL + return TEST_SUCCESS; +#else return TEST_UNIMPL; - // #endif // ENABLE_TEST_ALL +#endif // ENABLE_TEST_ALL } result_t test_mm_floor_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {