Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add _mm_minpos_epu16 #46

Merged
merged 1 commit into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion sse2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1931,7 +1931,22 @@ FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) {
return vreinterpretq_f32_m128(__riscv_vslideup_vx_f32m1(_a, min, 0, 1));
}

// FORCE_INLINE __m128i _mm_minpos_epu16 (__m128i a) {}
// Horizontal unsigned minimum of the eight 16-bit lanes of `a`.
//
// Result layout (as 8 x uint16):
//   lane 0    : the minimum value
//   lane 1    : the lowest lane index holding that minimum
//   lanes 2-7 : zero
FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) {
  // TODO add macro for ignoring index
  vuint16m1_t _a = vreinterpretq_m128i_u16(a);

  // Reduce to the minimum (delivered in lane 0), then broadcast it to all
  // eight lanes so it can be compared lane-wise against the input.
  vuint16m1_t a_min = __riscv_vredminu_vs_u16m1_u16m1(_a, _a, 8);
  vuint16m1_t a_min_dup = __riscv_vrgather_vx_u16m1(a_min, 0, 8);

  // Lanes equal to the minimum keep their own index; every other lane gets
  // UINT16_MAX so that it cannot win the second min-reduction below.
  vuint16m1_t vid = __riscv_vid_v_u16m1(8);
  vbool16_t eq_mask = __riscv_vmseq_vv_u16m1_b16(_a, a_min_dup, 8);
  vuint16m1_t min_vids = __riscv_vmerge_vvm_u16m1(
      __riscv_vmv_v_x_u16m1(UINT16_MAX, 8), vid, eq_mask, 8);

  // FIXME something is wrong with __riscv_vredminu_vs_u16m1_u16m1_m();
  // presumably that is why the mask is applied through vmerge above and an
  // unmasked reduction is used here.
  vuint16m1_t min_vid = __riscv_vredminu_vs_u16m1_u16m1(min_vids, min_vids, 8);

  // Pack {min, index} into lanes 0 and 1. Lanes >= 2 of min_index are tail
  // elements of this slide and are never read afterwards.
  vuint16m1_t min_index = __riscv_vslideup_vx_u16m1(a_min_dup, min_vid, 1, 2);

  // Copy lanes 0-1 over a zero vector. The tail-undisturbed (_tu) variant is
  // required: the non-policy intrinsic is tail-agnostic, so lanes 2-7 would
  // not be guaranteed to keep the zeros they must contain.
  vuint16m1_t zeros = __riscv_vmv_v_x_u16m1(0, 8);
  return vreinterpretq_u16_m128i(
      __riscv_vslideup_vx_u16m1_tu(zeros, min_index, 0, 2));
}

// FORCE_INLINE __m128i _mm_move_epi64 (__m128i a) {}

Expand Down
45 changes: 17 additions & 28 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,24 +264,14 @@ class SSE2RVV_TEST_IMPL : public SSE2RVV_TEST {
}

ret = run_single_test(test, i);
if (ret == TEST_FAIL) // the test failed...
{
// Set a breakpoint here if you want to step through the failure
// case in the debugger
ret = run_single_test(test, i);
if (ret == TEST_FAIL) {
break;
}
}
return ret;
}
};

// Table of intrinsic names as C strings, built with an X-macro: `_(x)` is
// temporarily defined to stringify its argument and append a comma, then
// INTRIN_LIST is expanded inside the initializer and `_` is undefined again.
// NOTE(review): INTRIN_LIST is defined elsewhere; presumably it contains one
// `_(name)` entry per intrinsic under test — confirm against its definition.
const char *instructionString[] = {
#define _(x) #x,
INTRIN_LIST
#undef _
};

// Produce rounding which is the same as SSE instructions with _MM_ROUND_NEAREST
// rounding mode
static inline float bankers_rounding(float val) {
Expand Down Expand Up @@ -10192,24 +10182,23 @@ result_t test_mm_min_epu32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
}

// Validates _mm_minpos_epu16 against a scalar reference. The first eight
// 16-bit values of the test input are treated as unsigned; the expected
// vector is {minimum, lowest index of the minimum, 0, 0, 0, 0, 0, 0}.
// Reports TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_mm_minpos_epu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;

  // Seed the scan with lane 0, then look for strictly smaller values so that
  // ties resolve to the earliest lane.
  uint16_t smallest = (uint16_t)_a[0];
  uint16_t position = 0;
  for (int lane = 1; lane < 8; lane++) {
    uint16_t candidate = (uint16_t)_a[lane];
    if (candidate < smallest) {
      smallest = candidate;
      position = (uint16_t)lane;
    }
  }
  uint16_t expected[8] = {smallest, position, 0, 0, 0, 0, 0, 0};

  __m128i input = load_m128i(_a);
  __m128i actual = _mm_minpos_epu16(input);
  return VALIDATE_UINT16_M128(actual, expected);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_mm_mpsadbw_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
Expand Down