From 5395ff3181b3677c2a472309c05216fe3f3a8e72 Mon Sep 17 00:00:00 2001
From: Yang Hau <yuanyanghau@gmail.com>
Date: Fri, 2 Feb 2024 21:44:57 +0800
Subject: [PATCH] test: Refactor tests to use loops and consistent _c/c naming

---
 tests/impl.cpp | 1665 +++++++++++++++++-------------------------------
 1 file changed, 598 insertions(+), 1067 deletions(-)

diff --git a/tests/impl.cpp b/tests/impl.cpp
index bdb9cd3..c78f443 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -790,10 +790,6 @@ result_t test_mm_and_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #endif // ENABLE_TEST_ALL
 }
 
-// r0 := ~a0 & b0
-// r1 := ~a1 & b1
-// r2 := ~a2 & b2
-// r3 := ~a3 & b3
 result_t test_mm_andnot_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
@@ -827,10 +823,9 @@ result_t test_mm_avg_pu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
   const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
   uint16_t _c[4];
-  _c[0] = (_a[0] + _b[0] + 1) >> 1;
-  _c[1] = (_a[1] + _b[1] + 1) >> 1;
-  _c[2] = (_a[2] + _b[2] + 1) >> 1;
-  _c[3] = (_a[3] + _b[3] + 1) >> 1;
+  for (int i = 0; i < 4; i++) {
+    _c[i] = (_a[i] + _b[i] + 1) >> 1;
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
@@ -845,21 +840,16 @@ result_t test_mm_avg_pu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
   const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[8];
-  d[0] = (_a[0] + _b[0] + 1) >> 1;
-  d[1] = (_a[1] + _b[1] + 1) >> 1;
-  d[2] = (_a[2] + _b[2] + 1) >> 1;
-  d[3] = (_a[3] + _b[3] + 1) >> 1;
-  d[4] = (_a[4] + _b[4] + 1) >> 1;
-  d[5] = (_a[5] + _b[5] + 1) >> 1;
-  d[6] = (_a[6] + _b[6] + 1) >> 1;
-  d[7] = (_a[7] + _b[7] + 1) >> 1;
+  uint8_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (_a[i] + _b[i] + 1) >> 1;
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
   __m64 c = _mm_avg_pu8(a, b);
 
-  return VALIDATE_UINT8_M64(c, d);
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -872,15 +862,14 @@ result_t test_mm_cmpeq_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] == _b[0] ? -1 : 0;
-  result[1] = _a[1] == _b[1] ? -1 : 0;
-  result[2] = _a[2] == _b[2] ? -1 : 0;
-  result[3] = _a[3] == _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] == _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmpeq_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -893,14 +882,14 @@ result_t test_mm_cmpeq_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  float result[4];
-  result[0] = _a[0] == _b[0] ? ALL_BIT_1_32 : 0;
-  result[1] = _a[1];
-  result[2] = _a[2];
-  result[3] = _a[3];
+  float _c[4];
+  _c[0] = _a[0] == _b[0] ? ALL_BIT_1_32 : 0;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
-  __m128 ret = _mm_cmpeq_ss(a, b);
-  return validate_float(ret, result[0], result[1], result[2], result[3]);
+  __m128 c = _mm_cmpeq_ss(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -913,15 +902,14 @@ result_t test_mm_cmpge_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] >= _b[0] ? -1 : 0;
-  result[1] = _a[1] >= _b[1] ? -1 : 0;
-  result[2] = _a[2] >= _b[2] ? -1 : 0;
-  result[3] = _a[3] >= _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] >= _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmpge_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -934,14 +922,14 @@ result_t test_mm_cmpge_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  float result[4];
-  result[0] = _a[0] >= _b[0] ? ALL_BIT_1_32 : 0;
-  result[1] = _a[1];
-  result[2] = _a[2];
-  result[3] = _a[3];
+  float _c[4];
+  _c[0] = _a[0] >= _b[0] ? ALL_BIT_1_32 : 0;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
-  __m128 ret = _mm_cmpge_ss(a, b);
-  return validate_float(ret, result[0], result[1], result[2], result[3]);
+  __m128 c = _mm_cmpge_ss(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -954,15 +942,14 @@ result_t test_mm_cmpgt_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] > _b[0] ? -1 : 0;
-  result[1] = _a[1] > _b[1] ? -1 : 0;
-  result[2] = _a[2] > _b[2] ? -1 : 0;
-  result[3] = _a[3] > _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] > _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmpgt_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -975,14 +962,14 @@ result_t test_mm_cmpgt_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  float result[4];
-  result[0] = _a[0] > _b[0] ? ALL_BIT_1_32 : 0;
-  result[1] = _a[1];
-  result[2] = _a[2];
-  result[3] = _a[3];
+  float _c[4];
+  _c[0] = _a[0] > _b[0] ? ALL_BIT_1_32 : 0;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
-  __m128 ret = _mm_cmpgt_ss(a, b);
-  return validate_float(ret, result[0], result[1], result[2], result[3]);
+  __m128 c = _mm_cmpgt_ss(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -995,15 +982,14 @@ result_t test_mm_cmple_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] <= _b[0] ? -1 : 0;
-  result[1] = _a[1] <= _b[1] ? -1 : 0;
-  result[2] = _a[2] <= _b[2] ? -1 : 0;
-  result[3] = _a[3] <= _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] <= _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmple_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1016,14 +1002,14 @@ result_t test_mm_cmple_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  float result[4];
-  result[0] = _a[0] <= _b[0] ? ALL_BIT_1_32 : 0;
-  result[1] = _a[1];
-  result[2] = _a[2];
-  result[3] = _a[3];
+  float _c[4];
+  _c[0] = _a[0] <= _b[0] ? ALL_BIT_1_32 : 0;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
-  __m128 ret = _mm_cmple_ss(a, b);
-  return validate_float(ret, result[0], result[1], result[2], result[3]);
+  __m128 c = _mm_cmple_ss(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1036,15 +1022,14 @@ result_t test_mm_cmplt_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] < _b[0] ? -1 : 0;
-  result[1] = _a[1] < _b[1] ? -1 : 0;
-  result[2] = _a[2] < _b[2] ? -1 : 0;
-  result[3] = _a[3] < _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] < _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmplt_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1058,14 +1043,14 @@ result_t test_mm_cmplt_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  float result[4];
-  result[0] = _a[0] < _b[0] ? ALL_BIT_1_32 : 0;
-  result[1] = _a[1];
-  result[2] = _a[2];
-  result[3] = _a[3];
+  float _c[4];
+  _c[0] = _a[0] < _b[0] ? ALL_BIT_1_32 : 0;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
-  __m128 ret = _mm_cmplt_ss(a, b);
-  return validate_float(ret, result[0], result[1], result[2], result[3]);
+  __m128 c = _mm_cmplt_ss(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1078,15 +1063,14 @@ result_t test_mm_cmpneq_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] != _b[0] ? -1 : 0;
-  result[1] = _a[1] != _b[1] ? -1 : 0;
-  result[2] = _a[2] != _b[2] ? -1 : 0;
-  result[3] = _a[3] != _b[3] ? -1 : 0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] != _b[i] ? -1 : 0;
+  }
 
   __m128 ret = _mm_cmpneq_ps(a, b);
-  __m128i iret = *(const __m128i *)&ret;
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = *(const __m128i *)&ret;
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1119,16 +1103,13 @@ result_t test_mm_cmpnge_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  uint32_t _c[4];
-  _c[0] = !(_a[0] >= _b[0]) ? UINT32_MAX : 0;
-  _c[1] = !(_a[1] >= _b[1]) ? UINT32_MAX : 0;
-  _c[2] = !(_a[2] >= _b[2]) ? UINT32_MAX : 0;
-  _c[3] = !(_a[3] >= _b[3]) ? UINT32_MAX : 0;
+  float _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = !(_a[i] >= _b[i]) ? ALL_BIT_1_32 : 0;
+  }
 
   __m128 c = _mm_cmpnge_ps(a, b);
-
-  return validate_float(c, *(float *)&_c[0], *(float *)&_c[1], *(float *)&_c[2],
-                        *(float *)&_c[3]);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1162,10 +1143,9 @@ result_t test_mm_cmpngt_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 b = load_m128(_b);
 
   float _c[4];
-  _c[0] = !(_a[0] > _b[0]) ? ALL_BIT_1_32 : 0;
-  _c[1] = !(_a[1] > _b[1]) ? ALL_BIT_1_32 : 0;
-  _c[2] = !(_a[2] > _b[2]) ? ALL_BIT_1_32 : 0;
-  _c[3] = !(_a[3] > _b[3]) ? ALL_BIT_1_32 : 0;
+  for (int i = 0; i < 4; i++) {
+    _c[i] = !(_a[i] > _b[i]) ? ALL_BIT_1_32 : 0;
+  }
 
   __m128 c = _mm_cmpngt_ps(a, b);
   return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
@@ -1202,10 +1182,9 @@ result_t test_mm_cmpnle_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 b = load_m128(_b);
 
   float _c[4];
-  _c[0] = !(_a[0] <= _b[0]) ? ALL_BIT_1_32 : 0;
-  _c[1] = !(_a[1] <= _b[1]) ? ALL_BIT_1_32 : 0;
-  _c[2] = !(_a[2] <= _b[2]) ? ALL_BIT_1_32 : 0;
-  _c[3] = !(_a[3] <= _b[3]) ? ALL_BIT_1_32 : 0;
+  for (int i = 0; i < 4; i++) {
+    _c[i] = !(_a[i] <= _b[i]) ? ALL_BIT_1_32 : 0;
+  }
 
   __m128 c = _mm_cmpnle_ps(a, b);
   return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
@@ -1242,11 +1221,9 @@ result_t test_mm_cmpnlt_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 b = load_m128(_b);
 
   float _c[4];
-  _c[0] = !(_a[0] < _b[0]) ? ALL_BIT_1_32 : 0;
-  _c[1] = !(_a[1] < _b[1]) ? ALL_BIT_1_32 : 0;
-  _c[2] = !(_a[2] < _b[2]) ? ALL_BIT_1_32 : 0;
-  _c[3] = !(_a[3] < _b[3]) ? ALL_BIT_1_32 : 0;
-
+  for (int i = 0; i < 4; i++) {
+    _c[i] = !(_a[i] < _b[i]) ? ALL_BIT_1_32 : 0;
+  }
   __m128 c = _mm_cmpnlt_ps(a, b);
   return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
@@ -1421,10 +1398,10 @@ result_t test_mm_comile_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t _C = comile_ss(_a[0], _b[0]);
+  int32_t _c = comile_ss(_a[0], _b[0]);
   int32_t c = _mm_comile_ss(a, b);
 
-  return _C == c ? TEST_SUCCESS : TEST_FAIL;
+  return _c == c ? TEST_SUCCESS : TEST_FAIL;
 #endif
 #else
   return TEST_UNIMPL;
@@ -1469,10 +1446,10 @@ result_t test_mm_comineq_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
 
-  int32_t result = comineq_ss(_a[0], _b[0]);
-  int32_t ret = _mm_comineq_ss(a, b);
+  int32_t _c = comineq_ss(_a[0], _b[0]);
+  int32_t c = _mm_comineq_ss(a, b);
 
-  return result == ret ? TEST_SUCCESS : TEST_FAIL;
+  return _c == c ? TEST_SUCCESS : TEST_FAIL;
 #endif
 #else
   return TEST_UNIMPL;
@@ -1502,16 +1479,15 @@ result_t test_mm_cvt_pi2ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_cvt_ps2pi(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
-  int32_t d[2];
+  int32_t _c[2];
 
-  for (int idx = 0; idx < 2; idx++) {
-    d[idx] = (int32_t)(bankers_rounding(_a[idx]));
+  for (int i = 0; i < 2; i++) {
+    _c[i] = (int32_t)(bankers_rounding(_a[i]));
   }
-
   __m128 a = load_m128(_a);
-  __m64 ret = _mm_cvt_ps2pi(a);
+  __m64 c = _mm_cvt_ps2pi(a);
 
-  return VALIDATE_INT32_M64(ret, d);
+  return VALIDATE_INT32_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1522,15 +1498,15 @@ result_t test_mm_cvt_si2ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const float *_a = impl.test_cases_float_pointer1;
   const int32_t b = *impl.test_cases_int_pointer2;
 
-  float dx = (float)b;
-  float dy = _a[1];
-  float dz = _a[2];
-  float dw = _a[3];
-
+  float _c[4];
+  _c[0] = (float)b;
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
   __m128 a = load_m128(_a);
   __m128 c = _mm_cvt_si2ss(a, b);
 
-  return validate_float(c, dx, dy, dz, dw);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -1539,11 +1515,11 @@ result_t test_mm_cvt_si2ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_cvt_ss2si(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
-  int32_t _d = (int32_t)(bankers_rounding(_a[0]));
+  int32_t _c = (int32_t)(bankers_rounding(_a[0]));
 
   __m128 a = load_m128(_a);
-  int32_t d = _mm_cvt_ss2si(a);
-  return d == _d ? TEST_SUCCESS : TEST_FAIL;
+  int32_t c = _mm_cvt_ss2si(a);
+  return c == _c ? TEST_SUCCESS : TEST_FAIL;
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2070,7 +2046,6 @@ result_t test_mm_free(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 }
 
 result_t test_mm_get_flush_zero_mode(const SSE2RVV_TEST_IMPL &impl,
-                                     // #ifdef ENABLE_TEST_ALL
                                      uint32_t iter) {
   //   int res_flush_zero_on, res_flush_zero_off;
   //   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
@@ -2086,7 +2061,6 @@ result_t test_mm_get_flush_zero_mode(const SSE2RVV_TEST_IMPL &impl,
 }
 
 result_t test_mm_get_rounding_mode(const SSE2RVV_TEST_IMPL &impl,
-                                   // #ifdef ENABLE_TEST_ALL
                                    uint32_t iter) {
   //   int res_toward_zero, res_to_neg_inf, res_to_pos_inf, res_nearest;
   //   _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
@@ -2135,22 +2109,22 @@ result_t test_mm_insert_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t insert = (int16_t)impl.test_cases_ints[iter];
-  __m64 a;
-  __m64 b;
+  __m64 a, c;
+  int16_t _c[4];
 
 #define TEST_IMPL(IDX)                                                         \
-  int16_t d##IDX[4];                                                           \
   for (int i = 0; i < 4; i++) {                                                \
-    d##IDX[i] = _a[i];                                                         \
+    _c[i] = _a[i];                                                             \
   }                                                                            \
-  d##IDX[IDX] = insert;                                                        \
+  _c[IDX] = insert;                                                            \
                                                                                \
   a = load_m64(_a);                                                            \
-  b = _mm_insert_pi16(a, insert, IDX);                                         \
-  CHECK_RESULT(VALIDATE_INT16_M64(b, d##IDX))
+  c = _mm_insert_pi16(a, insert, IDX);                                         \
+  CHECK_RESULT(VALIDATE_INT16_M64(c, _c))
 
   IMM_4_ITER
 #undef TEST_IMPL
+
   return TEST_SUCCESS;
 #else
   return TEST_UNIMPL;
@@ -2159,11 +2133,11 @@ result_t test_mm_insert_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_load_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  const float *addr = impl.test_cases_float_pointer1;
+  const float *p = impl.test_cases_float_pointer1;
 
-  __m128 ret = _mm_load_ps(addr);
+  __m128 c = _mm_load_ps(p);
 
-  return validate_float(ret, addr[0], addr[1], addr[2], addr[3]);
+  return validate_float(c, p[0], p[1], p[2], p[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2171,11 +2145,11 @@ result_t test_mm_load_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_load_ps1(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  const float *addr = impl.test_cases_float_pointer1;
+  const float *p = impl.test_cases_float_pointer1;
 
-  __m128 ret = _mm_load_ps1(addr);
+  __m128 c = _mm_load_ps1(p);
 
-  return validate_float(ret, addr[0], addr[0], addr[0], addr[0]);
+  return validate_float(c, p[0], p[0], p[0], p[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2183,11 +2157,11 @@ result_t test_mm_load_ps1(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_load_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  const float *addr = impl.test_cases_float_pointer1;
+  const float *p = impl.test_cases_float_pointer1;
 
-  __m128 ret = _mm_load_ss(addr);
+  __m128 c = _mm_load_ss(p);
 
-  return validate_float(ret, addr[0], 0, 0, 0);
+  return validate_float(c, p[0], 0, 0, 0);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2196,7 +2170,9 @@ result_t test_mm_load_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_load1_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *p = impl.test_cases_float_pointer1;
+
   __m128 a = _mm_load1_ps(p);
+
   return validate_float(a, p[0], p[0], p[0], p[0]);
 #else
   return TEST_UNIMPL;
@@ -2233,11 +2209,11 @@ result_t test_mm_loadl_pi(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_loadr_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  const float *addr = impl.test_cases_float_pointer1;
+  const float *p = impl.test_cases_float_pointer1;
 
-  __m128 ret = _mm_loadr_ps(addr);
+  __m128 c = _mm_loadr_ps(p);
 
-  return validate_float(ret, addr[3], addr[2], addr[1], addr[0]);
+  return validate_float(c, p[3], p[2], p[1], p[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2245,11 +2221,11 @@ result_t test_mm_loadr_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_loadu_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  const float *addr = impl.test_cases_float_pointer1;
+  const float *p = impl.test_cases_float_pointer1;
 
-  __m128 ret = _mm_loadu_ps(addr);
+  __m128 c = _mm_loadu_ps(p);
 
-  return validate_float(ret, addr[0], addr[1], addr[2], addr[3]);
+  return validate_float(c, p[0], p[1], p[2], p[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2262,11 +2238,11 @@ result_t test_mm_loadu_si16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 // for more information.
 #if (defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ <= 10)
 #else
-  const int16_t *addr = (const int16_t *)impl.test_cases_int_pointer1;
+  const int16_t *p = (const int16_t *)impl.test_cases_int_pointer1;
 
-  __m128i ret = _mm_loadu_si16((const void *)addr);
+  __m128i ret = _mm_loadu_si16((const void *)p);
 
-  return validate_int16(ret, addr[0], 0, 0, 0, 0, 0, 0, 0);
+  return validate_int16(ret, p[0], 0, 0, 0, 0, 0, 0, 0);
 #endif
 #else
   return TEST_UNIMPL;
@@ -2280,11 +2256,11 @@ result_t test_mm_loadu_si64(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 // for more information.
 #if (defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 9)
 #else
-  const int64_t *addr = (const int64_t *)impl.test_cases_int_pointer1;
+  const int64_t *p = (const int64_t *)impl.test_cases_int_pointer1;
 
-  __m128i ret = _mm_loadu_si64((const void *)addr);
+  __m128i ret = _mm_loadu_si64((const void *)p);
 
-  return validate_int64(ret, addr[0], 0);
+  return validate_int64(ret, p[0], 0);
 #endif
 #else
   return TEST_UNIMPL;
@@ -2357,17 +2333,16 @@ result_t test_mm_max_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t c[4];
+  int16_t _c[4];
 
-  c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _mm_max_pi16(a, b);
-  return VALIDATE_INT16_M64(ret, c);
+  __m64 c = _mm_max_pi16(a, b);
+  return VALIDATE_INT16_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2377,17 +2352,16 @@ result_t test_mm_max_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
-  float c[4];
+  float _c[4];
 
-  c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
-  __m128 ret = _mm_max_ps(a, b);
-  return validate_float(ret, c[0], c[1], c[2], c[3]);
+  __m128 c = _mm_max_ps(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2397,21 +2371,15 @@ result_t test_mm_max_pu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
   const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
-  uint8_t c[8];
-
-  c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
-  c[4] = _a[4] > _b[4] ? _a[4] : _b[4];
-  c[5] = _a[5] > _b[5] ? _a[5] : _b[5];
-  c[6] = _a[6] > _b[6] ? _a[6] : _b[6];
-  c[7] = _a[7] > _b[7] ? _a[7] : _b[7];
+  uint8_t _c[8];
 
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _mm_max_pu8(a, b);
-  return VALIDATE_UINT8_M64(ret, c);
+  __m64 c = _mm_max_pu8(a, b);
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2422,16 +2390,16 @@ result_t test_mm_max_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer1;
 
-  float f0 = _a[0] > _b[0] ? _a[0] : _b[0];
-  float f1 = _a[1];
-  float f2 = _a[2];
-  float f3 = _a[3];
+  float _c[4];
+  _c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
   __m128 a = _mm_load_ps(_a);
   __m128 b = _mm_load_ps(_b);
   __m128 c = _mm_max_ss(a, b);
-
-  return validate_float(c, f0, f1, f2, f3);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2441,17 +2409,15 @@ result_t test_mm_min_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t c[4];
-
-  c[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] < _b[3] ? _a[3] : _b[3];
+  int16_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _mm_min_pi16(a, b);
-  return VALIDATE_INT16_M64(ret, c);
+  __m64 c = _mm_min_pi16(a, b);
+  return VALIDATE_INT16_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2461,17 +2427,15 @@ result_t test_mm_min_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
-  float c[4];
-
-  c[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] < _b[3] ? _a[3] : _b[3];
+  float _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
-  __m128 ret = _mm_min_ps(a, b);
-  return validate_float(ret, c[0], c[1], c[2], c[3]);
+  __m128 c = _mm_min_ps(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2481,21 +2445,15 @@ result_t test_mm_min_pu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
   const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
-  uint8_t c[8];
-
-  c[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] < _b[3] ? _a[3] : _b[3];
-  c[4] = _a[4] < _b[4] ? _a[4] : _b[4];
-  c[5] = _a[5] < _b[5] ? _a[5] : _b[5];
-  c[6] = _a[6] < _b[6] ? _a[6] : _b[6];
-  c[7] = _a[7] < _b[7] ? _a[7] : _b[7];
+  uint8_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _mm_min_pu8(a, b);
-  return VALIDATE_UINT8_M64(ret, c);
+  __m64 c = _mm_min_pu8(a, b);
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2505,15 +2463,12 @@ result_t test_mm_min_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
-  float c;
-
-  c = _a[0] < _b[0] ? _a[0] : _b[0];
 
+  float _c = _a[0] < _b[0] ? _a[0] : _b[0];
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
-  __m128 ret = _mm_min_ss(a, b);
-
-  return validate_float(ret, c, _a[1], _a[2], _a[3]);
+  __m128 c = _mm_min_ss(a, b);
+  return validate_float(c, _c, _a[1], _a[2], _a[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2531,7 +2486,6 @@ result_t test_mm_move_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   _c[1] = _a[1];
   _c[2] = _a[2];
   _c[3] = _a[3];
-
   __m128 c = _mm_move_ss(a, b);
   return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
@@ -2544,16 +2498,16 @@ result_t test_mm_movehl_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
 
-  float f0 = _b[2];
-  float f1 = _b[3];
-  float f2 = _a[2];
-  float f3 = _a[3];
+  float _c[4];
+  _c[0] = _b[2];
+  _c[1] = _b[3];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
-  __m128 ret = _mm_movehl_ps(a, b);
-
-  return validate_float(ret, f0, f1, f2, f3);
+  __m128 c = _mm_movehl_ps(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2564,16 +2518,16 @@ result_t test_mm_movelh_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
 
-  float f0 = _a[0];
-  float f1 = _a[1];
-  float f2 = _b[0];
-  float f3 = _b[1];
+  float _c[4];
+  _c[0] = _a[0];
+  _c[1] = _a[1];
+  _c[2] = _b[0];
+  _c[3] = _b[1];
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
-  __m128 ret = _mm_movelh_ps(a, b);
-
-  return validate_float(ret, f0, f1, f2, f3);
+  __m128 c = _mm_movelh_ps(a, b);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2592,8 +2546,7 @@ result_t test_mm_movemask_pi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const __m64 *a = (const __m64 *)_a;
   int c = _mm_movemask_pi8(*a);
 
-  ASSERT_RETURN((unsigned int)c == _c);
-  return TEST_SUCCESS;
+  return ((unsigned int)c == _c) ? TEST_SUCCESS : TEST_FAIL;
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2602,24 +2555,18 @@ result_t test_mm_movemask_pi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_movemask_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *p = impl.test_cases_float_pointer1;
-  int ret = 0;
+  int _c = 0;
 
   const uint32_t *ip = (const uint32_t *)p;
-  if (ip[0] & 0x80000000) {
-    ret |= 1;
-  }
-  if (ip[1] & 0x80000000) {
-    ret |= 2;
-  }
-  if (ip[2] & 0x80000000) {
-    ret |= 4;
-  }
-  if (ip[3] & 0x80000000) {
-    ret |= 8;
+  for (int i = 0; i < 4; i++) {
+    if (ip[i] & 0x80000000) {
+      _c |= (1 << i);
+    }
   }
+
   __m128 a = load_m128(p);
-  int val = _mm_movemask_ps(a);
-  return val == ret ? TEST_SUCCESS : TEST_FAIL;
+  int c = _mm_movemask_ps(a);
+  return c == _c ? TEST_SUCCESS : TEST_FAIL;
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2629,15 +2576,15 @@ result_t test_mm_mul_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
-  float dx = _a[0] * _b[0];
-  float dy = _a[1] * _b[1];
-  float dz = _a[2] * _b[2];
-  float dw = _a[3] * _b[3];
+  float _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] * _b[i];
+  }
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
   __m128 c = _mm_mul_ps(a, b);
-  return validate_float(c, dx, dy, dz, dw);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2648,15 +2595,16 @@ result_t test_mm_mul_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const float *_a = impl.test_cases_float_pointer1;
   const float *_b = impl.test_cases_float_pointer2;
 
-  float dx = _a[0] * _b[0];
-  float dy = _a[1];
-  float dz = _a[2];
-  float dw = _a[3];
+  float _c[4];
+  _c[0] = _a[0] * _b[0];
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
   __m128 a = load_m128(_a);
   __m128 b = load_m128(_b);
   __m128 c = _mm_mul_ss(a, b);
-  return validate_float(c, dx, dy, dz, dw);
+  return validate_float(c, _c[0], _c[1], _c[2], _c[3]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2667,7 +2615,7 @@ result_t test_mm_mulhi_pu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
   const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
   uint16_t d[4];
-  for (uint32_t i = 0; i < 4; i++) {
+  for (int i = 0; i < 4; i++) {
     uint32_t m = (uint32_t)_a[i] * (uint32_t)_b[i];
     d[i] = (uint16_t)(m >> 16);
   }
@@ -2710,7 +2658,18 @@ result_t test_mm_or_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_m_pavgb(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  return test_mm_avg_pu8(impl, iter);
+  const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
+  const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
+  uint8_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (_a[i] + _b[i] + 1) >> 1;
+  }
+
+  __m64 a = load_m64(_a);
+  __m64 b = load_m64(_b);
+  __m64 c = _m_pavgb(a, b);
+
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2718,7 +2677,17 @@ result_t test_m_pavgb(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_m_pavgw(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  return test_mm_avg_pu16(impl, iter);
+  const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
+  const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
+  uint16_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = (_a[i] + _b[i] + 1) >> 1;
+  }
+
+  __m64 a = load_m64(_a);
+  __m64 b = load_m64(_b);
+  __m64 c = _m_pavgw(a, b);
+  return VALIDATE_UINT16_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2766,24 +2735,23 @@ result_t test_m_pinsrw(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t insert = (int16_t)impl.test_cases_ints[iter];
-  __m64 a;
-  __m64 b;
+  __m64 a, c;
+  int16_t _c[4];
 
 #define TEST_IMPL(IDX)                                                         \
-  int16_t d##IDX[4];                                                           \
   for (int i = 0; i < 4; i++) {                                                \
-    d##IDX[i] = _a[i];                                                         \
+    _c[i] = _a[i];                                                             \
   }                                                                            \
-  d##IDX[IDX] = insert;                                                        \
+  _c[IDX] = insert;                                                            \
                                                                                \
   a = load_m64(_a);                                                            \
-  b = _m_pinsrw(a, insert, IDX);                                               \
-  CHECK_RESULT(VALIDATE_INT16_M64(b, d##IDX))
+  c = _m_pinsrw(a, insert, IDX);                                               \
+  CHECK_RESULT(VALIDATE_INT16_M64(c, _c))
 
   IMM_4_ITER
 #undef TEST_IMPL
-  return TEST_SUCCESS;
 
+  return TEST_SUCCESS;
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2793,17 +2761,16 @@ result_t test_m_pmaxsw(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t c[4];
+  int16_t _c[4];
 
-  c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _m_pmaxsw(a, b);
-  return VALIDATE_INT16_M64(ret, c);
+  __m64 c = _m_pmaxsw(a, b);
+  return VALIDATE_INT16_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2813,21 +2780,16 @@ result_t test_m_pmaxub(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
   const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
-  uint8_t c[8];
 
-  c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
-  c[4] = _a[4] > _b[4] ? _a[4] : _b[4];
-  c[5] = _a[5] > _b[5] ? _a[5] : _b[5];
-  c[6] = _a[6] > _b[6] ? _a[6] : _b[6];
-  c[7] = _a[7] > _b[7] ? _a[7] : _b[7];
+  uint8_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _m_pmaxub(a, b);
-  return VALIDATE_UINT8_M64(ret, c);
+  __m64 c = _m_pmaxub(a, b);
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2857,21 +2819,15 @@ result_t test_m_pminub(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
   const uint8_t *_b = (const uint8_t *)impl.test_cases_int_pointer2;
-  uint8_t c[8];
-
-  c[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  c[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  c[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  c[3] = _a[3] < _b[3] ? _a[3] : _b[3];
-  c[4] = _a[4] < _b[4] ? _a[4] : _b[4];
-  c[5] = _a[5] < _b[5] ? _a[5] : _b[5];
-  c[6] = _a[6] < _b[6] ? _a[6] : _b[6];
-  c[7] = _a[7] < _b[7] ? _a[7] : _b[7];
+  uint8_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m64 a = load_m64(_a);
   __m64 b = load_m64(_b);
-  __m64 ret = _m_pminub(a, b);
-  return VALIDATE_UINT8_M64(ret, c);
+  __m64 c = _m_pminub(a, b);
+  return VALIDATE_UINT8_M64(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -2990,19 +2946,17 @@ result_t test_m_psadbw(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_m_pshufw(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
-  __m64 a;
-  __m64 d;
-
+  __m64 a, c;
+  int16_t _c[4];
 #define TEST_IMPL(IDX)                                                         \
-  a = load_m64(_a);                                                            \
-  d = _m_pshufw(a, IDX);                                                       \
+  _c[0] = _a[IDX & 0x3];                                                       \
+  _c[1] = _a[(IDX >> 2) & 0x3];                                                \
+  _c[2] = _a[(IDX >> 4) & 0x3];                                                \
+  _c[3] = _a[(IDX >> 6) & 0x3];                                                \
                                                                                \
-  int16_t _d##IDX[4];                                                          \
-  _d##IDX[0] = _a[IDX & 0x3];                                                  \
-  _d##IDX[1] = _a[(IDX >> 2) & 0x3];                                           \
-  _d##IDX[2] = _a[(IDX >> 4) & 0x3];                                           \
-  _d##IDX[3] = _a[(IDX >> 6) & 0x3];                                           \
-  if (VALIDATE_INT16_M64(d, _d##IDX) != TEST_SUCCESS) {                        \
+  a = load_m64(_a);                                                            \
+  c = _m_pshufw(a, IDX);                                                       \
+  if (VALIDATE_INT16_M64(c, _c) != TEST_SUCCESS) {                             \
     return TEST_FAIL;                                                          \
   }
 
@@ -3246,21 +3200,18 @@ result_t test_mm_sfence(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_shuffle_pi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
-  __m64 a;
-  __m64 d;
+  __m64 a, c;
+  int16_t _c[4];
 
 #define TEST_IMPL(IDX)                                                         \
   a = load_m64(_a);                                                            \
-  d = _mm_shuffle_pi16(a, IDX);                                                \
+  c = _mm_shuffle_pi16(a, IDX);                                                \
                                                                                \
-  int16_t _d##IDX[4];                                                          \
-  _d##IDX[0] = _a[IDX & 0x3];                                                  \
-  _d##IDX[1] = _a[(IDX >> 2) & 0x3];                                           \
-  _d##IDX[2] = _a[(IDX >> 4) & 0x3];                                           \
-  _d##IDX[3] = _a[(IDX >> 6) & 0x3];                                           \
-  if (VALIDATE_INT16_M64(d, _d##IDX) != TEST_SUCCESS) {                        \
-    return TEST_FAIL;                                                          \
-  }
+  _c[0] = _a[IDX & 0x3];                                                       \
+  _c[1] = _a[(IDX >> 2) & 0x3];                                                \
+  _c[2] = _a[(IDX >> 4) & 0x3];                                                \
+  _c[3] = _a[(IDX >> 6) & 0x3];                                                \
+  CHECK_RESULT(VALIDATE_INT16_M64(c, _c))
 
   IMM_256_ITER
 #undef TEST_IMPL
@@ -3302,15 +3253,14 @@ result_t test_mm_sqrt_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = (const float *)impl.test_cases_float_pointer1;
 
-  float f0 = sqrt(_a[0]);
-  float f1 = sqrt(_a[1]);
-  float f2 = sqrt(_a[2]);
-  float f3 = sqrt(_a[3]);
+  float _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = sqrt(_a[i]);
+  }
 
   __m128 a = load_m128(_a);
   __m128 c = _mm_sqrt_ps(a);
-
-  return validate_float_error(c, f0, f1, f2, f3, 0.1f);
+  return validate_float_error(c, _c[0], _c[1], _c[2], _c[3], 0.1f);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -3320,15 +3270,15 @@ result_t test_mm_sqrt_ss(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const float *_a = (const float *)impl.test_cases_float_pointer1;
 
-  float f0 = sqrt(_a[0]);
-  float f1 = _a[1];
-  float f2 = _a[2];
-  float f3 = _a[3];
+  float _c[4];
+  _c[0] = sqrt(_a[0]);
+  _c[1] = _a[1];
+  _c[2] = _a[2];
+  _c[3] = _a[3];
 
   __m128 a = load_m128(_a);
   __m128 c = _mm_sqrt_ss(a);
-
-  return validate_float_error(c, f0, f1, f2, f3, 0.1f);
+  return validate_float_error(c, _c[0], _c[1], _c[2], _c[3], 0.1f);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -3843,28 +3793,15 @@ result_t test_mm_add_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = _a[0] + _b[0];
-  d[1] = _a[1] + _b[1];
-  d[2] = _a[2] + _b[2];
-  d[3] = _a[3] + _b[3];
-  d[4] = _a[4] + _b[4];
-  d[5] = _a[5] + _b[5];
-  d[6] = _a[6] + _b[6];
-  d[7] = _a[7] + _b[7];
-  d[8] = _a[8] + _b[8];
-  d[9] = _a[9] + _b[9];
-  d[10] = _a[10] + _b[10];
-  d[11] = _a[11] + _b[11];
-  d[12] = _a[12] + _b[12];
-  d[13] = _a[13] + _b[13];
-  d[14] = _a[14] + _b[14];
-  d[15] = _a[15] + _b[15];
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = _a[i] + _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_add_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -3923,53 +3860,21 @@ result_t test_mm_adds_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int32_t d[8];
-  d[0] = (int32_t)_a[0] + (int32_t)_b[0];
-  if (d[0] > 32767)
-    d[0] = 32767;
-  if (d[0] < -32768)
-    d[0] = -32768;
-  d[1] = (int32_t)_a[1] + (int32_t)_b[1];
-  if (d[1] > 32767)
-    d[1] = 32767;
-  if (d[1] < -32768)
-    d[1] = -32768;
-  d[2] = (int32_t)_a[2] + (int32_t)_b[2];
-  if (d[2] > 32767)
-    d[2] = 32767;
-  if (d[2] < -32768)
-    d[2] = -32768;
-  d[3] = (int32_t)_a[3] + (int32_t)_b[3];
-  if (d[3] > 32767)
-    d[3] = 32767;
-  if (d[3] < -32768)
-    d[3] = -32768;
-  d[4] = (int32_t)_a[4] + (int32_t)_b[4];
-  if (d[4] > 32767)
-    d[4] = 32767;
-  if (d[4] < -32768)
-    d[4] = -32768;
-  d[5] = (int32_t)_a[5] + (int32_t)_b[5];
-  if (d[5] > 32767)
-    d[5] = 32767;
-  if (d[5] < -32768)
-    d[5] = -32768;
-  d[6] = (int32_t)_a[6] + (int32_t)_b[6];
-  if (d[6] > 32767)
-    d[6] = 32767;
-  if (d[6] < -32768)
-    d[6] = -32768;
-  d[7] = (int32_t)_a[7] + (int32_t)_b[7];
-  if (d[7] > 32767)
-    d[7] = 32767;
-  if (d[7] < -32768)
-    d[7] = -32768;
+  int32_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (int32_t)_a[i] + (int32_t)_b[i];
+    if (_c[i] > INT16_MAX) {
+      _c[i] = INT16_MAX;
+    }
+    if (_c[i] < INT16_MIN) {
+      _c[i] = INT16_MIN;
+    }
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
   __m128i c = _mm_adds_epi16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -3983,10 +3888,12 @@ result_t test_mm_adds_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   int16_t d[16];
   for (int i = 0; i < 16; i++) {
     d[i] = (int16_t)_a[i] + (int16_t)_b[i];
-    if (d[i] > 127)
-      d[i] = 127;
-    if (d[i] < -128)
-      d[i] = -128;
+    if (d[i] > INT8_MAX) {
+      d[i] = INT8_MAX;
+    }
+    if (d[i] < INT8_MIN) {
+      d[i] = INT8_MIN;
+    }
   }
 
   __m128i a = load_m128i(_a);
@@ -4001,25 +3908,18 @@ result_t test_mm_adds_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_adds_epu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  uint32_t max = 0xFFFF;
   const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
   const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
 
-  uint16_t d[8];
-  d[0] = (uint32_t)_a[0] + (uint32_t)_b[0] > max ? max : _a[0] + _b[0];
-  d[1] = (uint32_t)_a[1] + (uint32_t)_b[1] > max ? max : _a[1] + _b[1];
-  d[2] = (uint32_t)_a[2] + (uint32_t)_b[2] > max ? max : _a[2] + _b[2];
-  d[3] = (uint32_t)_a[3] + (uint32_t)_b[3] > max ? max : _a[3] + _b[3];
-  d[4] = (uint32_t)_a[4] + (uint32_t)_b[4] > max ? max : _a[4] + _b[4];
-  d[5] = (uint32_t)_a[5] + (uint32_t)_b[5] > max ? max : _a[5] + _b[5];
-  d[6] = (uint32_t)_a[6] + (uint32_t)_b[6] > max ? max : _a[6] + _b[6];
-  d[7] = (uint32_t)_a[7] + (uint32_t)_b[7] > max ? max : _a[7] + _b[7];
-
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (uint32_t)_a[i] + (uint32_t)_b[i] > UINT16_MAX ? UINT16_MAX
+                                                           : _a[i] + _b[i];
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_adds_epu16(a, b);
-
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4029,60 +3929,17 @@ result_t test_mm_adds_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[16];
-  d[0] = (uint8_t)_a[0] + (uint8_t)_b[0];
-  if (d[0] < (uint8_t)_a[0])
-    d[0] = 255;
-  d[1] = (uint8_t)_a[1] + (uint8_t)_b[1];
-  if (d[1] < (uint8_t)_a[1])
-    d[1] = 255;
-  d[2] = (uint8_t)_a[2] + (uint8_t)_b[2];
-  if (d[2] < (uint8_t)_a[2])
-    d[2] = 255;
-  d[3] = (uint8_t)_a[3] + (uint8_t)_b[3];
-  if (d[3] < (uint8_t)_a[3])
-    d[3] = 255;
-  d[4] = (uint8_t)_a[4] + (uint8_t)_b[4];
-  if (d[4] < (uint8_t)_a[4])
-    d[4] = 255;
-  d[5] = (uint8_t)_a[5] + (uint8_t)_b[5];
-  if (d[5] < (uint8_t)_a[5])
-    d[5] = 255;
-  d[6] = (uint8_t)_a[6] + (uint8_t)_b[6];
-  if (d[6] < (uint8_t)_a[6])
-    d[6] = 255;
-  d[7] = (uint8_t)_a[7] + (uint8_t)_b[7];
-  if (d[7] < (uint8_t)_a[7])
-    d[7] = 255;
-  d[8] = (uint8_t)_a[8] + (uint8_t)_b[8];
-  if (d[8] < (uint8_t)_a[8])
-    d[8] = 255;
-  d[9] = (uint8_t)_a[9] + (uint8_t)_b[9];
-  if (d[9] < (uint8_t)_a[9])
-    d[9] = 255;
-  d[10] = (uint8_t)_a[10] + (uint8_t)_b[10];
-  if (d[10] < (uint8_t)_a[10])
-    d[10] = 255;
-  d[11] = (uint8_t)_a[11] + (uint8_t)_b[11];
-  if (d[11] < (uint8_t)_a[11])
-    d[11] = 255;
-  d[12] = (uint8_t)_a[12] + (uint8_t)_b[12];
-  if (d[12] < (uint8_t)_a[12])
-    d[12] = 255;
-  d[13] = (uint8_t)_a[13] + (uint8_t)_b[13];
-  if (d[13] < (uint8_t)_a[13])
-    d[13] = 255;
-  d[14] = (uint8_t)_a[14] + (uint8_t)_b[14];
-  if (d[14] < (uint8_t)_a[14])
-    d[14] = 255;
-  d[15] = (uint8_t)_a[15] + (uint8_t)_b[15];
-  if (d[15] < (uint8_t)_a[15])
-    d[15] = 255;
-
+  uint8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = (uint8_t)_a[i] + (uint8_t)_b[i];
+    if (_c[i] < (uint8_t)_a[i]) {
+      _c[i] = UINT8_MAX;
+    }
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_adds_epu8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4099,7 +3956,6 @@ result_t test_mm_and_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128d a = load_m128d(_a);
   __m128d b = load_m128d(_b);
   __m128d c = _mm_and_pd(a, b);
-
   return validate_double(c, *((double *)&d0), *((double *)&d1));
 #else
   return TEST_UNIMPL;
@@ -4209,27 +4065,14 @@ result_t test_mm_avg_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[16];
-  d[0] = ((uint8_t)_a[0] + (uint8_t)_b[0] + 1) >> 1;
-  d[1] = ((uint8_t)_a[1] + (uint8_t)_b[1] + 1) >> 1;
-  d[2] = ((uint8_t)_a[2] + (uint8_t)_b[2] + 1) >> 1;
-  d[3] = ((uint8_t)_a[3] + (uint8_t)_b[3] + 1) >> 1;
-  d[4] = ((uint8_t)_a[4] + (uint8_t)_b[4] + 1) >> 1;
-  d[5] = ((uint8_t)_a[5] + (uint8_t)_b[5] + 1) >> 1;
-  d[6] = ((uint8_t)_a[6] + (uint8_t)_b[6] + 1) >> 1;
-  d[7] = ((uint8_t)_a[7] + (uint8_t)_b[7] + 1) >> 1;
-  d[8] = ((uint8_t)_a[8] + (uint8_t)_b[8] + 1) >> 1;
-  d[9] = ((uint8_t)_a[9] + (uint8_t)_b[9] + 1) >> 1;
-  d[10] = ((uint8_t)_a[10] + (uint8_t)_b[10] + 1) >> 1;
-  d[11] = ((uint8_t)_a[11] + (uint8_t)_b[11] + 1) >> 1;
-  d[12] = ((uint8_t)_a[12] + (uint8_t)_b[12] + 1) >> 1;
-  d[13] = ((uint8_t)_a[13] + (uint8_t)_b[13] + 1) >> 1;
-  d[14] = ((uint8_t)_a[14] + (uint8_t)_b[14] + 1) >> 1;
-  d[15] = ((uint8_t)_a[15] + (uint8_t)_b[15] + 1) >> 1;
+  uint8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = ((uint8_t)_a[i] + (uint8_t)_b[i] + 1) >> 1;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_avg_epu8(a, b);
-  return VALIDATE_UINT8_M128(c, d);
+  return VALIDATE_UINT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4413,20 +4256,14 @@ result_t test_mm_cmpeq_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t d[8];
-  d[0] = (_a[0] == _b[0]) ? ~UINT16_C(0) : 0x0;
-  d[1] = (_a[1] == _b[1]) ? ~UINT16_C(0) : 0x0;
-  d[2] = (_a[2] == _b[2]) ? ~UINT16_C(0) : 0x0;
-  d[3] = (_a[3] == _b[3]) ? ~UINT16_C(0) : 0x0;
-  d[4] = (_a[4] == _b[4]) ? ~UINT16_C(0) : 0x0;
-  d[5] = (_a[5] == _b[5]) ? ~UINT16_C(0) : 0x0;
-  d[6] = (_a[6] == _b[6]) ? ~UINT16_C(0) : 0x0;
-  d[7] = (_a[7] == _b[7]) ? ~UINT16_C(0) : 0x0;
-
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (_a[i] == _b[i]) ? UINT16_MAX : 0x0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmpeq_epi16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4437,17 +4274,15 @@ result_t test_mm_cmpeq_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int32_t *_a = impl.test_cases_int_pointer1;
   const int32_t *_b = impl.test_cases_int_pointer2;
 
-  int32_t d[4];
-  d[0] = (_a[0] == _b[0]) ? ~UINT32_C(0) : 0x0;
-  d[1] = (_a[1] == _b[1]) ? ~UINT32_C(0) : 0x0;
-  d[2] = (_a[2] == _b[2]) ? ~UINT32_C(0) : 0x0;
-  d[3] = (_a[3] == _b[3]) ? ~UINT32_C(0) : 0x0;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = (_a[i] == _b[i]) ? UINT32_MAX : 0x0;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmpeq_epi32(a, b);
-
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4457,28 +4292,14 @@ result_t test_mm_cmpeq_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = (_a[0] == _b[0]) ? ~UINT8_C(0) : 0x00;
-  d[1] = (_a[1] == _b[1]) ? ~UINT8_C(0) : 0x00;
-  d[2] = (_a[2] == _b[2]) ? ~UINT8_C(0) : 0x00;
-  d[3] = (_a[3] == _b[3]) ? ~UINT8_C(0) : 0x00;
-  d[4] = (_a[4] == _b[4]) ? ~UINT8_C(0) : 0x00;
-  d[5] = (_a[5] == _b[5]) ? ~UINT8_C(0) : 0x00;
-  d[6] = (_a[6] == _b[6]) ? ~UINT8_C(0) : 0x00;
-  d[7] = (_a[7] == _b[7]) ? ~UINT8_C(0) : 0x00;
-  d[8] = (_a[8] == _b[8]) ? ~UINT8_C(0) : 0x00;
-  d[9] = (_a[9] == _b[9]) ? ~UINT8_C(0) : 0x00;
-  d[10] = (_a[10] == _b[10]) ? ~UINT8_C(0) : 0x00;
-  d[11] = (_a[11] == _b[11]) ? ~UINT8_C(0) : 0x00;
-  d[12] = (_a[12] == _b[12]) ? ~UINT8_C(0) : 0x00;
-  d[13] = (_a[13] == _b[13]) ? ~UINT8_C(0) : 0x00;
-  d[14] = (_a[14] == _b[14]) ? ~UINT8_C(0) : 0x00;
-  d[15] = (_a[15] == _b[15]) ? ~UINT8_C(0) : 0x00;
-
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = (_a[i] == _b[i]) ? UINT8_MAX : 0x0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmpeq_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4488,8 +4309,8 @@ result_t test_mm_cmpeq_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const double *_a = (const double *)impl.test_cases_float_pointer1;
   const double *_b = (const double *)impl.test_cases_float_pointer2;
-  uint64_t d0 = (_a[0] == _b[0]) ? 0xffffffffffffffff : 0;
-  uint64_t d1 = (_a[1] == _b[1]) ? 0xffffffffffffffff : 0;
+  uint64_t d0 = (_a[0] == _b[0]) ? UINT64_MAX : 0;
+  uint64_t d1 = (_a[1] == _b[1]) ? UINT64_MAX : 0;
 
   __m128d a = load_m128d(_a);
   __m128d b = load_m128d(_b);
@@ -4555,21 +4376,14 @@ result_t test_mm_cmpgt_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  uint16_t d[8];
-  d[0] = _a[0] > _b[0] ? ~UINT16_C(0) : 0;
-  d[1] = _a[1] > _b[1] ? ~UINT16_C(0) : 0;
-  d[2] = _a[2] > _b[2] ? ~UINT16_C(0) : 0;
-  d[3] = _a[3] > _b[3] ? ~UINT16_C(0) : 0;
-  d[4] = _a[4] > _b[4] ? ~UINT16_C(0) : 0;
-  d[5] = _a[5] > _b[5] ? ~UINT16_C(0) : 0;
-  d[6] = _a[6] > _b[6] ? ~UINT16_C(0) : 0;
-  d[7] = _a[7] > _b[7] ? ~UINT16_C(0) : 0;
-
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] > _b[i] ? UINT16_MAX : 0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmpgt_epi16(a, b);
-
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4579,18 +4393,15 @@ result_t test_mm_cmpgt_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int32_t *_a = impl.test_cases_int_pointer1;
   const int32_t *_b = impl.test_cases_int_pointer2;
+
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] > _b[i] ? UINT32_MAX : 0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
-  int32_t result[4];
-
-  result[0] = _a[0] > _b[0] ? -1 : 0;
-  result[1] = _a[1] > _b[1] ? -1 : 0;
-  result[2] = _a[2] > _b[2] ? -1 : 0;
-  result[3] = _a[3] > _b[3] ? -1 : 0;
-
-  __m128i iret = _mm_cmpgt_epi32(a, b);
-  return VALIDATE_INT32_M128(iret, result);
+  __m128i c = _mm_cmpgt_epi32(a, b);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4600,28 +4411,14 @@ result_t test_mm_cmpgt_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = (_a[0] > _b[0]) ? ~UINT8_C(0) : 0x00;
-  d[1] = (_a[1] > _b[1]) ? ~UINT8_C(0) : 0x00;
-  d[2] = (_a[2] > _b[2]) ? ~UINT8_C(0) : 0x00;
-  d[3] = (_a[3] > _b[3]) ? ~UINT8_C(0) : 0x00;
-  d[4] = (_a[4] > _b[4]) ? ~UINT8_C(0) : 0x00;
-  d[5] = (_a[5] > _b[5]) ? ~UINT8_C(0) : 0x00;
-  d[6] = (_a[6] > _b[6]) ? ~UINT8_C(0) : 0x00;
-  d[7] = (_a[7] > _b[7]) ? ~UINT8_C(0) : 0x00;
-  d[8] = (_a[8] > _b[8]) ? ~UINT8_C(0) : 0x00;
-  d[9] = (_a[9] > _b[9]) ? ~UINT8_C(0) : 0x00;
-  d[10] = (_a[10] > _b[10]) ? ~UINT8_C(0) : 0x00;
-  d[11] = (_a[11] > _b[11]) ? ~UINT8_C(0) : 0x00;
-  d[12] = (_a[12] > _b[12]) ? ~UINT8_C(0) : 0x00;
-  d[13] = (_a[13] > _b[13]) ? ~UINT8_C(0) : 0x00;
-  d[14] = (_a[14] > _b[14]) ? ~UINT8_C(0) : 0x00;
-  d[15] = (_a[15] > _b[15]) ? ~UINT8_C(0) : 0x00;
-
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = (_a[i] > _b[i]) ? UINT8_MAX : 0x0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmpgt_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4699,21 +4496,16 @@ result_t test_mm_cmplt_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  uint16_t d[8];
-  d[0] = _a[0] < _b[0] ? ~UINT16_C(0) : 0;
-  d[1] = _a[1] < _b[1] ? ~UINT16_C(0) : 0;
-  d[2] = _a[2] < _b[2] ? ~UINT16_C(0) : 0;
-  d[3] = _a[3] < _b[3] ? ~UINT16_C(0) : 0;
-  d[4] = _a[4] < _b[4] ? ~UINT16_C(0) : 0;
-  d[5] = _a[5] < _b[5] ? ~UINT16_C(0) : 0;
-  d[6] = _a[6] < _b[6] ? ~UINT16_C(0) : 0;
-  d[7] = _a[7] < _b[7] ? ~UINT16_C(0) : 0;
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] < _b[i] ? UINT16_MAX : 0;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmplt_epi16(a, b);
 
-  return VALIDATE_UINT16_M128(c, d);
+  return VALIDATE_UINT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4726,14 +4518,12 @@ result_t test_mm_cmplt_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
 
-  int32_t result[4];
-  result[0] = _a[0] < _b[0] ? -1 : 0;
-  result[1] = _a[1] < _b[1] ? -1 : 0;
-  result[2] = _a[2] < _b[2] ? -1 : 0;
-  result[3] = _a[3] < _b[3] ? -1 : 0;
-
-  __m128i iret = _mm_cmplt_epi32(a, b);
-  return VALIDATE_INT32_M128(iret, result);
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] < _b[i] ? UINT32_MAX : 0;
+  }
+  __m128i c = _mm_cmplt_epi32(a, b);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -4743,28 +4533,14 @@ result_t test_mm_cmplt_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = (_a[0] < _b[0]) ? ~UINT8_C(0) : 0x00;
-  d[1] = (_a[1] < _b[1]) ? ~UINT8_C(0) : 0x00;
-  d[2] = (_a[2] < _b[2]) ? ~UINT8_C(0) : 0x00;
-  d[3] = (_a[3] < _b[3]) ? ~UINT8_C(0) : 0x00;
-  d[4] = (_a[4] < _b[4]) ? ~UINT8_C(0) : 0x00;
-  d[5] = (_a[5] < _b[5]) ? ~UINT8_C(0) : 0x00;
-  d[6] = (_a[6] < _b[6]) ? ~UINT8_C(0) : 0x00;
-  d[7] = (_a[7] < _b[7]) ? ~UINT8_C(0) : 0x00;
-  d[8] = (_a[8] < _b[8]) ? ~UINT8_C(0) : 0x00;
-  d[9] = (_a[9] < _b[9]) ? ~UINT8_C(0) : 0x00;
-  d[10] = (_a[10] < _b[10]) ? ~UINT8_C(0) : 0x00;
-  d[11] = (_a[11] < _b[11]) ? ~UINT8_C(0) : 0x00;
-  d[12] = (_a[12] < _b[12]) ? ~UINT8_C(0) : 0x00;
-  d[13] = (_a[13] < _b[13]) ? ~UINT8_C(0) : 0x00;
-  d[14] = (_a[14] < _b[14]) ? ~UINT8_C(0) : 0x00;
-  d[15] = (_a[15] < _b[15]) ? ~UINT8_C(0) : 0x00;
-
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = (_a[i] < _b[i]) ? UINT8_MAX : 0x0;
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_cmplt_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6004,21 +5780,14 @@ result_t test_mm_max_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t d[8];
-  d[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] > _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] > _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] > _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] > _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] > _b[7] ? _a[7] : _b[7];
-
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
   __m128i c = _mm_max_epi16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6028,44 +5797,15 @@ result_t test_mm_max_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[16];
-  d[0] =
-      ((uint8_t)_a[0] > (uint8_t)_b[0]) ? ((uint8_t)_a[0]) : ((uint8_t)_b[0]);
-  d[1] =
-      ((uint8_t)_a[1] > (uint8_t)_b[1]) ? ((uint8_t)_a[1]) : ((uint8_t)_b[1]);
-  d[2] =
-      ((uint8_t)_a[2] > (uint8_t)_b[2]) ? ((uint8_t)_a[2]) : ((uint8_t)_b[2]);
-  d[3] =
-      ((uint8_t)_a[3] > (uint8_t)_b[3]) ? ((uint8_t)_a[3]) : ((uint8_t)_b[3]);
-  d[4] =
-      ((uint8_t)_a[4] > (uint8_t)_b[4]) ? ((uint8_t)_a[4]) : ((uint8_t)_b[4]);
-  d[5] =
-      ((uint8_t)_a[5] > (uint8_t)_b[5]) ? ((uint8_t)_a[5]) : ((uint8_t)_b[5]);
-  d[6] =
-      ((uint8_t)_a[6] > (uint8_t)_b[6]) ? ((uint8_t)_a[6]) : ((uint8_t)_b[6]);
-  d[7] =
-      ((uint8_t)_a[7] > (uint8_t)_b[7]) ? ((uint8_t)_a[7]) : ((uint8_t)_b[7]);
-  d[8] =
-      ((uint8_t)_a[8] > (uint8_t)_b[8]) ? ((uint8_t)_a[8]) : ((uint8_t)_b[8]);
-  d[9] =
-      ((uint8_t)_a[9] > (uint8_t)_b[9]) ? ((uint8_t)_a[9]) : ((uint8_t)_b[9]);
-  d[10] = ((uint8_t)_a[10] > (uint8_t)_b[10]) ? ((uint8_t)_a[10])
-                                              : ((uint8_t)_b[10]);
-  d[11] = ((uint8_t)_a[11] > (uint8_t)_b[11]) ? ((uint8_t)_a[11])
-                                              : ((uint8_t)_b[11]);
-  d[12] = ((uint8_t)_a[12] > (uint8_t)_b[12]) ? ((uint8_t)_a[12])
-                                              : ((uint8_t)_b[12]);
-  d[13] = ((uint8_t)_a[13] > (uint8_t)_b[13]) ? ((uint8_t)_a[13])
-                                              : ((uint8_t)_b[13]);
-  d[14] = ((uint8_t)_a[14] > (uint8_t)_b[14]) ? ((uint8_t)_a[14])
-                                              : ((uint8_t)_b[14]);
-  d[15] = ((uint8_t)_a[15] > (uint8_t)_b[15]) ? ((uint8_t)_a[15])
-                                              : ((uint8_t)_b[15]);
-
+  uint8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] =
+        ((uint8_t)_a[i] > (uint8_t)_b[i]) ? ((uint8_t)_a[i]) : ((uint8_t)_b[i]);
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_max_epu8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6119,20 +5859,14 @@ result_t test_mm_min_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t d[8];
-  d[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] < _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] < _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] < _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] < _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] < _b[7] ? _a[7] : _b[7];
-
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_min_epi16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6142,34 +5876,14 @@ result_t test_mm_min_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[16];
-  d[0] = ((uint8_t)_a[0] < (uint8_t)_b[0]) ? (uint8_t)_a[0] : (uint8_t)_b[0];
-  d[1] = ((uint8_t)_a[1] < (uint8_t)_b[1]) ? (uint8_t)_a[1] : (uint8_t)_b[1];
-  d[2] = ((uint8_t)_a[2] < (uint8_t)_b[2]) ? (uint8_t)_a[2] : (uint8_t)_b[2];
-  d[3] = ((uint8_t)_a[3] < (uint8_t)_b[3]) ? (uint8_t)_a[3] : (uint8_t)_b[3];
-  d[4] = ((uint8_t)_a[4] < (uint8_t)_b[4]) ? (uint8_t)_a[4] : (uint8_t)_b[4];
-  d[5] = ((uint8_t)_a[5] < (uint8_t)_b[5]) ? (uint8_t)_a[5] : (uint8_t)_b[5];
-  d[6] = ((uint8_t)_a[6] < (uint8_t)_b[6]) ? (uint8_t)_a[6] : (uint8_t)_b[6];
-  d[7] = ((uint8_t)_a[7] < (uint8_t)_b[7]) ? (uint8_t)_a[7] : (uint8_t)_b[7];
-  d[8] = ((uint8_t)_a[8] < (uint8_t)_b[8]) ? (uint8_t)_a[8] : (uint8_t)_b[8];
-  d[9] = ((uint8_t)_a[9] < (uint8_t)_b[9]) ? (uint8_t)_a[9] : (uint8_t)_b[9];
-  d[10] =
-      ((uint8_t)_a[10] < (uint8_t)_b[10]) ? (uint8_t)_a[10] : (uint8_t)_b[10];
-  d[11] =
-      ((uint8_t)_a[11] < (uint8_t)_b[11]) ? (uint8_t)_a[11] : (uint8_t)_b[11];
-  d[12] =
-      ((uint8_t)_a[12] < (uint8_t)_b[12]) ? (uint8_t)_a[12] : (uint8_t)_b[12];
-  d[13] =
-      ((uint8_t)_a[13] < (uint8_t)_b[13]) ? (uint8_t)_a[13] : (uint8_t)_b[13];
-  d[14] =
-      ((uint8_t)_a[14] < (uint8_t)_b[14]) ? (uint8_t)_a[14] : (uint8_t)_b[14];
-  d[15] =
-      ((uint8_t)_a[15] < (uint8_t)_b[15]) ? (uint8_t)_a[15] : (uint8_t)_b[15];
-
+  uint8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = ((uint8_t)_a[i] < (uint8_t)_b[i]) ? (uint8_t)_a[i] : (uint8_t)_b[i];
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_min_epu8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6636,13 +6350,9 @@ result_t test_mm_set_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_set_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  int32_t d[4];
-  d[3] = impl.test_cases_ints[iter];
-  d[2] = impl.test_cases_ints[iter + 1];
-  d[1] = impl.test_cases_ints[iter + 2];
-  d[0] = impl.test_cases_ints[iter + 3];
-  __m128i a = _mm_set_epi32(d[3], d[2], d[1], d[0]);
-  return VALIDATE_INT32_M128(a, d);
+  const int32_t *_a = impl.test_cases_int_pointer1;
+  __m128i a = _mm_set_epi32(_a[3], _a[2], _a[1], _a[0]);
+  return VALIDATE_INT32_M128(a, _a);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6675,27 +6385,15 @@ result_t test_mm_set_epi64x(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_set_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
-  int8_t d[16];
-  d[0] = _a[0];
-  d[1] = _a[1];
-  d[2] = _a[2];
-  d[3] = _a[3];
-  d[4] = _a[4];
-  d[5] = _a[5];
-  d[6] = _a[6];
-  d[7] = _a[7];
-  d[8] = _a[8];
-  d[9] = _a[9];
-  d[10] = _a[10];
-  d[11] = _a[11];
-  d[12] = _a[12];
-  d[13] = _a[13];
-  d[14] = _a[14];
-  d[15] = _a[15];
-
-  __m128i c = _mm_set_epi8(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8],
-                           d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
-  return VALIDATE_INT8_M128(c, d);
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = _a[i];
+  }
+
+  __m128i c =
+      _mm_set_epi8(_c[15], _c[14], _c[13], _c[12], _c[11], _c[10], _c[9], _c[8],
+                   _c[7], _c[6], _c[5], _c[4], _c[3], _c[2], _c[1], _c[0]);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6704,10 +6402,10 @@ result_t test_mm_set_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_set_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const double *p = (const double *)impl.test_cases_float_pointer1;
-  double x = p[0];
-  double y = p[1];
-  __m128d a = _mm_set_pd(x, y);
-  return validate_double(a, y, x);
+  double d0 = p[0];
+  double d1 = p[1];
+  __m128d a = _mm_set_pd(d0, d1);
+  return validate_double(a, d1, d0);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6742,10 +6440,10 @@ result_t test_mm_set_sd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_set1_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
-  int16_t d0 = _a[0];
 
-  __m128i c = _mm_set1_epi16(d0);
-  return validate_int16(c, d0, d0, d0, d0, d0, d0, d0, d0);
+  __m128i c = _mm_set1_epi16(_a[0]);
+  return validate_int16(c, _a[0], _a[0], _a[0], _a[0], _a[0], _a[0], _a[0],
+                        _a[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6753,9 +6451,9 @@ result_t test_mm_set1_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_set1_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  int32_t x = impl.test_cases_ints[iter];
-  __m128i a = _mm_set1_epi32(x);
-  return validate_int32(a, x, x, x, x);
+  const int32_t *_a = impl.test_cases_int_pointer1;
+  __m128i a = _mm_set1_epi32(_a[0]);
+  return validate_int32(a, _a[0], _a[0], _a[0], _a[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6788,10 +6486,10 @@ result_t test_mm_set1_epi64x(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_set1_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
-  int8_t d0 = _a[0];
-  __m128i c = _mm_set1_epi8(d0);
-  return validate_int8(c, d0, d0, d0, d0, d0, d0, d0, d0, d0, d0, d0, d0, d0,
-                       d0, d0, d0);
+  __m128i c = _mm_set1_epi8(_a[0]);
+  return validate_int8(c, _a[0], _a[0], _a[0], _a[0], _a[0], _a[0], _a[0],
+                       _a[0], _a[0], _a[0], _a[0], _a[0], _a[0], _a[0], _a[0],
+                       _a[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6800,9 +6498,8 @@ result_t test_mm_set1_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_set1_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const double *_a = (const double *)impl.test_cases_float_pointer1;
-  double d0 = _a[0];
-  __m128d c = _mm_set1_pd(d0);
-  return validate_double(c, d0, d0);
+  __m128d c = _mm_set1_pd(_a[0]);
+  return validate_double(c, _a[0], _a[0]);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6859,12 +6556,11 @@ result_t test_mm_setr_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const double *p = (const double *)impl.test_cases_float_pointer1;
 
-  double x = p[0];
-  double y = p[1];
-
-  __m128d a = _mm_setr_pd(x, y);
+  double d0 = p[0];
+  double d1 = p[1];
 
-  return validate_double(a, x, y);
+  __m128d a = _mm_setr_pd(d0, d1);
+  return validate_double(a, d0, d1);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -6926,7 +6622,7 @@ result_t test_mm_shuffle_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   c = _mm_shuffle_pd(a, b, IDX);                                               \
                                                                                \
   _c[0] = _a[IDX & 0x1];                                                       \
-  _c[1] = _b[(IDX & 0x2) >> 1];                                                \
+  _c[1] = _b[(IDX >> 1) & 0x1];                                                \
   CHECK_RESULT(validate_double(c, _c[0], _c[1]))
 
   IMM_4_ITER
@@ -6998,21 +6694,16 @@ result_t test_mm_sll_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   __m128i a, b, c;
-  uint16_t _d[8];
+  uint16_t _c[8]; // expected lanes: 0 once the shift count exceeds 15
 
 #define TEST_IMPL(IDX)                                                         \
-  _d[0] = (IDX > 15) ? 0 : _a[0] << IDX;                                       \
-  _d[1] = (IDX > 15) ? 0 : _a[1] << IDX;                                       \
-  _d[2] = (IDX > 15) ? 0 : _a[2] << IDX;                                       \
-  _d[3] = (IDX > 15) ? 0 : _a[3] << IDX;                                       \
-  _d[4] = (IDX > 15) ? 0 : _a[4] << IDX;                                       \
-  _d[5] = (IDX > 15) ? 0 : _a[5] << IDX;                                       \
-  _d[6] = (IDX > 15) ? 0 : _a[6] << IDX;                                       \
-  _d[7] = (IDX > 15) ? 0 : _a[7] << IDX;                                       \
+  for (int i = 0; i < 8; i++) {                                                \
+    _c[i] = (IDX > 15) ? 0 : _a[i] << IDX;                                     \
+  }                                                                            \
   a = load_m128i(_a);                                                          \
   b = _mm_set1_epi64x(IDX);                                                    \
   c = _mm_sll_epi16(a, b);                                                     \
-  CHECK_RESULT(VALIDATE_INT16_M128(c, _d))
+  CHECK_RESULT(VALIDATE_INT16_M128(c, _c))
 
   IMM_64_ITER
 #undef TEST_IMPL
@@ -7030,10 +6721,9 @@ result_t test_mm_sll_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   uint32_t _d[4];
 
 #define TEST_IMPL(IDX)                                                         \
-  _d[0] = (IDX > 31) ? 0 : _a[0] << IDX;                                       \
-  _d[1] = (IDX > 31) ? 0 : _a[1] << IDX;                                       \
-  _d[2] = (IDX > 31) ? 0 : _a[2] << IDX;                                       \
-  _d[3] = (IDX > 31) ? 0 : _a[3] << IDX;                                       \
+  for (int i = 0; i < 4; i++) {                                                \
+    _d[i] = (IDX > 31) ? 0 : _a[i] << IDX;                                     \
+  }                                                                            \
   a = load_m128i(_a);                                                          \
   b = _mm_set1_epi64x(IDX);                                                    \
   c = _mm_sll_epi32(a, b);                                                     \
@@ -7073,20 +6763,15 @@ result_t test_mm_slli_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   __m128i a, c;
-  int16_t _d[8];
+  int16_t _c[8];
 
 #define TEST_IMPL(IDX)                                                         \
-  _d[0] = (IDX > 15) ? 0 : _a[0] << IDX;                                       \
-  _d[1] = (IDX > 15) ? 0 : _a[1] << IDX;                                       \
-  _d[2] = (IDX > 15) ? 0 : _a[2] << IDX;                                       \
-  _d[3] = (IDX > 15) ? 0 : _a[3] << IDX;                                       \
-  _d[4] = (IDX > 15) ? 0 : _a[4] << IDX;                                       \
-  _d[5] = (IDX > 15) ? 0 : _a[5] << IDX;                                       \
-  _d[6] = (IDX > 15) ? 0 : _a[6] << IDX;                                       \
-  _d[7] = (IDX > 15) ? 0 : _a[7] << IDX;                                       \
+  for (int i = 0; i < 8; i++) {                                                \
+    _c[i] = (IDX > 15) ? 0 : _a[i] << IDX;                                     \
+  }                                                                            \
   a = load_m128i(_a);                                                          \
   c = _mm_slli_epi16(a, IDX);                                                  \
-  CHECK_RESULT(VALIDATE_INT16_M128(c, _d))
+  CHECK_RESULT(VALIDATE_INT16_M128(c, _c))
 
   IMM_64_ITER
 #undef TEST_IMPL
@@ -7107,15 +6792,15 @@ result_t test_mm_slli_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int count = (int)(iter % 34 - 1); // range: -1 ~ 32
 #endif
 
-  int32_t d[4];
-  d[0] = (count & ~31) ? 0 : _a[0] << count;
-  d[1] = (count & ~31) ? 0 : _a[1] << count;
-  d[2] = (count & ~31) ? 0 : _a[2] << count;
-  d[3] = (count & ~31) ? 0 : _a[3] << count;
+  int32_t _c[4];
+  _c[0] = (count & ~31) ? 0 : _a[0] << count;
+  _c[1] = (count & ~31) ? 0 : _a[1] << count;
+  _c[2] = (count & ~31) ? 0 : _a[2] << count;
+  _c[3] = (count & ~31) ? 0 : _a[3] << count;
 
   __m128i a = load_m128i(_a);
   __m128i c = _mm_slli_epi32(a, count);
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7219,21 +6904,15 @@ result_t test_mm_sra_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int64_t count = (int64_t)(iter % 18 - 1); // range: -1 ~ 16
 
-  int16_t d[8];
-  d[0] = (count & ~15) ? (_a[0] < 0 ? ~UINT16_C(0) : 0) : (_a[0] >> count);
-  d[1] = (count & ~15) ? (_a[1] < 0 ? ~UINT16_C(0) : 0) : (_a[1] >> count);
-  d[2] = (count & ~15) ? (_a[2] < 0 ? ~UINT16_C(0) : 0) : (_a[2] >> count);
-  d[3] = (count & ~15) ? (_a[3] < 0 ? ~UINT16_C(0) : 0) : (_a[3] >> count);
-  d[4] = (count & ~15) ? (_a[4] < 0 ? ~UINT16_C(0) : 0) : (_a[4] >> count);
-  d[5] = (count & ~15) ? (_a[5] < 0 ? ~UINT16_C(0) : 0) : (_a[5] >> count);
-  d[6] = (count & ~15) ? (_a[6] < 0 ? ~UINT16_C(0) : 0) : (_a[6] >> count);
-  d[7] = (count & ~15) ? (_a[7] < 0 ? ~UINT16_C(0) : 0) : (_a[7] >> count);
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (count & ~15) ? (_a[i] < 0 ? UINT16_MAX : 0) : (_a[i] >> count);
+  }
 
   __m128i a = _mm_load_si128((const __m128i *)_a);
   __m128i b = _mm_set1_epi64x(count);
   __m128i c = _mm_sra_epi16(a, b);
-
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7244,17 +6923,16 @@ result_t test_mm_sra_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
   const int64_t count = (int64_t)(iter % 34 - 1); // range: -1 ~ 32
 
-  int32_t d[4];
-  d[0] = (count & ~31) ? (_a[0] < 0 ? ~UINT32_C(0) : 0) : _a[0] >> count;
-  d[1] = (count & ~31) ? (_a[1] < 0 ? ~UINT32_C(0) : 0) : _a[1] >> count;
-  d[2] = (count & ~31) ? (_a[2] < 0 ? ~UINT32_C(0) : 0) : _a[2] >> count;
-  d[3] = (count & ~31) ? (_a[3] < 0 ? ~UINT32_C(0) : 0) : _a[3] >> count;
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = (count & ~31) ? (_a[i] < 0 ? UINT32_MAX : 0) : _a[i] >> count;
+  }
 
   __m128i a = _mm_load_si128((const __m128i *)_a);
   __m128i b = _mm_set1_epi64x(count);
   __m128i c = _mm_sra_epi32(a, b);
 
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7305,20 +6983,15 @@ result_t test_mm_srl_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int64_t count = (int64_t)(iter % 18 - 1); // range: -1 ~ 16
 
-  uint16_t _d[8];
-  _d[0] = (count > 15) ? 0 : (uint16_t)(_a[0]) >> count;
-  _d[1] = (count > 15) ? 0 : (uint16_t)(_a[1]) >> count;
-  _d[2] = (count > 15) ? 0 : (uint16_t)(_a[2]) >> count;
-  _d[3] = (count > 15) ? 0 : (uint16_t)(_a[3]) >> count;
-  _d[4] = (count > 15) ? 0 : (uint16_t)(_a[4]) >> count;
-  _d[5] = (count > 15) ? 0 : (uint16_t)(_a[5]) >> count;
-  _d[6] = (count > 15) ? 0 : (uint16_t)(_a[6]) >> count;
-  _d[7] = (count > 15) ? 0 : (uint16_t)(_a[7]) >> count;
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (count > 15) ? 0 : (uint16_t)(_a[i]) >> count;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = _mm_set1_epi64x(count);
   __m128i c = _mm_srl_epi16(a, b);
-  return VALIDATE_INT16_M128(c, _d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7329,17 +7002,15 @@ result_t test_mm_srl_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
   const int64_t count = (int64_t)(iter % 34 - 1); // range: -1 ~ 32
 
-  uint32_t d[4];
-  d[0] = (count > 31) ? 0 : (uint32_t)(_a[0]) >> count;
-  d[1] = (count > 31) ? 0 : (uint32_t)(_a[1]) >> count;
-  d[2] = (count > 31) ? 0 : (uint32_t)(_a[2]) >> count;
-  d[3] = (count > 31) ? 0 : (uint32_t)(_a[3]) >> count;
+  uint32_t _c[4]; // expected lanes: logical right shift, 0 when count > 31
+  for (int i = 0; i < 4; i++) {
+    _c[i] = (count > 31) ? 0 : (uint32_t)(_a[i]) >> count;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = _mm_set1_epi64x(count);
   __m128i c = _mm_srl_epi32(a, b);
-
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7368,20 +7039,14 @@ result_t test_mm_srli_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int count = (int)(iter % 18 - 1); // range: -1 ~ 16
 
-  int16_t d[8];
-  d[0] = count & ~15 ? 0 : (uint16_t)(_a[0]) >> count;
-  d[1] = count & ~15 ? 0 : (uint16_t)(_a[1]) >> count;
-  d[2] = count & ~15 ? 0 : (uint16_t)(_a[2]) >> count;
-  d[3] = count & ~15 ? 0 : (uint16_t)(_a[3]) >> count;
-  d[4] = count & ~15 ? 0 : (uint16_t)(_a[4]) >> count;
-  d[5] = count & ~15 ? 0 : (uint16_t)(_a[5]) >> count;
-  d[6] = count & ~15 ? 0 : (uint16_t)(_a[6]) >> count;
-  d[7] = count & ~15 ? 0 : (uint16_t)(_a[7]) >> count;
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = count & ~15 ? 0 : (uint16_t)(_a[i]) >> count;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i c = _mm_srli_epi16(a, count);
-
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7392,16 +7057,14 @@ result_t test_mm_srli_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
   const int count = (int)(iter % 34 - 1); // range: -1 ~ 32
 
-  int32_t d[4];
-  d[0] = count & ~31 ? 0 : (uint32_t)(_a[0]) >> count;
-  d[1] = count & ~31 ? 0 : (uint32_t)(_a[1]) >> count;
-  d[2] = count & ~31 ? 0 : (uint32_t)(_a[2]) >> count;
-  d[3] = count & ~31 ? 0 : (uint32_t)(_a[3]) >> count;
+  int32_t _c[4]; // expected: logical right shift; 0 unless count in 0..31
+  for (int i = 0; i < 4; i++) {
+    _c[i] = count & ~31 ? 0 : (uint32_t)(_a[i]) >> count;
+  }
 
   __m128i a = load_m128i(_a);
   __m128i c = _mm_srli_epi32(a, count);
-
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7417,7 +7080,6 @@ result_t test_mm_srli_epi64(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
   __m128i a = load_m128i(_a);
   __m128i c = _mm_srli_epi64(a, count);
-
   return validate_int64(c, d0, d1);
 #else
   return TEST_UNIMPL;
@@ -7466,13 +7128,13 @@ result_t test_mm_srli_si128(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 result_t test_mm_store_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   double *p = (double *)impl.test_cases_float_pointer1;
-  double x = impl.test_cases_floats[iter + 4];
-  double y = impl.test_cases_floats[iter + 6];
+  double d0 = impl.test_cases_float_pointer2[0];
+  double d1 = impl.test_cases_float_pointer2[1];
 
-  __m128d a = _mm_set_pd(x, y);
+  __m128d a = _mm_set_pd(d0, d1);
   _mm_store_pd(p, a);
-  ASSERT_RETURN(p[0] == y);
-  ASSERT_RETURN(p[1] == x);
+  ASSERT_RETURN(p[0] == d1);
+  ASSERT_RETURN(p[1] == d0);
   return TEST_SUCCESS;
 #else
   return TEST_UNIMPL;
@@ -7698,20 +7360,15 @@ result_t test_mm_sub_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  int16_t d[8];
-  d[0] = _a[0] - _b[0];
-  d[1] = _a[1] - _b[1];
-  d[2] = _a[2] - _b[2];
-  d[3] = _a[3] - _b[3];
-  d[4] = _a[4] - _b[4];
-  d[5] = _a[5] - _b[5];
-  d[6] = _a[6] - _b[6];
-  d[7] = _a[7] - _b[7];
+  int16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] - _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_sub_epi16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7721,16 +7378,15 @@ result_t test_mm_sub_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int32_t *_a = impl.test_cases_int_pointer1;
   const int32_t *_b = impl.test_cases_int_pointer2;
-  int32_t d[4];
-  d[0] = _a[0] - _b[0];
-  d[1] = _a[1] - _b[1];
-  d[2] = _a[2] - _b[2];
-  d[3] = _a[3] - _b[3];
+  int32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] - _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_sub_epi32(a, b);
-  return VALIDATE_INT32_M128(c, d);
+  return VALIDATE_INT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7756,28 +7412,15 @@ result_t test_mm_sub_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = _a[0] - _b[0];
-  d[1] = _a[1] - _b[1];
-  d[2] = _a[2] - _b[2];
-  d[3] = _a[3] - _b[3];
-  d[4] = _a[4] - _b[4];
-  d[5] = _a[5] - _b[5];
-  d[6] = _a[6] - _b[6];
-  d[7] = _a[7] - _b[7];
-  d[8] = _a[8] - _b[8];
-  d[9] = _a[9] - _b[9];
-  d[10] = _a[10] - _b[10];
-  d[11] = _a[11] - _b[11];
-  d[12] = _a[12] - _b[12];
-  d[13] = _a[13] - _b[13];
-  d[14] = _a[14] - _b[14];
-  d[15] = _a[15] - _b[15];
+  int8_t _c[16]; // expected lane-wise _a - _b for all 16 int8 lanes
+  for (int i = 0; i < 16; i++) {
+    _c[i] = _a[i] - _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_sub_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7837,22 +7480,22 @@ result_t test_mm_subs_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
 
-  int16_t d[8];
+  int16_t _c[8];
   for (int i = 0; i < 8; i++) {
     int32_t res = (int32_t)_a[i] - (int32_t)_b[i];
-    if (res > INT16_MAX)
-      d[i] = INT16_MAX;
-    else if (res < INT16_MIN)
-      d[i] = INT16_MIN;
-    else
-      d[i] = (int16_t)res;
+    if (res > INT16_MAX) {
+      _c[i] = INT16_MAX;
+    } else if (res < INT16_MIN) {
+      _c[i] = INT16_MIN;
+    } else {
+      _c[i] = res;
+    }
   }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_subs_epi16(a, b);
-
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7863,22 +7506,22 @@ result_t test_mm_subs_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
 
-  int8_t d[16];
+  int8_t _c[16];
   for (int i = 0; i < 16; i++) {
     int16_t res = (int16_t)_a[i] - (int16_t)_b[i];
-    if (res > INT8_MAX)
-      d[i] = INT8_MAX;
-    else if (res < INT8_MIN)
-      d[i] = INT8_MIN;
-    else
-      d[i] = (int8_t)res;
+    if (res > INT8_MAX) {
+      _c[i] = INT8_MAX;
+    } else if (res < INT8_MIN) {
+      _c[i] = INT8_MIN;
+    } else {
+      _c[i] = res;
+    }
   }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_subs_epi8(a, b);
-
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7888,37 +7531,17 @@ result_t test_mm_subs_epu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
   const int16_t *_b = (const int16_t *)impl.test_cases_int_pointer2;
-  uint16_t d[8];
-  d[0] = (uint16_t)_a[0] - (uint16_t)_b[0];
-  if (d[0] > (uint16_t)_a[0])
-    d[0] = 0;
-  d[1] = (uint16_t)_a[1] - (uint16_t)_b[1];
-  if (d[1] > (uint16_t)_a[1])
-    d[1] = 0;
-  d[2] = (uint16_t)_a[2] - (uint16_t)_b[2];
-  if (d[2] > (uint16_t)_a[2])
-    d[2] = 0;
-  d[3] = (uint16_t)_a[3] - (uint16_t)_b[3];
-  if (d[3] > (uint16_t)_a[3])
-    d[3] = 0;
-  d[4] = (uint16_t)_a[4] - (uint16_t)_b[4];
-  if (d[4] > (uint16_t)_a[4])
-    d[4] = 0;
-  d[5] = (uint16_t)_a[5] - (uint16_t)_b[5];
-  if (d[5] > (uint16_t)_a[5])
-    d[5] = 0;
-  d[6] = (uint16_t)_a[6] - (uint16_t)_b[6];
-  if (d[6] > (uint16_t)_a[6])
-    d[6] = 0;
-  d[7] = (uint16_t)_a[7] - (uint16_t)_b[7];
-  if (d[7] > (uint16_t)_a[7])
-    d[7] = 0;
-
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (uint16_t)_a[i] - (uint16_t)_b[i];
+    if (_c[i] > (uint16_t)_a[i]) {
+      _c[i] = 0;
+    }
+  }
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
   __m128i c = _mm_subs_epu16(a, b);
-  return VALIDATE_INT16_M128(c, d);
+  return VALIDATE_INT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -7928,60 +7551,18 @@ result_t test_mm_subs_epu8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  uint8_t d[16];
-  d[0] = (uint8_t)_a[0] - (uint8_t)_b[0];
-  if (d[0] > (uint8_t)_a[0])
-    d[0] = 0;
-  d[1] = (uint8_t)_a[1] - (uint8_t)_b[1];
-  if (d[1] > (uint8_t)_a[1])
-    d[1] = 0;
-  d[2] = (uint8_t)_a[2] - (uint8_t)_b[2];
-  if (d[2] > (uint8_t)_a[2])
-    d[2] = 0;
-  d[3] = (uint8_t)_a[3] - (uint8_t)_b[3];
-  if (d[3] > (uint8_t)_a[3])
-    d[3] = 0;
-  d[4] = (uint8_t)_a[4] - (uint8_t)_b[4];
-  if (d[4] > (uint8_t)_a[4])
-    d[4] = 0;
-  d[5] = (uint8_t)_a[5] - (uint8_t)_b[5];
-  if (d[5] > (uint8_t)_a[5])
-    d[5] = 0;
-  d[6] = (uint8_t)_a[6] - (uint8_t)_b[6];
-  if (d[6] > (uint8_t)_a[6])
-    d[6] = 0;
-  d[7] = (uint8_t)_a[7] - (uint8_t)_b[7];
-  if (d[7] > (uint8_t)_a[7])
-    d[7] = 0;
-  d[8] = (uint8_t)_a[8] - (uint8_t)_b[8];
-  if (d[8] > (uint8_t)_a[8])
-    d[8] = 0;
-  d[9] = (uint8_t)_a[9] - (uint8_t)_b[9];
-  if (d[9] > (uint8_t)_a[9])
-    d[9] = 0;
-  d[10] = (uint8_t)_a[10] - (uint8_t)_b[10];
-  if (d[10] > (uint8_t)_a[10])
-    d[10] = 0;
-  d[11] = (uint8_t)_a[11] - (uint8_t)_b[11];
-  if (d[11] > (uint8_t)_a[11])
-    d[11] = 0;
-  d[12] = (uint8_t)_a[12] - (uint8_t)_b[12];
-  if (d[12] > (uint8_t)_a[12])
-    d[12] = 0;
-  d[13] = (uint8_t)_a[13] - (uint8_t)_b[13];
-  if (d[13] > (uint8_t)_a[13])
-    d[13] = 0;
-  d[14] = (uint8_t)_a[14] - (uint8_t)_b[14];
-  if (d[14] > (uint8_t)_a[14])
-    d[14] = 0;
-  d[15] = (uint8_t)_a[15] - (uint8_t)_b[15];
-  if (d[15] > (uint8_t)_a[15])
-    d[15] = 0;
+  uint8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = (uint8_t)_a[i] - (uint8_t)_b[i];
+    if (_c[i] > (uint8_t)_a[i]) {
+      _c[i] = 0;
+    }
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_subs_epu8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -8096,7 +7677,6 @@ result_t test_mm_ucomineq_sd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128d a = load_m128d(_a);
   __m128d b = load_m128d(_b);
   int32_t c = _mm_ucomineq_sd(a, b);
-
   ASSERT_RETURN(c == _c);
   return TEST_SUCCESS;
 #endif
@@ -8562,17 +8142,11 @@ result_t test_mm_abs_epi16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   __m128i a = load_m128i(_a);
   __m128i c = _mm_abs_epi16(a);
 
-  uint32_t d[8];
-  d[0] = (_a[0] < 0) ? -_a[0] : _a[0];
-  d[1] = (_a[1] < 0) ? -_a[1] : _a[1];
-  d[2] = (_a[2] < 0) ? -_a[2] : _a[2];
-  d[3] = (_a[3] < 0) ? -_a[3] : _a[3];
-  d[4] = (_a[4] < 0) ? -_a[4] : _a[4];
-  d[5] = (_a[5] < 0) ? -_a[5] : _a[5];
-  d[6] = (_a[6] < 0) ? -_a[6] : _a[6];
-  d[7] = (_a[7] < 0) ? -_a[7] : _a[7];
-
-  return VALIDATE_UINT16_M128(c, d);
+  uint32_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = (_a[i] < 0) ? -_a[i] : _a[i];
+  }
+  return VALIDATE_UINT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10055,20 +9629,21 @@ result_t test_mm_insert_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
   const int32_t insert = (int32_t)*impl.test_cases_int_pointer2;
   __m128i a, b;
+  int32_t _c[4];
 
 #define TEST_IMPL(IDX)                                                         \
-  int32_t d##IDX[4];                                                           \
   for (int i = 0; i < 4; i++) {                                                \
-    d##IDX[i] = _a[i];                                                         \
+    _c[i] = _a[i];                                                             \
   }                                                                            \
-  d##IDX[IDX] = insert;                                                        \
+  _c[IDX] = insert;                                                            \
                                                                                \
   a = load_m128i(_a);                                                          \
   b = _mm_insert_epi32(a, (int)insert, IDX);                                   \
-  CHECK_RESULT(VALIDATE_INT32_M128(b, d##IDX));
+  CHECK_RESULT(VALIDATE_INT32_M128(b, _c));
 
   IMM_4_ITER
 #undef TEST_IMPL
+
   return TEST_SUCCESS;
 #else
   return TEST_UNIMPL;
@@ -10179,29 +9754,15 @@ result_t test_mm_max_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
-  int8_t d[16];
-  d[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] > _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] > _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] > _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] > _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] > _b[7] ? _a[7] : _b[7];
-  d[8] = _a[8] > _b[8] ? _a[8] : _b[8];
-  d[9] = _a[9] > _b[9] ? _a[9] : _b[9];
-  d[10] = _a[10] > _b[10] ? _a[10] : _b[10];
-  d[11] = _a[11] > _b[11] ? _a[11] : _b[11];
-  d[12] = _a[12] > _b[12] ? _a[12] : _b[12];
-  d[13] = _a[13] > _b[13] ? _a[13] : _b[13];
-  d[14] = _a[14] > _b[14] ? _a[14] : _b[14];
-  d[15] = _a[15] > _b[15] ? _a[15] : _b[15];
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
   __m128i c = _mm_max_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10212,21 +9773,15 @@ result_t test_mm_max_epu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
   const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
 
-  uint16_t d[8];
-  d[0] = _a[0] > _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] > _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] > _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] > _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] > _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] > _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] > _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] > _b[7] ? _a[7] : _b[7];
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] > _b[i] ? _a[i] : _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_max_epu16(a, b);
-
-  return VALIDATE_UINT16_M128(c, d);
+  return VALIDATE_UINT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10279,29 +9834,15 @@ result_t test_mm_min_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1;
   const int8_t *_b = (const int8_t *)impl.test_cases_int_pointer2;
 
-  int8_t d[16];
-  d[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] < _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] < _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] < _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] < _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] < _b[7] ? _a[7] : _b[7];
-  d[8] = _a[8] < _b[8] ? _a[8] : _b[8];
-  d[9] = _a[9] < _b[9] ? _a[9] : _b[9];
-  d[10] = _a[10] < _b[10] ? _a[10] : _b[10];
-  d[11] = _a[11] < _b[11] ? _a[11] : _b[11];
-  d[12] = _a[12] < _b[12] ? _a[12] : _b[12];
-  d[13] = _a[13] < _b[13] ? _a[13] : _b[13];
-  d[14] = _a[14] < _b[14] ? _a[14] : _b[14];
-  d[15] = _a[15] < _b[15] ? _a[15] : _b[15];
+  int8_t _c[16];
+  for (int i = 0; i < 16; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
-
   __m128i c = _mm_min_epi8(a, b);
-  return VALIDATE_INT8_M128(c, d);
+  return VALIDATE_INT8_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10312,21 +9853,15 @@ result_t test_mm_min_epu16(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1;
   const uint16_t *_b = (const uint16_t *)impl.test_cases_int_pointer2;
 
-  uint16_t d[8];
-  d[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] < _b[3] ? _a[3] : _b[3];
-  d[4] = _a[4] < _b[4] ? _a[4] : _b[4];
-  d[5] = _a[5] < _b[5] ? _a[5] : _b[5];
-  d[6] = _a[6] < _b[6] ? _a[6] : _b[6];
-  d[7] = _a[7] < _b[7] ? _a[7] : _b[7];
+  uint16_t _c[8];
+  for (int i = 0; i < 8; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_min_epu16(a, b);
-
-  return VALIDATE_UINT16_M128(c, d);
+  return VALIDATE_UINT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10337,17 +9872,15 @@ result_t test_mm_min_epu32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
   const uint32_t *_a = (const uint32_t *)impl.test_cases_int_pointer1;
   const uint32_t *_b = (const uint32_t *)impl.test_cases_int_pointer2;
 
-  uint32_t d[4];
-  d[0] = _a[0] < _b[0] ? _a[0] : _b[0];
-  d[1] = _a[1] < _b[1] ? _a[1] : _b[1];
-  d[2] = _a[2] < _b[2] ? _a[2] : _b[2];
-  d[3] = _a[3] < _b[3] ? _a[3] : _b[3];
+  uint32_t _c[4];
+  for (int i = 0; i < 4; i++) {
+    _c[i] = _a[i] < _b[i] ? _a[i] : _b[i];
+  }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_min_epu32(a, b);
-
-  return VALIDATE_UINT32_M128(c, d);
+  return VALIDATE_UINT32_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL
@@ -10441,34 +9974,32 @@ result_t test_mm_mullo_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 
 result_t test_mm_packus_epi32(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
-  uint16_t max = UINT16_MAX;
-  uint16_t min = 0;
   const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
   const int32_t *_b = (const int32_t *)impl.test_cases_int_pointer2;
 
-  uint16_t d[8];
+  uint16_t _c[8];
   for (int i = 0; i < 4; i++) {
-    if (_a[i] > (int32_t)max)
-      d[i] = max;
-    else if (_a[i] < (int32_t)min)
-      d[i] = min;
-    else
-      d[i] = (uint16_t)_a[i];
-  }
-  for (int i = 0; i < 4; i++) {
-    if (_b[i] > (int32_t)max)
-      d[i + 4] = max;
-    else if (_b[i] < (int32_t)min)
-      d[i + 4] = min;
-    else
-      d[i + 4] = (uint16_t)_b[i];
+    if (_a[i] > UINT16_MAX) {
+      _c[i] = UINT16_MAX;
+    } else if (_a[i] < 0) {
+      _c[i] = 0;
+    } else {
+      _c[i] = (uint16_t)_a[i];
+    }
+
+    if (_b[i] > UINT16_MAX) {
+      _c[i + 4] = UINT16_MAX;
+    } else if (_b[i] < 0) {
+      _c[i + 4] = 0;
+    } else {
+      _c[i + 4] = (uint16_t)_b[i];
+    }
   }
 
   __m128i a = load_m128i(_a);
   __m128i b = load_m128i(_b);
   __m128i c = _mm_packus_epi32(a, b);
-
-  return VALIDATE_UINT16_M128(c, d);
+  return VALIDATE_UINT16_M128(c, _c);
 #else
   return TEST_UNIMPL;
 #endif // ENABLE_TEST_ALL