Skip to content

Commit

Permalink
w
Browse files (browse the repository at this point in the history)
  • Loading branch information
nihui committed Jan 16, 2025
1 parent 4d0e47a commit 36d37f6
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/layer/x86/convolution_im2col_gemm_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blo
const signed char* p0 = (const signed char*)bottom_blob.channel(k) + (j + jj);

int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -676,6 +677,7 @@ static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blo
pp += 4;
p0 += bottom_blob.cstep * 2;
}
#endif // __SSE2__
for (; kk < max_kk; kk++)
{
pp[0] = p0[0];
Expand Down Expand Up @@ -715,6 +717,7 @@ static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blo
const signed char* p0 = (const signed char*)bottom_blob.channel(k) + (j + jj);

int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand All @@ -733,6 +736,7 @@ static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blo
p0 += bottom_blob.cstep * 4;
}
#endif // __AVX512VNNI__ || __AVXVNNI__
#endif // __SSE2__
for (; kk < max_kk; kk++)
{
pp[0] = p0[0];
Expand Down Expand Up @@ -2179,6 +2183,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
if (elempack == 1)
{
int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -2242,6 +2247,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
pp[3] = sptr1[1];
pp += 4;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
int p0 = (k + kk) / maxk;
Expand Down Expand Up @@ -2301,6 +2307,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
int kk = 0;
if (elempack == 1)
{
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -2392,6 +2399,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
pp[3] = sptr11[0];
pp += 4;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
int p = (k + kk) / maxk;
Expand Down Expand Up @@ -2459,6 +2467,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
if (elempack == 1)
{
int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -2525,6 +2534,7 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&
pp[1] = sptr1[0];
pp += 2;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
int p = (k + kk) / maxk;
Expand Down

0 comments on commit 36d37f6

Please sign in to comment.