Skip to content

Commit

Permalink
w
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Jan 17, 2025
1 parent 6dff15d commit 564cd1d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/layer/x86/convolution_im2col_gemm_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ static void convolution_gemm_transB_packed_tile_int8(const Mat& AT_tile, const M
#endif
}

static NCNN_FORCEINLINE void convolution_im2col_gemm_get_optimal_tile_mnk_int8(int M, int N, int K, int& TILE_M, int& TILE_N, int& TILE_K, int nT)
static void convolution_im2col_gemm_get_optimal_tile_mnk_int8(int M, int N, int K, int& TILE_M, int& TILE_N, int& TILE_K, int nT)
{
// resolve optimal tile size from cache size
const int l2_cache_size_int8 = (int)(get_cpu_level2_cache_size() / sizeof(signed char));
Expand Down Expand Up @@ -205,7 +205,7 @@ static NCNN_FORCEINLINE void convolution_im2col_gemm_get_optimal_tile_mnk_int8(i
}
}

static NCNN_FORCEINLINE void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blob, Mat& B, int j, int max_jj, int k, int max_kk)
static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bottom_blob, Mat& B, int j, int max_jj, int k, int max_kk)
{
const int elempack = bottom_blob.elempack;
const int cstep = (int)bottom_blob.cstep;
Expand Down Expand Up @@ -896,8 +896,6 @@ static void convolution_im2col_input_tile_int8_impl(const Mat& bottom_blob, Mat&

_mm_store_si128((__m128i*)pp, _p0);

// NCNN_LOGE("qwq");

pp += 16;
}
}
Expand Down
7 changes: 7 additions & 0 deletions src/layer/x86/convolution_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,14 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con
#if __SSE2__
if (opt.use_packing_layout)
{
#if __AVX512F__
out_elempack_int32 = num_output % 16 == 0 ? 16 : num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
#elif __AVX__
out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
#else
out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
#endif
// out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
}
#endif // __SSE2__

Expand Down

0 comments on commit 564cd1d

Please sign in to comment.