From 0cf9a8a5bdf6e528952f80a1c408ad1c3e1fd1ec Mon Sep 17 00:00:00 2001
From: "Xuxin, Zeng"
Date: Wed, 23 Oct 2024 13:00:21 -0700
Subject: [PATCH 1/3] cpu: x64: matmul: fix segfault on group scales

---
 src/cpu/x64/matmul/brgemm_matmul_utils.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
index 413c22dc26d..f5b261de229 100644
--- a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
+++ b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
@@ -1812,10 +1812,11 @@ void init_aux_values(brgemm_matmul_conf_t &bgmmc,
     bgmmc.has_zero_point_b = bgmmc.wei_zp_type != brgemm_broadcast_t::none;
     bgmmc.has_zero_point_c = bgmmc.dst_zp_type != brgemm_broadcast_t::none;
     bgmmc.post_ops_applicable = one_of(true, bgmmc.with_sum, bgmmc.with_bias,
-            bgmmc.with_scales, bgmmc.with_eltwise, bgmmc.with_binary,
-            bgmmc.acc_dt != bgmmc.dst_dt, bgmmc.s8s8_compensation_required,
-            bgmmc.has_zero_point_a, bgmmc.has_zero_point_b,
-            bgmmc.has_zero_point_c, bgmmc.with_dst_scales);
+            bgmmc.with_scales && !bgmmc.apply_scales_in_buffer_b,
+            bgmmc.with_eltwise, bgmmc.with_binary, bgmmc.acc_dt != bgmmc.dst_dt,
+            bgmmc.s8s8_compensation_required, bgmmc.has_zero_point_a,
+            bgmmc.has_zero_point_b, bgmmc.has_zero_point_c,
+            bgmmc.with_dst_scales);
 
     bgmmc.zp_a_comp_shift_n = bgmmc.wei_n_blk;
     bgmmc.zp_a_comp_elems_per_thr

From e28a35abb25eca84f2beb7c296d2626c7dea4e3c Mon Sep 17 00:00:00 2001
From: "Xuxin, Zeng"
Date: Wed, 23 Oct 2024 13:02:23 -0700
Subject: [PATCH 2/3] cpu: x64: matmul: set wei_tag from any layout to plain
 layout for int4 weights

---
 src/cpu/x64/matmul/brgemm_matmul_utils.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
index f5b261de229..b35eeb84974 100644
--- a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
+++ b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
@@ -296,6 +296,7 @@ status_t brgemm_matmul_conf_utils_t::set_or_check_B_tag(memory_desc_t &B_md,
                 ? get_default_n_block(format_tag::undef)
                 : bgmmc.N_blk;
         bgmmc.wei_tag = blocked_B_layouts_allowed && !bgmmc.is_runtime_N
+                        && !bgmmc.is_int4_weights
                 ? this->pick_blocked_B_layout(default_n_block)
                 : plain_tensor_layout_tag;
         VCONDCHECK_BG(
@@ -311,6 +312,7 @@ status_t brgemm_matmul_conf_utils_t::set_or_check_B_tag(memory_desc_t &B_md,
         }
     } else {
         bgmmc.wei_tag = blocked_B_layouts_allowed && !bgmmc.is_runtime_N
+                        && !bgmmc.is_int4_weights
                 ? memory_desc_matches_one_of_tag(B_md, plain_tensor_layout_tag,
                         transposed_tensor_layout_tag, blocked_64n_B_layout_tag,
                         blocked_48n_B_layout_tag, blocked_32n_B_layout_tag,

From 94276471756314cedc9112f86360c126784e62f0 Mon Sep 17 00:00:00 2001
From: "Xuxin, Zeng"
Date: Tue, 29 Oct 2024 17:31:38 -0700
Subject: [PATCH 3/3] cpu: x64: matmul: disable parallel_k_reduction for int8

---
 src/cpu/x64/matmul/brgemm_matmul_utils.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
index b35eeb84974..a86e4d7054f 100644
--- a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
+++ b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp
@@ -892,6 +892,7 @@ float compute_blocking_heuristic_avx512(brgemm_matmul_conf_t &bgmmc,
     // Parallelize across K for shapes with big 'K' dimension
     bool bwd_w_par_k_blk = bgmmc.batch == 1
             && bm_conf_utils.check_is_transposed(bgmmc.src_tag)
+            && !bm_conf_utils.is_int8()
             && IMPLICATION(bm_conf_utils.is_bf16(), math::is_pow2(matmul.K))
             && matmul.K >= 2048;
     if (bwd_w_par_k_blk) {
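
Not part of the patch series itself: below is a small, illustrative C++ sketch of the configuration these commits target, a matmul with int4 weights and per-group weight scales. It assumes the oneDNN 3.5+ C++ API (dnnl::primitive_attr::set_scales with a groups argument, memory::data_type::s4, and the apply_to_int form of set_fpmath_mode); the shapes M/K/N and group size G are arbitrary placeholders, not values taken from the patches.

// Illustrative sketch only (assumed oneDNN 3.5+ C++ API); builds a matmul
// primitive descriptor with int4 weights and grouped weight scales.
#include <iostream>
#include "dnnl.hpp"

int main() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0);

    // Placeholder problem sizes; G is the scale group size along K.
    const memory::dim M = 32, K = 4096, N = 4096, G = 128;

    memory::desc src_md({M, K}, memory::data_type::f32, memory::format_tag::ab);
    // int4 weights with format_tag::any: the implementation picks the layout
    // (the second patch makes this resolve to a plain layout).
    memory::desc wei_md({K, N}, memory::data_type::s4, memory::format_tag::any);
    memory::desc dst_md({M, N}, memory::data_type::f32, memory::format_tag::ab);

    primitive_attr attr;
    // Grouped weight scales: the mask covers both weight dims, groups {G, 1},
    // i.e. one f16 scale per G x 1 block of B.
    attr.set_scales(DNNL_ARG_WEIGHTS, (1 << 0) + (1 << 1), {G, 1},
            memory::data_type::f16);
    // Allow integer weights to be decompressed for a floating-point matmul.
    attr.set_fpmath_mode(fpmath_mode::bf16, /* apply_to_int = */ true);

    try {
        matmul::primitive_desc pd(eng, src_md, wei_md, dst_md, attr);
        std::cout << "created: " << pd.impl_info_str() << "\n";
    } catch (const error &e) {
        std::cout << "unsupported on this build/CPU: " << e.what() << "\n";
    }
    return 0;
}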