Skip to content

Commit

Permalink
gpu: intel: jit: gemm: add new thin kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
rjoursler committed May 2, 2024
1 parent d8e8593 commit 52bd4f8
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/gpu/intel/jit/gemm/kernel.db
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*******************************************************************************/

/*@kcatalog@*/
kcatalog::FlatCatalog<1021> _CATALOG_
{1, 8377, 1021, {
kcatalog::FlatCatalog<1022> _CATALOG_
{1, 8378, 1022, {
{{'9', "gemm", {"B", "B", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "ab2x2 as8x2 ab l4 acb", {8, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {4096, 4096, 2048}, {4096, 4096, 2048}, {32, 16, 16}, {2, 8, 1}, 1, (WGType) 0, 1, 0, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {512}}},
{{'9', "gemm", {"B", "B", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "ab4 ab4x2 ab l4 acb nmk", {8, (LoopType) 255, 128, {(LoopType) 1, (LoopType) 0, (LoopType) 255}, {4096, 4096, 2048}, {4096, 4096, 2048}, {32, 16, 8}, {8, 2, 1}, 1, (WGType) 0, 1, 0, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {512}}},
{{'9', "gemm", {"B", "B", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "as8 as16 ab l4 acb", {8, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {4096, 4096, 1024}, {4096, 4096, 1024}, {16, 16, 16}, {2, 8, 1}, 1, (WGType) 0, 1, 0, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {256}}},
Expand Down Expand Up @@ -1040,5 +1040,6 @@ kcatalog::FlatCatalog<1021> _CATALOG_
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS16x2+S16@32 aB32/16 aB wg 4x2x4 kr cb4x2 ks32 af vav di hi pt bk0 sm sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {8192, 8192, 16777216}, {16, 16, 32}, {4, 2, 4}, 1, (WGType) 1, 261, 32768, 32768, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.11924e+06, 685119, -3931.23, 112817, 0, 0, 1.10764, 1.15354, 2.18745, 4.81102, 0.0412576, 0.0412576, 0, 0.774083, 1.27327, 0.874451, 4.61396e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS16+S1,16@32 aB32x2 aB wg 1x4x8 kr cb4 ks32 af vav di hi pt bk0 sm sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {8192, 8192, 16777216}, {32, 8, 32}, {1, 4, 8}, 1, (WGType) 1, 261, 65536, 16384, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.0672e+06, 762832, 500.274, 77177.4, 0, 0, 1.36188, 1.19635, 3.53273, 8.13743, 0.0615908, 0.0615908, 0, 0.79968, 1.28461, 0.929214, 4.37091e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS16+S32@48 aB32/16x2 aB wg 8x4 cb4x2 ks32 af vav di hi pt bk0 sm sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {8192, 8192, 16777216}, {16, 16, 32}, {8, 4, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {964729, 752109, 0, 0, 0, 0, 1.46062, 1.46554, 1.01724, 2.17327, 0.0239322, 0.0239322, 0, 1, 1.25665, 0.945813, 3.00962e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS16+S64@64 aB16x2 aB wg 4x8 cb4 ks64 af vav di hi pt bk0 sm sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.00341e+06, 675337, 0, 0, 0, 0, 2.06693, 1.45219, 1.47193, 3.06315, 0.0374548, 0.0374548, 0, 1, 1.33339, 0.377183, 1.26633e-11}}}
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS16+S64@64 aB16x2 aB wg 4x8 cb4 ks64 af vav di hi pt bk0 sm sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.00341e+06, 675337, 0, 0, 0, 0, 2.06693, 1.45219, 1.47193, 3.06315, 0.0374548, 0.0374548, 0, 1, 1.33339, 0.377183, 1.26633e-11}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 kr af vav ar sb64 bm0 bk0 sys np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}}
}}

0 comments on commit 52bd4f8

Please sign in to comment.