diff --git a/CMakeLists.txt b/CMakeLists.txt index cbb81f1b..e61de1fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) -project(xbyak LANGUAGES CXX VERSION 7.01) +project(xbyak LANGUAGES CXX VERSION 7.02) file(GLOB headers xbyak/*.h) diff --git a/doc/changelog.md b/doc/changelog.md index 59932c3a..aec29a49 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2023/Dec/20 ver 7.02 SHA* support APX * 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX * 2023/Dec/01 ver 7.00 support APX * 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16 diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 892fdbcb..d1a0bd73 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1402,14 +1402,6 @@ void put() { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, - - { 0xCC, "sha1rnds4", T_0F3A, true, 1 }, - { 0xC8, "sha1nexte", T_0F38, false, 1 }, - { 0xC9, "sha1msg1", T_0F38, false, 1 }, - { 0xCA, "sha1msg2", T_0F38, false, 1 }, - { 0xCB, "sha256rnds2", T_0F38, false, 1 }, - { 0xCC, "sha256msg1", T_0F38, false, 1 }, - { 0xCD, "sha256msg2", T_0F38, false, 1 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -1425,6 +1417,26 @@ void put() } } } + // sha + { + const struct Tbl { + uint8_t code; + uint8_t code2; + const char *name; + } tbl[] = { + { 0xC8, 0xD8, "sha1nexte" }, + { 0xC9, 0xD9, "sha1msg1" }, + { 0xCA, 0xDA, "sha1msg2" }, + { 0xCB, 0xDB, "sha256rnds2" }, + { 0xCC, 0xDC, "sha256msg1" }, + { 0xCD, 0xDD, "sha256msg2" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2); + } + puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { 
opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }"); + } // (m, x), (m, y) { const struct Tbl { @@ -2036,9 +2048,9 @@ void put64() std::string s1 = type2String(p->type1); std::string s2 = type2String(p->type2); if (p->idx == 8) { - printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code); + printf("void %s(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, s1.c_str(), p->code, s2.c_str(), p->code); } else { - printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code); + printf("void %s(const Address& addr) { opSSE_APX(xmm%d, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), p->code, s2.c_str(), p->code); } } } diff --git a/meson.build b/meson.build index a9f354e0..edc97cda 100644 --- a/meson.build +++ b/meson.build @@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '7.01', + version: '7.02', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) diff --git a/readme.md b/readme.md index 14ab86cd..0de90244 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ -# Xbyak 7.01 [![Badge Build]][Build Status] +# Xbyak 7.02 [![Badge Build]][Build Status] *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)* diff --git a/readme.txt b/readme.txt index 7d823567..08e9deb5 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.02 ----------------------------------------------------------------------------- ◎概要 @@ -404,6 +404,9 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2023/12/20 ver 7.02 SHA*のAPX対応 +2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 APX10/APX判定対応 +2023/12/01 ver 7.00 APX対応 2023/08/07 ver 
6.73 sha512/sm3/sm4/avx-vnni-int16追加 2023/08/02 ver 6.72 xabort, xbegin, xend追加 2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。 diff --git a/test/apx.cpp b/test/apx.cpp index b2675b87..207389c8 100644 --- a/test/apx.cpp +++ b/test/apx.cpp @@ -1870,3 +1870,31 @@ CYBOZU_TEST_AUTO(encodekey) CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } +CYBOZU_TEST_AUTO(sha) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + sha1msg1(xmm15, ptr [r30+r29*8+0x12]); + sha1msg2(xmm15, ptr [r30+r29*8+0x12]); + sha1nexte(xmm15, ptr [r30+r29*8+0x12]); + sha256msg1(xmm15, ptr [r30+r29*8+0x12]); + sha256msg2(xmm15, ptr [r30+r29*8+0x12]); + sha256rnds2(xmm15, ptr [r30+r29*8+0x12]); + sha1rnds4(xmm15, ptr [r30+r29*8+0x12], 0x23); + } + } c; + const uint8_t tbl[] = { + 0x62, 0x1c, 0x78, 0x08, 0xd9, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xda, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xd8, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xdc, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xdd, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xdb, 0x7c, 0xee, 0x12, + 0x62, 0x1c, 0x78, 0x08, 0xd4, 0x7c, 0xee, 0x12, 0x23, + }; + const size_t n = sizeof(tbl); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 8c633caa..c99e809d 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -155,7 +155,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x7010 /* 0xABCD = A.BC(.D) */ + VERSION = 0x7020 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -2738,15 +2738,6 @@ class CodeGenerator : public CodeArray { if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return; opVex(static_cast(*p1), 0, *p2, T_L0|T_0F|type, code); } - void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code) - { - if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX) - if (addr.hasRex2()) { - opROO(Reg(), addr, *x, type2, code); - return; - } - opRO(*x, addr, type1, code); - } 
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2) { if (r1.getIdx() < 8 && r2.getIdx() < 8) { @@ -2755,6 +2746,14 @@ class CodeGenerator : public CodeArray { } opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2); } + void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE) + { + if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) { + if (imm != NONE) db(imm); + return; + } + opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm); + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -3139,6 +3138,10 @@ class CodeGenerator : public CodeArray { // set default encoding to select Vex or Evex void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; } + void sha1msg12(const Xmm& x, const Operand& op) + { + opROO(Reg(), op, x, T_MUST_EVEX, 0xD9); + } /* use single byte nop if useMultiByteNop = false */ diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 196f5d40..d863d46a 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "7.01"; } +const char *getVersionString() const { return "7.02"; } void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); } void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } @@ -988,13 +988,13 @@ void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x4 void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524 void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524 void sfence() { db(0x0F); db(0xAE); db(0xF8); } -void sha1msg1(const Xmm& xmm, const Operand& op) 
{ opSSE(xmm, op, T_0F38, 0xC9, isXMM_XMMorMEM, NONE); } -void sha1msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCA, isXMM_XMMorMEM, NONE); } -void sha1nexte(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC8, isXMM_XMMorMEM, NONE); } -void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_0F3A, 0xCC, isXMM_XMMorMEM, imm); } -void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); } -void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); } -void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); } +void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); } +void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); } +void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); } +void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); } +void sha256msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCC, T_MUST_EVEX, 0xDC); } +void sha256msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCD, T_MUST_EVEX, 0xDD); } +void sha256rnds2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCB, T_MUST_EVEX, 0xDB); } void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); } void shl(const Operand& op, int imm) { opShift(op, imm, 12); } void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); } @@ -1926,14 +1926,14 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); } void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, 
addr, T_APX|T_66|T_0F38, 0xE8); } void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); } -void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); } -void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); } -void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } -void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } -void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); } -void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); } -void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } -void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } +void aesdec128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDD, T_F3|T_MUST_EVEX, 0xDD); } +void aesdec256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDF, T_F3|T_MUST_EVEX, 0xDF); } +void aesdecwide128kl(const Address& addr) { opSSE_APX(xmm1, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesdecwide256kl(const Address& addr) { opSSE_APX(xmm3, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesenc128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDC, T_F3|T_MUST_EVEX, 0xDC); } +void aesenc256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDE, T_F3|T_MUST_EVEX, 0xDE); } +void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } void encodekey128(const Reg32& r1, 
const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); } void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); } void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }