From f6ac7740cf0262aff5e09990042fe2062ef32f98 Mon Sep 17 00:00:00 2001 From: Basil Hess Date: Wed, 21 Feb 2024 17:22:07 +0100 Subject: [PATCH] - adds META.yml files - adds new api: crypto_sign_signature and crypto_sign_verify - adds namespacing - convert CRLF to LF --- .cmake/target.cmake | 2 +- META/MAYO_1_META.yml | 37 ++ META/MAYO_2_META.yml | 37 ++ META/MAYO_3_META.yml | 37 ++ META/MAYO_5_META.yml | 37 ++ apps/PQCgenKAT_sign.c | 2 +- apps/example.c | 16 +- apps/example_nistapi.c | 34 ++ include/mayo.h | 94 +++ include/{rng.h => randombytes.h} | 6 +- src/AVX2/echelon_form.h | 190 +++--- src/AVX2/echelon_form_loop.h | 114 ++-- src/AVX2/shuffle_arithmetic_64.h | 962 +++++++++++++++---------------- src/CMakeLists.txt | 4 + src/arithmetic.h | 61 +- src/common/randombytes_system.c | 2 +- src/generic/arithmetic_common.h | 2 +- src/generic/echelon_form.h | 276 ++++----- src/mayo.c | 46 +- src/mayo_1/api.c | 32 +- src/mayo_1/api.h | 16 + src/mayo_2/api.c | 31 +- src/mayo_2/api.h | 17 + src/mayo_3/api.c | 32 +- src/mayo_3/api.h | 18 + src/mayo_5/api.c | 32 +- src/mayo_5/api.h | 17 + src/params.c | 2 + test/bench.c | 18 +- test/test_kat.c | 22 +- test/test_mayo.c | 14 +- test/test_sample_solution.c | 2 +- 32 files changed, 1307 insertions(+), 905 deletions(-) create mode 100644 META/MAYO_1_META.yml create mode 100644 META/MAYO_2_META.yml create mode 100644 META/MAYO_3_META.yml create mode 100644 META/MAYO_5_META.yml rename include/{rng.h => randombytes.h} (91%) diff --git a/.cmake/target.cmake b/.cmake/target.cmake index 368b846..b030ca2 100644 --- a/.cmake/target.cmake +++ b/.cmake/target.cmake @@ -54,7 +54,7 @@ if (${MAYO_BUILD_TYPE} MATCHES "ref") option(ENABLE_AESNI "Use AESni" OFF) option(ENABLE_PARAMS_DYNAMIC "Use dynamic parameters" ON) add_definitions(-DMAYO_BUILD_TYPE_REF) -elseif(${MAYO_BUILD_TYPE} MATCHES "ref") +elseif(${MAYO_BUILD_TYPE} MATCHES "opt") add_definitions(-DMAYO_BUILD_TYPE_OPT) option(ENABLE_PARAMS_DYNAMIC "Use dynamic parameters" OFF) 
elseif(${MAYO_BUILD_TYPE} MATCHES "avx2") diff --git a/META/MAYO_1_META.yml b/META/MAYO_1_META.yml new file mode 100644 index 0000000..dde3d59 --- /dev/null +++ b/META/MAYO_1_META.yml @@ -0,0 +1,37 @@ +name: MAYO_1 +type: signature +claimed-nist-level: 1 +length-public-key: 1168 +length-secret-key: 24 +length-signature: 321 +nistkat-sha256: ba2473dedd92cf3b8a1fc14fc22f2ffdde972c8b64cfcd8cddb4f803e48df017 +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_1_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_1_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_1_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_1/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_1/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . 
+ compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_1_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_1_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_1_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_1/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_1/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_2_META.yml b/META/MAYO_2_META.yml new file mode 100644 index 0000000..0431803 --- /dev/null +++ b/META/MAYO_2_META.yml @@ -0,0 +1,37 @@ +name: MAYO_2 +type: signature +claimed-nist-level: 1 +length-public-key: 5488 +length-secret-key: 24 +length-signature: 180 +nistkat-sha256: 72cb237642b2c0c4e7f8c824d9c8601ac7189784649d28dbb2cccfb94732c9a3 +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . 
+ compile_opts: -DMAYO_VARIANT=MAYO_2 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_2_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_2_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_2_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_2/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_2/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_2 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_2_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_2_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_2_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_2/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_2/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_3_META.yml b/META/MAYO_3_META.yml new file mode 100644 index 0000000..eeb6767 --- /dev/null +++ b/META/MAYO_3_META.yml @@ -0,0 +1,37 @@ +name: MAYO_3 +type: signature +claimed-nist-level: 3 +length-public-key: 2656 +length-secret-key: 32 +length-signature: 577 +nistkat-sha256: 
dbc49f4fdfa0de69d416051215cb53c042c4a329d325452d079f3734b7467a6b +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_3_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_3_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_3_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_3/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_3/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . 
+ compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_3_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_3_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_3_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_3/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_3/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_5_META.yml b/META/MAYO_5_META.yml new file mode 100644 index 0000000..dd97a30 --- /dev/null +++ b/META/MAYO_5_META.yml @@ -0,0 +1,37 @@ +name: MAYO_5 +type: signature +claimed-nist-level: 5 +length-public-key: 5008 +length-secret-key: 40 +length-signature: 838 +nistkat-sha256: f2c1c69045c7d15e714a04119965e8a7007ef54f9293158587560227c97b237d +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . 
+ compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_5_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_5_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_5_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_5/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_5/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_5_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_5_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_5_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_5/api.c ./src/common/aes128ctr.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_5/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/apps/PQCgenKAT_sign.c b/apps/PQCgenKAT_sign.c index 7cd1fa7..1673a3a 100644 --- a/apps/PQCgenKAT_sign.c +++ b/apps/PQCgenKAT_sign.c @@ -31,7 +31,7 @@ protection within the United States. 
*/ #include "api.h" -#include "rng.h" +#include "randombytes.h" #include #include #include diff --git a/apps/example.c b/apps/example.c index 0694dce..9c25bb1 100644 --- a/apps/example.c +++ b/apps/example.c @@ -25,20 +25,20 @@ static int example_mayo(const mayo_params_t* p) { unsigned long long msglen = 32; - unsigned long long smlen = p->sig_bytes + msglen; + unsigned long long smlen = PARAM_sig_bytes(p) + msglen; - unsigned char *pk = calloc(p->cpk_bytes, 1); - unsigned char *sk = calloc(p->csk_bytes, 1); + unsigned char *pk = calloc(PARAM_cpk_bytes(p), 1); + unsigned char *sk = calloc(PARAM_csk_bytes(p), 1); - unsigned char *epk = calloc(p->epk_bytes, 1); + unsigned char *epk = calloc(PARAM_epk_bytes(p), 1); sk_t *esk = calloc(sizeof(sk_t), 1); - unsigned char *sig = calloc(p->sig_bytes + msglen, 1); + unsigned char *sig = calloc(PARAM_sig_bytes(p) + msglen, 1); unsigned char msg[32] = { 0xe }; unsigned char msg2[32] = { 0 }; - printf("Example with %s\n", p->name); + printf("Example with %s\n", PARAM_name(p)); printf("mayo_keypair -> "); int res = mayo_keypair(p, pk, sk); @@ -129,7 +129,7 @@ static int example_mayo(const mayo_params_t* p) { err: free(pk); free(epk); - mayo_secure_free(sk, p->csk_bytes); + mayo_secure_free(sk, PARAM_csk_bytes(p)); mayo_secure_free(esk, sizeof(sk_t)); free(sig); return res; @@ -145,6 +145,6 @@ int main(void) { } } #else - return example_mayo(&MAYO_VARIANT); + return example_mayo(0); #endif } diff --git a/apps/example_nistapi.c b/apps/example_nistapi.c index ffa9943..6c59146 100644 --- a/apps/example_nistapi.c +++ b/apps/example_nistapi.c @@ -22,6 +22,7 @@ static int example_mayo(void) { unsigned long long msglen = 32; unsigned long long smlen = CRYPTO_BYTES + msglen; + unsigned long long siglen = CRYPTO_BYTES; unsigned char *pk = calloc(CRYPTO_PUBLICKEYBYTES, 1); unsigned char *sk = calloc(CRYPTO_SECRETKEYBYTES, 1); @@ -76,6 +77,39 @@ static int example_mayo(void) { res = 0; printf("OK\n"); } + + printf("crypto_sign_signature -> 
"); + res = crypto_sign_signature(sig, &siglen, msg, msglen, sk); + if (res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + printf("OK\n"); + } + + printf("crypto_sign_verify (with correct signature) -> "); + res = crypto_sign_verify(sig, siglen, msg, msglen, pk); + if (res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + res = 0; + printf("OK\n"); + } + + printf("crypto_sign_verify (with altered signature) -> "); + sig[0] = ~sig[0]; + res = crypto_sign_verify(sig, siglen, msg, msglen, pk); + if (!res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + res = 0; + printf("OK\n"); + } err: free(pk); diff --git a/include/mayo.h b/include/mayo.h index f49bcab..cf630e5 100644 --- a/include/mayo.h +++ b/include/mayo.h @@ -13,6 +13,7 @@ #define F_TAIL_128 \ { 4, 8, 0, 4, 2 } // f(z) = z^128 + x*z^4 + x^2*z^3 + x^3*z + x^2 +#define MAYO_1_name "MAYO_1" #define MAYO_1_n 66 #define MAYO_1_m 64 #define MAYO_1_o 8 @@ -39,6 +40,7 @@ #define MAYO_1_pk_seed_bytes 16 #define MAYO_1_sk_seed_bytes 24 +#define MAYO_2_name "MAYO_2" #define MAYO_2_n 78 #define MAYO_2_m 64 #define MAYO_2_o 18 @@ -65,6 +67,7 @@ #define MAYO_2_pk_seed_bytes 16 #define MAYO_2_sk_seed_bytes 24 +#define MAYO_3_name "MAYO_3" #define MAYO_3_n 99 #define MAYO_3_m 96 #define MAYO_3_o 10 @@ -91,6 +94,7 @@ #define MAYO_3_pk_seed_bytes 16 #define MAYO_3_sk_seed_bytes 32 +#define MAYO_5_name "MAYO_5" #define MAYO_5_n 133 #define MAYO_5_m 128 #define MAYO_5_o 12 @@ -121,6 +125,25 @@ #define PARAM_JOIN2(a, b) PARAM_JOIN2_(a, b) #define PARAM_NAME(end) PARAM_JOIN2(MAYO_VARIANT, end) +#if defined(MAYO_VARIANT) +#define PARAM_JOIN3_(a, b, c) pqmayo_##a##_##b##_##c +#define PARAM_JOIN3(a, b, c) PARAM_JOIN3_(a, b, c) +#define PARAM_NAME3(end, s) PARAM_JOIN3(MAYO_VARIANT, end, s) + +#if defined(MAYO_BUILD_TYPE_REF) +#define MAYO_NAMESPACE(s) PARAM_NAME3(ref, s) +#elif defined(MAYO_BUILD_TYPE_OPT) +#define MAYO_NAMESPACE(s) PARAM_NAME3(opt, s) +#elif defined(MAYO_BUILD_TYPE_AVX2) +#define 
MAYO_NAMESPACE(s) PARAM_NAME3(avx2, s) +#else +#error "Build type not known" +#endif + +#else +#define MAYO_NAMESPACE(s) s +#endif + #ifdef ENABLE_PARAMS_DYNAMIC #define NAME_MAX mayo5 #define N_MAX 133 @@ -174,6 +197,63 @@ #error "Parameter not specified" #endif +#ifdef ENABLE_PARAMS_DYNAMIC +#define PARAM_name(p) (p->name) +#define PARAM_m(p) (p->m) +#define PARAM_n(p) (p->n) +#define PARAM_o(p) (p->o) +#define PARAM_v(p) (p->n - p->o) +#define PARAM_A_cols(p) (p->k * p->o + 1) +#define PARAM_k(p) (p->k) +#define PARAM_q(p) (p->q) +#define PARAM_m_bytes(p) (p->m_bytes) +#define PARAM_O_bytes(p) (p->O_bytes) +#define PARAM_v_bytes(p) (p->v_bytes) +#define PARAM_r_bytes(p) (p->r_bytes) +#define PARAM_P1_bytes(p) (p->P1_bytes) +#define PARAM_P2_bytes(p) (p->P2_bytes) +#define PARAM_P3_bytes(p) (p->P3_bytes) +#define PARAM_csk_bytes(p) (p->csk_bytes) +#define PARAM_esk_bytes(p) (p->esk_bytes) +#define PARAM_cpk_bytes(p) (p->cpk_bytes) +#define PARAM_epk_bytes(p) (p->epk_bytes) +#define PARAM_sig_bytes(p) (p->sig_bytes) +#define PARAM_f_tail(p) (p->f_tail) +#define PARAM_salt_bytes(p) (p->salt_bytes) +#define PARAM_sk_seed_bytes(p) (p->sk_seed_bytes) +#define PARAM_digest_bytes(p) (p->digest_bytes) +#define PARAM_pk_seed_bytes(p) (p->pk_seed_bytes) +#elif defined(MAYO_VARIANT) +#define PARAM_name(p) PARAM_NAME(name) +#define PARAM_m(p) PARAM_NAME(m) +#define PARAM_n(p) PARAM_NAME(n) +#define PARAM_o(p) PARAM_NAME(o) +#define PARAM_v(p) PARAM_NAME(v) +#define PARAM_A_cols(p) PARAM_NAME(A_cols) +#define PARAM_k(p) PARAM_NAME(k) +#define PARAM_q(p) PARAM_NAME(q) +#define PARAM_m_bytes(p) PARAM_NAME(m_bytes) +#define PARAM_O_bytes(p) PARAM_NAME(O_bytes) +#define PARAM_v_bytes(p) PARAM_NAME(v_bytes) +#define PARAM_r_bytes(p) PARAM_NAME(r_bytes) +#define PARAM_P1_bytes(p) PARAM_NAME(P1_bytes) +#define PARAM_P2_bytes(p) PARAM_NAME(P2_bytes) +#define PARAM_P3_bytes(p) PARAM_NAME(P3_bytes) +#define PARAM_csk_bytes(p) PARAM_NAME(csk_bytes) +#define PARAM_esk_bytes(p) 
PARAM_NAME(esk_bytes) +#define PARAM_cpk_bytes(p) PARAM_NAME(cpk_bytes) +#define PARAM_epk_bytes(p) PARAM_NAME(epk_bytes) +#define PARAM_sig_bytes(p) PARAM_NAME(sig_bytes) +static const unsigned char f_tail[] = PARAM_NAME(f_tail); +#define PARAM_salt_bytes(p) PARAM_NAME(salt_bytes) +#define PARAM_sk_seed_bytes(p) PARAM_NAME(sk_seed_bytes) +#define PARAM_digest_bytes(p) PARAM_NAME(digest_bytes) +#define PARAM_pk_seed_bytes(p) PARAM_NAME(pk_seed_bytes) +#define PARAM_f_tail(p) f_tail +#else +#error "Parameter not specified" +#endif + /** * Struct defining MAYO parameters */ @@ -212,10 +292,12 @@ typedef struct sk_t { /** * MAYO parameter sets */ +#ifdef ENABLE_PARAMS_DYNAMIC extern const mayo_params_t MAYO_1; extern const mayo_params_t MAYO_2; extern const mayo_params_t MAYO_3; extern const mayo_params_t MAYO_5; +#endif /** * Status codes @@ -234,8 +316,14 @@ extern const mayo_params_t MAYO_5; * @param[out] sk Mayo secret key * @return int status code */ +#define mayo_keypair MAYO_NAMESPACE(mayo_keypair) int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk); +#define mayo_sign_signature MAYO_NAMESPACE(mayo_sign_signature) +int mayo_sign_signature(const mayo_params_t *p, unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *csk); + /** * MAYO signature generation. 
* @@ -251,6 +339,7 @@ int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk); * @param[in] sk Compacted secret key * @return int status code */ +#define mayo_sign MAYO_NAMESPACE(mayo_sign) int mayo_sign(const mayo_params_t *p, unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); @@ -270,6 +359,7 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, * @param[in] pk Compacted public key * @return int status code */ +#define mayo_open MAYO_NAMESPACE(mayo_open) int mayo_open(const mayo_params_t *p, unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); @@ -288,6 +378,7 @@ int mayo_open(const mayo_params_t *p, unsigned char *m, * @param[out] csk Mayo compacted secret key * @return int status code */ +#define mayo_keypair_compact MAYO_NAMESPACE(mayo_keypair_compact) int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, unsigned char *csk); @@ -302,6 +393,7 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, * @param[out] epk Expanded public key. * @return int return code */ +#define mayo_expand_pk MAYO_NAMESPACE(mayo_expand_pk) int mayo_expand_pk(const mayo_params_t *p, const unsigned char *cpk, unsigned char *epk); @@ -316,6 +408,7 @@ int mayo_expand_pk(const mayo_params_t *p, const unsigned char *cpk, * @param[out] esk Expanded secret key. * @return int return code */ +#define mayo_expand_sk MAYO_NAMESPACE(mayo_expand_sk) int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, sk_t *esk); @@ -332,6 +425,7 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, * @param[in] pk Compacted public key * @return int 0 if verification succeeded, 1 otherwise. 
*/ +#define mayo_verify MAYO_NAMESPACE(mayo_verify) int mayo_verify(const mayo_params_t *p, const unsigned char *m, unsigned long long mlen, const unsigned char *sig, const unsigned char *pk); diff --git a/include/rng.h b/include/randombytes.h similarity index 91% rename from include/rng.h rename to include/randombytes.h index 7bf4efa..8261217 100644 --- a/include/rng.h +++ b/include/randombytes.h @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 -#ifndef rng_h -#define rng_h +#ifndef randombytes_h +#define randombytes_h /** * Randombytes initialization. @@ -25,4 +25,4 @@ void randombytes_init(unsigned char *entropy_input, */ int randombytes(unsigned char *x, unsigned long long xlen); -#endif /* rng_h */ +#endif /* randombytes_h */ diff --git a/src/AVX2/echelon_form.h b/src/AVX2/echelon_form.h index 523ebdd..42406c7 100644 --- a/src/AVX2/echelon_form.h +++ b/src/AVX2/echelon_form.h @@ -1,95 +1,95 @@ -// SPDX-License-Identifier: Apache-2.0 - -#include -#include - - -#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) - - -// -// generate multiplication table for '4-bit' variable 'b'. From https://eprint.iacr.org/2023/059/. 
-// -static inline __m256i tbl32_gf16_multab( uint8_t b ) { - static const unsigned char __gf16_mulbase[128] __attribute__((aligned(32))) = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, - 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, - 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01, 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01 - }; - - - __m256i bx = _mm256_set1_epi16( b & 0xf ); - __m256i b1 = _mm256_srli_epi16( bx, 1 ); - - const __m256i tab0 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 0)); - const __m256i tab1 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 1)); - const __m256i tab2 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 2)); - const __m256i tab3 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 3)); - - __m256i mask_1 = _mm256_set1_epi16(1); - __m256i mask_4 = _mm256_set1_epi16(4); - __m256i mask_0 = _mm256_setzero_si256(); - - return ( tab0 & _mm256_cmpgt_epi16( bx & mask_1, mask_0) ) - ^ ( tab1 & _mm256_cmpgt_epi16( b1 & mask_1, mask_0) ) - ^ ( tab2 & _mm256_cmpgt_epi16( bx & mask_4, mask_0) ) - ^ ( tab3 & _mm256_cmpgt_epi16( b1 & mask_4, mask_0) ); -} - -/* put matrix in row echelon form with ones on first nonzero entries in constant time*/ -static inline void EF(unsigned char *A, int _nrows, int _ncols) { - - (void) _nrows; - (void) _ncols; - - #define nrows M_MAX - #define ncols (K_MAX * O_MAX + 1) - - #define 
AVX_REGS_PER_ROW ((K_MAX * O_MAX + 1 + 31) / 32) - #define MAX_COLS (AVX_REGS_PER_ROW * 32) - - __m256i _pivot_row[AVX_REGS_PER_ROW]; - __m256i A_avx[AVX_REGS_PER_ROW* M_MAX]; - - unsigned char* pivot_row_bytes = (unsigned char*) _pivot_row; - unsigned char* A_bytes = (unsigned char*) A_avx; - - // load A in the tail of AVX2 registers - for (int i = 0; i < nrows; i++) { - for (int j = 0; j < ncols; j++) - { - A_bytes[i*MAX_COLS + (MAX_COLS - ncols) + j] = A[ i*ncols + j ]; - } - } - - // pivot row is secret, pivot col is not - unsigned char inverse; - int pivot_row = 0; - int pivot_col = MAYO_MAX(MAX_COLS - ncols,0); - for (; pivot_col < MAX_COLS-128; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-96; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-64; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-32; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS; pivot_col++) { - #include "echelon_form_loop.h" - } - - // write the matrix A back - for (int i = 0; i < nrows; i++) { - for (int j = 0; j < ncols; j++) { - A[i * ncols + j] = A_bytes[i*AVX_REGS_PER_ROW*32 + (MAX_COLS - ncols) + j]; - } - } - mayo_secure_clear(_pivot_row, AVX_REGS_PER_ROW * 32); - mayo_secure_clear(A_avx, AVX_REGS_PER_ROW * 32 * nrows); -} \ No newline at end of file +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + + +#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) + + +// +// generate multiplication table for '4-bit' variable 'b'. From https://eprint.iacr.org/2023/059/. 
+// +static inline __m256i tbl32_gf16_multab( uint8_t b ) { + static const unsigned char __gf16_mulbase[128] __attribute__((aligned(32))) = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, + 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, + 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01, 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01 + }; + + + __m256i bx = _mm256_set1_epi16( b & 0xf ); + __m256i b1 = _mm256_srli_epi16( bx, 1 ); + + const __m256i tab0 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 0)); + const __m256i tab1 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 1)); + const __m256i tab2 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 2)); + const __m256i tab3 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 3)); + + __m256i mask_1 = _mm256_set1_epi16(1); + __m256i mask_4 = _mm256_set1_epi16(4); + __m256i mask_0 = _mm256_setzero_si256(); + + return ( tab0 & _mm256_cmpgt_epi16( bx & mask_1, mask_0) ) + ^ ( tab1 & _mm256_cmpgt_epi16( b1 & mask_1, mask_0) ) + ^ ( tab2 & _mm256_cmpgt_epi16( bx & mask_4, mask_0) ) + ^ ( tab3 & _mm256_cmpgt_epi16( b1 & mask_4, mask_0) ); +} + +/* put matrix in row echelon form with ones on first nonzero entries in constant time*/ +static inline void EF(unsigned char *A, int _nrows, int _ncols) { + + (void) _nrows; + (void) _ncols; + + #define nrows M_MAX + #define ncols (K_MAX * O_MAX + 1) + + #define 
AVX_REGS_PER_ROW ((K_MAX * O_MAX + 1 + 31) / 32) + #define MAX_COLS (AVX_REGS_PER_ROW * 32) + + __m256i _pivot_row[AVX_REGS_PER_ROW]; + __m256i A_avx[AVX_REGS_PER_ROW* M_MAX]; + + unsigned char* pivot_row_bytes = (unsigned char*) _pivot_row; + unsigned char* A_bytes = (unsigned char*) A_avx; + + // load A in the tail of AVX2 registers + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) + { + A_bytes[i*MAX_COLS + (MAX_COLS - ncols) + j] = A[ i*ncols + j ]; + } + } + + // pivot row is secret, pivot col is not + unsigned char inverse; + int pivot_row = 0; + int pivot_col = MAYO_MAX(MAX_COLS - ncols,0); + for (; pivot_col < MAX_COLS-128; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-96; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-64; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-32; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS; pivot_col++) { + #include "echelon_form_loop.h" + } + + // write the matrix A back + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + A[i * ncols + j] = A_bytes[i*AVX_REGS_PER_ROW*32 + (MAX_COLS - ncols) + j]; + } + } + mayo_secure_clear(_pivot_row, AVX_REGS_PER_ROW * 32); + mayo_secure_clear(A_avx, AVX_REGS_PER_ROW * 32 * nrows); +} diff --git a/src/AVX2/echelon_form_loop.h b/src/AVX2/echelon_form_loop.h index 1b64523..4b85f3d 100644 --- a/src/AVX2/echelon_form_loop.h +++ b/src/AVX2/echelon_form_loop.h @@ -1,57 +1,57 @@ -// SPDX-License-Identifier: Apache-2.0 - -int pivot_col_rounded = pivot_col/32; - -int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - MAX_COLS); -int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col - MAX_COLS + ncols); -/* the pivot row is guaranteed to be between these lower and upper bounds if A has full rank*/ - -/* zero out pivot row */ -for (int i = pivot_col_rounded; i < AVX_REGS_PER_ROW; i++) { - _pivot_row[i] = 
_mm256_set1_epi8(0); -} - -/* try to get a pivot row in constant time */ -unsigned char pivot = 0; -uint32_t pivot_is_zero = -1; -for (int row = pivot_row_lower_bound; - row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { - uint32_t is_pivot_row = ~ct_compare_32(row, pivot_row); - uint32_t below_pivot_row = ct_is_greater_than(row, pivot_row); - __m256i mask = _mm256_set1_epi32( is_pivot_row | (below_pivot_row & pivot_is_zero) ); - for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - _pivot_row[j] ^= mask & A_avx[row * AVX_REGS_PER_ROW + j]; - } - pivot = pivot_row_bytes[pivot_col]; - pivot_is_zero = ~ct_compare_32((int) pivot, 0); -} - -/* multiply pivot row by inverse of pivot */ -inverse = inverse_f(pivot); -__m256i inverse_multab = tbl32_gf16_multab(inverse); - -for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - _pivot_row[j] = _mm256_shuffle_epi8(inverse_multab, _pivot_row[j]); -} - -/* conditionally write pivot row to the correct row, if there is a nonzero pivot */ -/* eliminate entries below pivot */ -for (int row = pivot_row_lower_bound; row < nrows; row++) { - unsigned char below_pivot = (unsigned char) (ct_is_greater_than(row, pivot_row)); - unsigned char elt_to_elim = A_bytes[row*AVX_REGS_PER_ROW*32 + pivot_col]; - - __m256i multab = tbl32_gf16_multab(below_pivot & elt_to_elim); - if (row <= pivot_row_upper_bound) { - __m256i mask = _mm256_set1_epi32(~ct_compare_32(row, pivot_row) & ~pivot_is_zero); - for (int col = pivot_col_rounded; col < AVX_REGS_PER_ROW; col++) { - A_avx[row*AVX_REGS_PER_ROW + col] = _mm256_blendv_epi8(A_avx[row*AVX_REGS_PER_ROW + col], _pivot_row[col], mask) ^ - _mm256_shuffle_epi8(multab, _pivot_row[col]); - } - } else { - for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - A_avx[row*AVX_REGS_PER_ROW + j] ^= _mm256_shuffle_epi8(multab, _pivot_row[j]); - } - } -} - -pivot_row += (-(int32_t)(~pivot_is_zero)); \ No newline at end of file +// SPDX-License-Identifier: Apache-2.0 + +int 
pivot_col_rounded = pivot_col/32; + +int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - MAX_COLS); +int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col - MAX_COLS + ncols); +/* the pivot row is guaranteed to be between these lower and upper bounds if A has full rank*/ + +/* zero out pivot row */ +for (int i = pivot_col_rounded; i < AVX_REGS_PER_ROW; i++) { + _pivot_row[i] = _mm256_set1_epi8(0); +} + +/* try to get a pivot row in constant time */ +unsigned char pivot = 0; +uint32_t pivot_is_zero = -1; +for (int row = pivot_row_lower_bound; + row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { + uint32_t is_pivot_row = ~ct_compare_32(row, pivot_row); + uint32_t below_pivot_row = ct_is_greater_than(row, pivot_row); + __m256i mask = _mm256_set1_epi32( is_pivot_row | (below_pivot_row & pivot_is_zero) ); + for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + _pivot_row[j] ^= mask & A_avx[row * AVX_REGS_PER_ROW + j]; + } + pivot = pivot_row_bytes[pivot_col]; + pivot_is_zero = ~ct_compare_32((int) pivot, 0); +} + +/* multiply pivot row by inverse of pivot */ +inverse = inverse_f(pivot); +__m256i inverse_multab = tbl32_gf16_multab(inverse); + +for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + _pivot_row[j] = _mm256_shuffle_epi8(inverse_multab, _pivot_row[j]); +} + +/* conditionally write pivot row to the correct row, if there is a nonzero pivot */ +/* eliminate entries below pivot */ +for (int row = pivot_row_lower_bound; row < nrows; row++) { + unsigned char below_pivot = (unsigned char) (ct_is_greater_than(row, pivot_row)); + unsigned char elt_to_elim = A_bytes[row*AVX_REGS_PER_ROW*32 + pivot_col]; + + __m256i multab = tbl32_gf16_multab(below_pivot & elt_to_elim); + if (row <= pivot_row_upper_bound) { + __m256i mask = _mm256_set1_epi32(~ct_compare_32(row, pivot_row) & ~pivot_is_zero); + for (int col = pivot_col_rounded; col < AVX_REGS_PER_ROW; col++) { + A_avx[row*AVX_REGS_PER_ROW + col] = 
_mm256_blendv_epi8(A_avx[row*AVX_REGS_PER_ROW + col], _pivot_row[col], mask) ^ + _mm256_shuffle_epi8(multab, _pivot_row[col]); + } + } else { + for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + A_avx[row*AVX_REGS_PER_ROW + j] ^= _mm256_shuffle_epi8(multab, _pivot_row[j]); + } + } +} + +pivot_row += (-(int32_t)(~pivot_is_zero)); diff --git a/src/AVX2/shuffle_arithmetic_64.h b/src/AVX2/shuffle_arithmetic_64.h index 75162b4..f5c5ded 100644 --- a/src/AVX2/shuffle_arithmetic_64.h +++ b/src/AVX2/shuffle_arithmetic_64.h @@ -1,481 +1,481 @@ -// SPDX-License-Identifier: Apache-2.0 - -#ifndef SHUFFLE_ARITHMETIC_64_H -#define SHUFFLE_ARITHMETIC_64_H - -#include -#include -#include -#include - -// P1*0 -> P1: v x v, O: v x o -static -inline void mayo_12_P1_times_O_avx2(const uint64_t *_P1, __m256i *O_multabs, uint64_t *_acc){ - - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - for (size_t c = r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - // convert to normal format and add to accumulator - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*O_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*O_MAX) + k + 1] ^= temp[k+1] ^ t; - } - } -} - - -static -inline void mayo_12_Ot_times_P1O_P2_avx2(const uint64_t *_P1O_P2, 
__m256i *O_multabs, uint64_t *_acc){ - - const __m256i *P1O_P2 = (__m256i *) _P1O_P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - for (size_t c = 0; c < O_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(P1O_P2 + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_even); - } - } - - // convert to normal format and add to accumulator - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; - } - } -} - -static -inline void mayo_12_P1P1t_times_O(const uint64_t *_P1, const unsigned char *O, uint64_t *_acc){ - - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - __m256i O_multabs[O_MAX/2*V_MAX]; - mayo_O_multabs_avx2(O, O_multabs); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - cols_used += 1; - size_t pos = r; - for (size_t c = 0; c < r; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + pos); - pos += (V_MAX -c - 1); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + 
k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - for (size_t c = r+1; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i acc0 = _mm256_loadu_si256(acc + (r*O_MAX + k )); - __m256i acc1 = _mm256_loadu_si256(acc + (r*O_MAX + k + 1)); - - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - - _mm256_storeu_si256(acc + (r*O_MAX + k ), acc0 ^ temp[k ] ^ _mm256_slli_epi16(t,4)); - _mm256_storeu_si256(acc + (r*O_MAX + k + 1), acc1 ^ temp[k+1] ^ t); - } - } -} - - -static -inline void mayo_12_Vt_times_L_avx2(const uint64_t *_L, const __m256i *V_multabs, uint64_t *_acc){ - - const __m256i *L = (__m256i *) _L; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < O_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(L + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*O_MAX) + c] ^= 
temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*O_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -static -inline void mayo_12_Vt_times_Pv_avx2(const uint64_t *_Pv, const __m256i *V_multabs, uint64_t *_acc){ - - const __m256i *Pv = (__m256i *) _Pv; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < K_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(Pv + r*K_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - -static -inline void mayo_12_P1_times_Vt_avx2(const uint64_t *_P1, __m256i *V_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications 
for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - -// P1*S1 -> P1: v x v, S1: v x k // P1 upper triangular -// same as mayo_12_P1_times_Vt_avx2 -static -inline void mayo_12_P1_times_S1_avx2(const uint64_t *_P1, __m256i *S1_multabs, uint64_t *_acc){ - mayo_12_P1_times_Vt_avx2(_P1, S1_multabs, _acc); -} - -static -inline void mayo_12_S1t_times_PS1_avx2(const uint64_t *_PS1, __m256i *S1_multabs, uint64_t *_acc){ - mayo_12_Vt_times_Pv_avx2(_PS1, S1_multabs, _acc); -} - -static -inline void mayo_12_S2t_times_PS2_avx2(const uint64_t *_PS2, __m256i *S2_multabs, uint64_t *_acc){ - const __m256i *PS2 = (__m256i *) _PS2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < K_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < O_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(PS2 + r*K_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & 
low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -// P2*S2 -> P2: v x o, S2: o x k -static -inline void mayo_12_P2_times_S2_avx2(const uint64_t *_P2, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P2 = (__m256i *) _P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=0; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P2 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & 
low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -// P2*S2 -> P2: v x o, S2: o x k -static -inline void mayo_12_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const uint64_t *_P2, __m256i *S1_multabs, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P1 = (__m256i *) _P1; - const __m256i *P2 = (__m256i *) _P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t P1_cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - - // P1 * S1 - for (c=r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + P1_cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - P1_cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even); - } - } - - // P2 * S2 - for (c=0; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P2 + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); 
-#endif - } -} - -// P3*S2 -> P3: o x o, S2: o x k // P3 upper triangular -static -inline void mayo_12_P3_times_S2_avx2(const uint64_t *_P3, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P3 = (__m256i *) _P3; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < O_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=r; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P3 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -static inline -void mayo12_m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size) { - (void) size; - int m_vecs_stored = 0; - - for (int r = 0; r < O_MAX; ++r) { - const __m256i* _in = (const __m256i*) (in + m_legs * 2 * (r * size + r)); - __m256i* _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); - _out[0] = _in[0]; - m_vecs_stored++; - for (int c = r + 1; c < O_MAX; ++c) { - const __m256i* _in2 = (const __m256i*) (in + m_legs * 2 * (r * size + c)); - const __m256i* _in3 = (const __m256i*) (in + m_legs * 2 * (c * size + r)); - _out = 
(__m256i*) (out + m_legs * 2 * m_vecs_stored); - _out[0] = _in2[0] ^ _in3[0]; - m_vecs_stored++; - } - } -} - - -#undef K_OVER_2 -#endif - +// SPDX-License-Identifier: Apache-2.0 + +#ifndef SHUFFLE_ARITHMETIC_64_H +#define SHUFFLE_ARITHMETIC_64_H + +#include +#include +#include +#include + +// P1*0 -> P1: v x v, O: v x o +static +inline void mayo_12_P1_times_O_avx2(const uint64_t *_P1, __m256i *O_multabs, uint64_t *_acc){ + + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + for (size_t c = r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + // convert to normal format and add to accumulator + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*O_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*O_MAX) + k + 1] ^= temp[k+1] ^ t; + } + } +} + + +static +inline void mayo_12_Ot_times_P1O_P2_avx2(const uint64_t *_P1O_P2, __m256i *O_multabs, uint64_t *_acc){ + + const __m256i *P1O_P2 = (__m256i *) _P1O_P2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + for (size_t c = 0; c < O_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + for 
(size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(P1O_P2 + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_even); + } + } + + // convert to normal format and add to accumulator + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; + } + } +} + +static +inline void mayo_12_P1P1t_times_O(const uint64_t *_P1, const unsigned char *O, uint64_t *_acc){ + + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + __m256i O_multabs[O_MAX/2*V_MAX]; + mayo_O_multabs_avx2(O, O_multabs); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + cols_used += 1; + size_t pos = r; + for (size_t c = 0; c < r; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + pos); + pos += (V_MAX -c - 1); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + for (size_t c = r+1; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + 
k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i acc0 = _mm256_loadu_si256(acc + (r*O_MAX + k )); + __m256i acc1 = _mm256_loadu_si256(acc + (r*O_MAX + k + 1)); + + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + + _mm256_storeu_si256(acc + (r*O_MAX + k ), acc0 ^ temp[k ] ^ _mm256_slli_epi16(t,4)); + _mm256_storeu_si256(acc + (r*O_MAX + k + 1), acc1 ^ temp[k+1] ^ t); + } + } +} + + +static +inline void mayo_12_Vt_times_L_avx2(const uint64_t *_L, const __m256i *V_multabs, uint64_t *_acc){ + + const __m256i *L = (__m256i *) _L; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < O_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(L + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*O_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +static +inline void mayo_12_Vt_times_Pv_avx2(const uint64_t *_Pv, const __m256i *V_multabs, uint64_t *_acc){ + + const __m256i *Pv = (__m256i *) _Pv; + 
__m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < K_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(Pv + r*K_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +static +inline void mayo_12_P1_times_Vt_avx2(const uint64_t *_P1, __m256i *V_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd); 
+ temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +// P1*S1 -> P1: v x v, S1: v x k // P1 upper triangular +// same as mayo_12_P1_times_Vt_avx2 +static +inline void mayo_12_P1_times_S1_avx2(const uint64_t *_P1, __m256i *S1_multabs, uint64_t *_acc){ + mayo_12_P1_times_Vt_avx2(_P1, S1_multabs, _acc); +} + +static +inline void mayo_12_S1t_times_PS1_avx2(const uint64_t *_PS1, __m256i *S1_multabs, uint64_t *_acc){ + mayo_12_Vt_times_Pv_avx2(_PS1, S1_multabs, _acc); +} + +static +inline void mayo_12_S2t_times_PS2_avx2(const uint64_t *_PS2, __m256i *S2_multabs, uint64_t *_acc){ + const __m256i *PS2 = (__m256i *) _PS2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < K_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < O_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(PS2 + r*K_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & 
low_nibble_mask; + acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +// P2*S2 -> P2: v x o, S2: o x k +static +inline void mayo_12_P2_times_S2_avx2(const uint64_t *_P2, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P2 = (__m256i *) _P2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=0; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P2 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +// P2*S2 -> P2: v x o, S2: o x k +static +inline void mayo_12_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const uint64_t *_P2, __m256i *S1_multabs, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P1 = (__m256i *) _P1; + const __m256i *P2 = (__m256i *) _P2; + __m256i *acc = 
(__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t P1_cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + + // P1 * S1 + for (c=r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + P1_cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + P1_cols_used ++; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even); + } + } + + // P2 * S2 + for (c=0; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P2 + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +// P3*S2 -> P3: o x o, S2: o x k // P3 upper triangular +static +inline void mayo_12_P3_times_S2_avx2(const uint64_t *_P3, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P3 = (__m256i *) _P3; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + 
+ size_t cols_used = 0; + for (size_t r = 0; r < O_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=r; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P3 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +static inline +void mayo12_m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size) { + (void) size; + int m_vecs_stored = 0; + + for (int r = 0; r < O_MAX; ++r) { + const __m256i* _in = (const __m256i*) (in + m_legs * 2 * (r * size + r)); + __m256i* _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); + _out[0] = _in[0]; + m_vecs_stored++; + for (int c = r + 1; c < O_MAX; ++c) { + const __m256i* _in2 = (const __m256i*) (in + m_legs * 2 * (r * size + c)); + const __m256i* _in3 = (const __m256i*) (in + m_legs * 2 * (c * size + r)); + _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); + _out[0] = _in2[0] ^ _in3[0]; + m_vecs_stored++; + } + } +} + + +#undef K_OVER_2 +#endif + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d2aa397..25472ba 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,9 +49,11 @@ if (ENABLE_PARAMS_DYNAMIC) set(SOURCE_FILES_VARIANT ${MVARIANT_LOWER}/api.c) add_library(${MVARIANT_LOWER}_nistapi 
${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_nistapi PRIVATE mayo) + target_compile_definitions(${MVARIANT_LOWER}_nistapi PUBLIC ENABLE_PARAMS_DYNAMIC) target_include_directories(${MVARIANT_LOWER}_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) add_library(${MVARIANT_LOWER}_test_nistapi ${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_test_nistapi PRIVATE mayo_test) + target_compile_definitions(${MVARIANT_LOWER}_test_nistapi PUBLIC ENABLE_PARAMS_DYNAMIC) target_include_directories(${MVARIANT_LOWER}_test_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) endforeach() @@ -77,5 +79,7 @@ else() add_library(${MVARIANT_LOWER}_test_nistapi ${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_test_nistapi PRIVATE ${MVARIANT_LOWER}_test) target_include_directories(${MVARIANT_LOWER}_test_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) + target_compile_definitions(${MVARIANT_LOWER}_nistapi PUBLIC MAYO_VARIANT=${MVARIANT}) + target_compile_definitions(${MVARIANT_LOWER}_test_nistapi PUBLIC MAYO_VARIANT=${MVARIANT}) endforeach() endif() diff --git a/src/arithmetic.h b/src/arithmetic.h index ec79f33..268ecba 100644 --- a/src/arithmetic.h +++ b/src/arithmetic.h @@ -29,77 +29,28 @@ #include #endif -#ifdef ENABLE_PARAMS_DYNAMIC -#define PARAM_m(p) (p->m) -#define PARAM_n(p) (p->n) -#define PARAM_o(p) (p->o) -#define PARAM_v(p) (p->n - p->o) -#define PARAM_A_cols(p) (p->k * p->o + 1) -#define PARAM_k(p) (p->k) -#define PARAM_q(p) (p->q) -#define PARAM_m_bytes(p) (p->m_bytes) -#define PARAM_O_bytes(p) (p->O_bytes) -#define PARAM_v_bytes(p) (p->v_bytes) -#define PARAM_r_bytes(p) (p->r_bytes) -#define PARAM_P1_bytes(p) (p->P1_bytes) -#define PARAM_P2_bytes(p) (p->P2_bytes) -#define PARAM_P3_bytes(p) (p->P3_bytes) -#define PARAM_csk_bytes(p) (p->csk_bytes) -#define PARAM_esk_bytes(p) (p->esk_bytes) -#define PARAM_cpk_bytes(p) (p->cpk_bytes) -#define PARAM_epk_bytes(p) (p->epk_bytes) -#define PARAM_sig_bytes(p) (p->sig_bytes) -#define 
PARAM_f_tail(p) (p->f_tail) -#define PARAM_salt_bytes(p) (p->salt_bytes) -#define PARAM_sk_seed_bytes(p) (p->sk_seed_bytes) -#define PARAM_digest_bytes(p) (p->digest_bytes) -#define PARAM_pk_seed_bytes(p) (p->pk_seed_bytes) -#elif defined(MAYO_VARIANT) -#define PARAM_m(p) PARAM_NAME(m) -#define PARAM_n(p) PARAM_NAME(n) -#define PARAM_o(p) PARAM_NAME(o) -#define PARAM_v(p) PARAM_NAME(v) -#define PARAM_A_cols(p) PARAM_NAME(A_cols) -#define PARAM_k(p) PARAM_NAME(k) -#define PARAM_q(p) PARAM_NAME(q) -#define PARAM_m_bytes(p) PARAM_NAME(m_bytes) -#define PARAM_O_bytes(p) PARAM_NAME(O_bytes) -#define PARAM_v_bytes(p) PARAM_NAME(v_bytes) -#define PARAM_r_bytes(p) PARAM_NAME(r_bytes) -#define PARAM_P1_bytes(p) PARAM_NAME(P1_bytes) -#define PARAM_P2_bytes(p) PARAM_NAME(P2_bytes) -#define PARAM_P3_bytes(p) PARAM_NAME(P3_bytes) -#define PARAM_csk_bytes(p) PARAM_NAME(csk_bytes) -#define PARAM_esk_bytes(p) PARAM_NAME(esk_bytes) -#define PARAM_cpk_bytes(p) PARAM_NAME(cpk_bytes) -#define PARAM_epk_bytes(p) PARAM_NAME(epk_bytes) -#define PARAM_sig_bytes(p) PARAM_NAME(sig_bytes) -static const unsigned char f_tail[] = PARAM_NAME(f_tail); -#define PARAM_salt_bytes(p) PARAM_NAME(salt_bytes) -#define PARAM_sk_seed_bytes(p) PARAM_NAME(sk_seed_bytes) -#define PARAM_digest_bytes(p) PARAM_NAME(digest_bytes) -#define PARAM_pk_seed_bytes(p) PARAM_NAME(pk_seed_bytes) -#define PARAM_f_tail(p) f_tail -#else -#error "Parameter not specified" -#endif - // Calculate P3 = O^T * (P1*O + P2) in KeyGen +#define Ot_times_P1O_P2 MAYO_NAMESPACE(Ot_times_P1O_P2) void Ot_times_P1O_P2(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* P1O_P2, uint64_t* P3); // Calculate Upper in KeyGen +#define m_upper MAYO_NAMESPACE(m_upper) void m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size); // Calculate acc = (P1+P1^T)*O in expand_sk +#define P1P1t_times_O MAYO_NAMESPACE(P1P1t_times_O) void P1P1t_times_O(const mayo_params_t* p, const uint64_t* P1P1t, const unsigned char* O, 
uint64_t* acc); // Calculate M=V*L and Y=V*P1*V^T in Sign +#define V_times_L__V_times_P1_times_Vt MAYO_NAMESPACE(V_times_L__V_times_P1_times_Vt) void V_times_L__V_times_P1_times_Vt(const mayo_params_t* p, const uint64_t* L, const unsigned char* V, uint64_t* M, const uint64_t* P1, uint64_t* Y); // Sample solution in Sign +#define sample_solution MAYO_NAMESPACE(sample_solution) int sample_solution(const mayo_params_t *p, unsigned char *A, const unsigned char *y, const unsigned char *r, unsigned char *x, int k, int o, int m, int A_cols); // Calculate SPS = S*P*S^T in Verify +#define m_calculate_PS_SPS MAYO_NAMESPACE(m_calculate_PS_SPS) void m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S, const int m, const int v, const int o, const int k, uint64_t *SPS); diff --git a/src/common/randombytes_system.c b/src/common/randombytes_system.c index a8783c3..04c8870 100644 --- a/src/common/randombytes_system.c +++ b/src/common/randombytes_system.c @@ -31,7 +31,7 @@ THE SOFTWARE. 
# define _GNU_SOURCE #endif /* defined(__linux__) || defined(__GNU__) */ -#include +#include #if defined(_WIN32) /* Windows */ diff --git a/src/generic/arithmetic_common.h b/src/generic/arithmetic_common.h index 079bdf6..d337bc2 100644 --- a/src/generic/arithmetic_common.h +++ b/src/generic/arithmetic_common.h @@ -84,7 +84,7 @@ static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_ */ // use more stack efficient version for MAYO_3 and MAYO_5 - #if defined(PQM4) && N_MAX > 78 + #if (defined(HAVE_STACKEFFICIENT) || defined(PQM4)) && N_MAX > 78 uint64_t accumulator[M_MAX * N_MAX] = {0}; int P1_used; int P3_used; diff --git a/src/generic/echelon_form.h b/src/generic/echelon_form.h index a7ea621..c317dbd 100644 --- a/src/generic/echelon_form.h +++ b/src/generic/echelon_form.h @@ -1,138 +1,138 @@ - -// SPDX-License-Identifier: Apache-2.0 - -#ifndef ECHELON_FORM_H -#define ECHELON_FORM_H - -#include -#include -#include -#include - -#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MAYO_MIN(x, y) (((x) < (y)) ? 
(x) : (y)) - -static inline unsigned char -m_extract_element(const uint64_t *in, int index) { - const int leg = index / 16; - const int offset = index % 16; - - return (in[leg] >> (offset*4)) & 0xF; -} - -static inline void -ef_pack_m_vec(const unsigned char *in, uint64_t *out, int ncols) { - int i; - unsigned char *out8 = (unsigned char *)out; - for(i = 0; i+1 < ncols; i += 2){ - out8[i/2] = (in[i+0] << 0) | (in[i+1] << 4); - } - if (ncols % 2 == 1){ - out8[i/2] = (in[i+0] << 0); - } -} - -static inline void -ef_unpack_m_vec(int legs, const uint64_t *in, unsigned char *out) { - const unsigned char *in8 = (const unsigned char *)in; - for(int i = 0; i < legs * 16; i += 2){ - out[i] = (in8[i/2]) & 0xF; - out[i+1] = (in8[i/2] >> 4); - } -} - - -// put matrix in row echelon form with ones on first nonzero entries *in -// constant time* -static inline void EF(unsigned char *A, int nrows, int ncols) { - - alignas (32) uint64_t _pivot_row[(K_MAX * O_MAX + 1 + 15) / 16]; - alignas (32) uint64_t _pivot_row2[(K_MAX * O_MAX + 1 + 15) / 16]; - alignas (32) uint64_t packed_A[((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX]; - - int row_len = (ncols + 15) / 16; - - // nibbleslice the matrix A - for (int i = 0; i < nrows; i++) { - ef_pack_m_vec(A + i * ncols, packed_A + i * row_len, ncols); - } - - // pivot row is secret, pivot col is not - - unsigned char inverse; - int pivot_row = 0; - for (int pivot_col = 0; pivot_col < ncols; pivot_col++) { - - int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols); - int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col); - // the pivot row is guaranteed to be between these lower and upper bounds if - // A has full rank - - // zero out pivot row - for (int i = 0; i < row_len; i++) { - _pivot_row[i] = 0; - _pivot_row2[i] = 0; - } - - // try to get a pivot row in constant time - unsigned char pivot = 0; - uint64_t pivot_is_zero = -1; - for (int row = pivot_row_lower_bound; - row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); 
row++) { - - uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row); - uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row); - - for (int j = 0; j < row_len; j++) { - _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) & - packed_A[row * row_len + j]; - } - pivot = m_extract_element(_pivot_row, pivot_col); - pivot_is_zero = ~ct_compare_64((int) pivot, 0); - } - - // multiply pivot row by inverse of pivot - inverse = inverse_f(pivot); - vec_mul_add_u64(row_len, _pivot_row, inverse, _pivot_row2); - - // conditionally write pivot row to the correct row, if there is a nonzero - // pivot - for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) { - uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero; - uint64_t do_not_copy = ~do_copy; - for (int col = 0; col < row_len; col++) { - packed_A[row * row_len + col] = - (do_not_copy & packed_A[row * row_len + col]) + - (do_copy & _pivot_row2[col]); - } - } - - // eliminate entries below pivot - for (int row = pivot_row_lower_bound; row < nrows; row++) { - unsigned char below_pivot = (row > pivot_row); - unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col); - - vec_mul_add_u64(row_len, _pivot_row2, below_pivot * elt_to_elim, - packed_A + row * row_len); - } - - pivot_row += (-(int32_t)(~pivot_is_zero)); - } - - unsigned char temp[(O_MAX * K_MAX + 1 + 15)]; - - // unbitslice the matrix A - for (int i = 0; i < nrows; i++) { - ef_unpack_m_vec(row_len, packed_A + i * row_len, temp); - for (int j = 0; j < ncols; j++) { - A[i * ncols + j] = temp[j]; - } - } - - mayo_secure_clear(temp, K_MAX * O_MAX + 1 + 15); - mayo_secure_clear(_pivot_row, (K_MAX * O_MAX + 1 + 15) / 16 * 8); - mayo_secure_clear(_pivot_row2, (K_MAX * O_MAX + 1 + 15) / 16 * 8); - mayo_secure_clear(packed_A, ((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX * 8); -} - -#endif \ No newline at end of file + +// SPDX-License-Identifier: Apache-2.0 + +#ifndef ECHELON_FORM_H +#define 
ECHELON_FORM_H + +#include +#include +#include +#include + +#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) + +static inline unsigned char +m_extract_element(const uint64_t *in, int index) { + const int leg = index / 16; + const int offset = index % 16; + + return (in[leg] >> (offset*4)) & 0xF; +} + +static inline void +ef_pack_m_vec(const unsigned char *in, uint64_t *out, int ncols) { + int i; + unsigned char *out8 = (unsigned char *)out; + for(i = 0; i+1 < ncols; i += 2){ + out8[i/2] = (in[i+0] << 0) | (in[i+1] << 4); + } + if (ncols % 2 == 1){ + out8[i/2] = (in[i+0] << 0); + } +} + +static inline void +ef_unpack_m_vec(int legs, const uint64_t *in, unsigned char *out) { + const unsigned char *in8 = (const unsigned char *)in; + for(int i = 0; i < legs * 16; i += 2){ + out[i] = (in8[i/2]) & 0xF; + out[i+1] = (in8[i/2] >> 4); + } +} + + +// put matrix in row echelon form with ones on first nonzero entries *in +// constant time* +static inline void EF(unsigned char *A, int nrows, int ncols) { + + alignas (32) uint64_t _pivot_row[(K_MAX * O_MAX + 1 + 15) / 16]; + alignas (32) uint64_t _pivot_row2[(K_MAX * O_MAX + 1 + 15) / 16]; + alignas (32) uint64_t packed_A[((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX]; + + int row_len = (ncols + 15) / 16; + + // nibbleslice the matrix A + for (int i = 0; i < nrows; i++) { + ef_pack_m_vec(A + i * ncols, packed_A + i * row_len, ncols); + } + + // pivot row is secret, pivot col is not + + unsigned char inverse; + int pivot_row = 0; + for (int pivot_col = 0; pivot_col < ncols; pivot_col++) { + + int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols); + int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col); + // the pivot row is guaranteed to be between these lower and upper bounds if + // A has full rank + + // zero out pivot row + for (int i = 0; i < row_len; i++) { + _pivot_row[i] = 0; + _pivot_row2[i] = 0; + } + + // try to get a pivot row in constant time + unsigned 
char pivot = 0; + uint64_t pivot_is_zero = -1; + for (int row = pivot_row_lower_bound; + row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { + + uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row); + uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row); + + for (int j = 0; j < row_len; j++) { + _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) & + packed_A[row * row_len + j]; + } + pivot = m_extract_element(_pivot_row, pivot_col); + pivot_is_zero = ~ct_compare_64((int) pivot, 0); + } + + // multiply pivot row by inverse of pivot + inverse = inverse_f(pivot); + vec_mul_add_u64(row_len, _pivot_row, inverse, _pivot_row2); + + // conditionally write pivot row to the correct row, if there is a nonzero + // pivot + for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) { + uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero; + uint64_t do_not_copy = ~do_copy; + for (int col = 0; col < row_len; col++) { + packed_A[row * row_len + col] = + (do_not_copy & packed_A[row * row_len + col]) + + (do_copy & _pivot_row2[col]); + } + } + + // eliminate entries below pivot + for (int row = pivot_row_lower_bound; row < nrows; row++) { + unsigned char below_pivot = (row > pivot_row); + unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col); + + vec_mul_add_u64(row_len, _pivot_row2, below_pivot * elt_to_elim, + packed_A + row * row_len); + } + + pivot_row += (-(int32_t)(~pivot_is_zero)); + } + + unsigned char temp[(O_MAX * K_MAX + 1 + 15)]; + + // unbitslice the matrix A + for (int i = 0; i < nrows; i++) { + ef_unpack_m_vec(row_len, packed_A + i * row_len, temp); + for (int j = 0; j < ncols; j++) { + A[i * ncols + j] = temp[j]; + } + } + + mayo_secure_clear(temp, K_MAX * O_MAX + 1 + 15); + mayo_secure_clear(_pivot_row, (K_MAX * O_MAX + 1 + 15) / 16 * 8); + mayo_secure_clear(_pivot_row2, (K_MAX * O_MAX + 1 + 15) / 16 * 8); + mayo_secure_clear(packed_A, ((K_MAX * O_MAX + 1 + 15) / 
16) * M_MAX * 8); +} + +#endif diff --git a/src/mayo.c b/src/mayo.c index 84dd93f..4c2da41 100644 --- a/src/mayo.c +++ b/src/mayo.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -234,8 +234,8 @@ int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk) { return ret; } -int mayo_sign(const mayo_params_t *p, unsigned char *sm, - unsigned long long *smlen, const unsigned char *m, +int mayo_sign_signature(const mayo_params_t *p, unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, unsigned long long mlen, const unsigned char *csk) { int ret = MAYO_OK; unsigned char tenc[M_BYTES_MAX], t[M_MAX]; // no secret data @@ -295,13 +295,13 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, #endif // choose the randomizer - #ifndef PQM4 + #if defined(PQM4) || defined(HAVE_RANDOMBYTES_NORETVAL) + randombytes(tmp + param_digest_bytes, param_salt_bytes); + #else if (randombytes(tmp + param_digest_bytes, param_salt_bytes) != MAYO_OK) { ret = MAYO_ERR; goto err; } - #else - randombytes(tmp + param_digest_bytes, param_salt_bytes); #endif // hashing to salt @@ -360,11 +360,9 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, mat_add(vi, Ox, s + i * param_n, param_n - param_o, 1); memcpy(s + i * param_n + (param_n - param_o), x + i * param_o, param_o); } - encode(s, sm, param_n * param_k); - memcpy(sm + param_sig_bytes - param_salt_bytes, salt, param_salt_bytes); - memmove(sm + param_sig_bytes, m, - mlen); // assert: smlen == param_k * param_n + mlen - *smlen = param_sig_bytes + mlen; + encode(s, sig, param_n * param_k); + memcpy(sig + param_sig_bytes - param_salt_bytes, salt, param_salt_bytes); + *siglen = param_sig_bytes; err: mayo_secure_clear(V, K_MAX * V_BYTES_MAX + R_BYTES_MAX); mayo_secure_clear(Vdec, N_MINUS_O_MAX * K_MAX); @@ -378,6 +376,22 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, return ret; } +int mayo_sign(const mayo_params_t *p, unsigned char *sm, + unsigned 
long long *smlen, const unsigned char *m, + unsigned long long mlen, const unsigned char *csk) { + int ret = MAYO_OK; + const int param_sig_bytes = PARAM_sig_bytes(p); + unsigned long long siglen = param_sig_bytes; + memmove(sm + param_sig_bytes, m, mlen); /* place the message first: m may alias sm, and the signature written at sm would otherwise clobber it */ + ret = mayo_sign_signature(p, sm, &siglen, sm + param_sig_bytes, mlen, csk); /* sign the relocated copy; it never overlaps sm[0..sig_bytes) */ + if (ret != MAYO_OK || siglen != (unsigned long long) param_sig_bytes) + goto err; + + *smlen = siglen + mlen; /* signed message = signature || message */ +err: + return ret; +} + int mayo_open(const mayo_params_t *p, unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk) { @@ -418,13 +432,13 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, const int param_sk_seed_bytes = PARAM_sk_seed_bytes(p); // seed_sk $←- B^(sk_seed bytes) - #ifndef PQM4 + #if defined(PQM4) || defined(HAVE_RANDOMBYTES_NORETVAL) + randombytes(seed_sk, param_sk_seed_bytes); + #else if (randombytes(seed_sk, param_sk_seed_bytes) != MAYO_OK) { ret = MAYO_ERR; goto err; } - #else - randombytes(seed_sk, param_sk_seed_bytes); #endif // S ← shake256(seedsk, pk seed bytes + O bytes) @@ -444,6 +458,7 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, PK_PRF((unsigned char *)P, param_P1_bytes + param_P2_bytes, seed_pk, param_pk_seed_bytes); + int m_legs = param_m / 32; uint64_t *P1 = P; @@ -462,7 +477,8 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, memcpy(cpk + param_pk_seed_bytes, P3_upper, param_P3_bytes); -#ifndef PQM4 + +#if !defined(PQM4) && !defined(HAVE_RANDOMBYTES_NORETVAL) err: #endif mayo_secure_clear(O, (N_MINUS_O_MAX)*O_MAX); diff --git a/src/mayo_1/api.c b/src/mayo_1/api.c index 5c0021a..8c10f2a 100644 --- a/src/mayo_1/api.c +++ b/src/mayo_1/api.c @@ -3,9 +3,15 @@ #include #include +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_1 +#else +#define MAYO_PARAMS 0 +#endif + int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_1, pk, sk); + return
mayo_keypair(MAYO_PARAMS, pk, sk); } #ifndef PQM4 @@ -13,17 +19,31 @@ int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk) { - return mayo_sign(&MAYO_1, sm, smlen, m, mlen, sk); + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} + +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk) { - return mayo_open(&MAYO_1, m, mlen, sm, smlen, pk); + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } - +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} #else int crypto_sign(unsigned char *sm, size_t *smlen, @@ -31,7 +51,7 @@ crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *sk) { unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_1, sm, &smlen_ll, m, mlen, sk); + int rc = mayo_sign(MAYO_PARAMS, sm, &smlen_ll, m, mlen, sk); *smlen = smlen_ll; return rc; } @@ -41,7 +61,7 @@ crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_1, m, &mlen_ll, sm, smlen, pk); + int rc = mayo_open(MAYO_PARAMS, m, &mlen_ll, sm, smlen, pk); *mlen = mlen_ll; return rc; } diff --git a/src/mayo_1/api.h b/src/mayo_1/api.h index 0a9cfb4..07b98c6 100644 --- a/src/mayo_1/api.h +++ b/src/mayo_1/api.h @@ -3,27 +3,43 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 24 #define CRYPTO_PUBLICKEYBYTES 1168 #define CRYPTO_BYTES 321 #define 
CRYPTO_ALGNAME "MAYO_1" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); #ifndef PQM4 +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk); #else #include diff --git a/src/mayo_2/api.c b/src/mayo_2/api.c index 5a1ab5b..99e6ec7 100644 --- a/src/mayo_2/api.c +++ b/src/mayo_2/api.c @@ -3,9 +3,15 @@ #include #include +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_2 +#else +#define MAYO_PARAMS 0 +#endif + int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_2, pk, sk); + return mayo_keypair(MAYO_PARAMS, pk, sk); } #ifndef PQM4 @@ -13,16 +19,31 @@ int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk) { - return mayo_sign(&MAYO_2, sm, smlen, m, mlen, sk); + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} + +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, unsigned long long *mlen, 
const unsigned char *sm, unsigned long long smlen, const unsigned char *pk) { - return mayo_open(&MAYO_2, m, mlen, sm, smlen, pk); + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} #else int crypto_sign(unsigned char *sm, size_t *smlen, @@ -30,7 +51,7 @@ crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *sk) { unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_2, sm, &smlen_ll, m, mlen, sk); + int rc = mayo_sign(MAYO_PARAMS, sm, &smlen_ll, m, mlen, sk); *smlen = smlen_ll; return rc; } @@ -40,7 +61,7 @@ crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_2, m, &mlen_ll, sm, smlen, pk); + int rc = mayo_open(MAYO_PARAMS, m, &mlen_ll, sm, smlen, pk); *mlen = mlen_ll; return rc; } diff --git a/src/mayo_2/api.h b/src/mayo_2/api.h index df291eb..5d980f9 100644 --- a/src/mayo_2/api.h +++ b/src/mayo_2/api.h @@ -3,26 +3,43 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 24 #define CRYPTO_PUBLICKEYBYTES 5488 #define CRYPTO_BYTES 180 #define CRYPTO_ALGNAME "MAYO_2" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); #ifndef PQM4 +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk); + +#define crypto_sign_open 
MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk); #else #include diff --git a/src/mayo_3/api.c b/src/mayo_3/api.c index 242b060..4117a9a 100644 --- a/src/mayo_3/api.c +++ b/src/mayo_3/api.c @@ -3,9 +3,15 @@ #include #include +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_3 +#else +#define MAYO_PARAMS 0 +#endif + int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_3, pk, sk); + return mayo_keypair(MAYO_PARAMS, pk, sk); } #ifndef PQM4 @@ -13,14 +19,30 @@ int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk) { - return mayo_sign(&MAYO_3, sm, smlen, m, mlen, sk); + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} + +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk) { - return mayo_open(&MAYO_3, m, mlen, sm, smlen, pk); + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); +} + +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); } #else int @@ -29,7 +51,7 @@ crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *sk) { unsigned long long smlen_ll; - int rc = 
mayo_sign(&MAYO_3, sm, &smlen_ll, m, mlen, sk); + int rc = mayo_sign(MAYO_PARAMS, sm, &smlen_ll, m, mlen, sk); *smlen = smlen_ll; return rc; } @@ -39,7 +61,7 @@ crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_3, m, &mlen_ll, sm, smlen, pk); + int rc = mayo_open(MAYO_PARAMS, m, &mlen_ll, sm, smlen, pk); *mlen = mlen_ll; return rc; } diff --git a/src/mayo_3/api.h b/src/mayo_3/api.h index d9c174c..cd465a4 100644 --- a/src/mayo_3/api.h +++ b/src/mayo_3/api.h @@ -3,25 +3,43 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 32 #define CRYPTO_PUBLICKEYBYTES 2656 #define CRYPTO_BYTES 577 #define CRYPTO_ALGNAME "MAYO_3" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); + #ifndef PQM4 +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk); #else #include diff --git a/src/mayo_5/api.c b/src/mayo_5/api.c index 24c674e..29e5f0b 100644 --- a/src/mayo_5/api.c +++ b/src/mayo_5/api.c @@ -3,9 +3,15 @@ #include #include +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_5 +#else +#define 
MAYO_PARAMS 0 +#endif + int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_5, pk, sk); + return mayo_keypair(MAYO_PARAMS, pk, sk); } #ifndef PQM4 @@ -13,14 +19,30 @@ int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk) { - return mayo_sign(&MAYO_5, sm, smlen, m, mlen, sk); + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} + +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk) { - return mayo_open(&MAYO_5, m, mlen, sm, smlen, pk); + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); +} + +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); } #else int @@ -29,7 +51,7 @@ crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *sk) { unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_5, sm, &smlen_ll, m, mlen, sk); + int rc = mayo_sign(MAYO_PARAMS, sm, &smlen_ll, m, mlen, sk); *smlen = smlen_ll; return rc; } @@ -39,7 +61,7 @@ crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_5, m, &mlen_ll, sm, smlen, pk); + int rc = mayo_open(MAYO_PARAMS, m, &mlen_ll, sm, smlen, pk); *mlen = mlen_ll; return rc; } diff --git a/src/mayo_5/api.h b/src/mayo_5/api.h index 41a819b..a8298f7 100644 --- a/src/mayo_5/api.h +++ b/src/mayo_5/api.h @@ -3,26 +3,43 @@ #ifndef api_h #define api_h +#include + #define 
CRYPTO_SECRETKEYBYTES 40 #define CRYPTO_PUBLICKEYBYTES 5008 #define CRYPTO_BYTES 838 #define CRYPTO_ALGNAME "MAYO_5" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); #ifndef PQM4 +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + unsigned long long *siglen, const unsigned char *m, + unsigned long long mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, unsigned long long siglen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk); #else #include diff --git a/src/params.c b/src/params.c index 3dd4941..7331cb5 100644 --- a/src/params.c +++ b/src/params.c @@ -2,6 +2,7 @@ #include +#ifdef ENABLE_PARAMS_DYNAMIC static const unsigned char f_tail_64[] = F_TAIL_64; static const unsigned char f_tail_96[] = F_TAIL_96; static const unsigned char f_tail_128[] = F_TAIL_128; @@ -37,3 +38,4 @@ MAYO_GEN_PARAMS(MAYO_1); MAYO_GEN_PARAMS(MAYO_2); MAYO_GEN_PARAMS(MAYO_3); MAYO_GEN_PARAMS(MAYO_5); +#endif \ No newline at end of file diff --git a/test/bench.c b/test/bench.c index ba81bf7..78370ff 100644 --- a/test/bench.c +++ b/test/bench.c @@ -45,7 +45,7 @@ int main(int argc, char *argv[]) { goto end; } int runs = atoi(argv[1]); - rc = bench_sig(&MAYO_VARIANT, runs, 0); + rc = bench_sig(0, runs, 0); #endif @@ -96,18 +96,18 @@ static int bench_sig(const mayo_params_t *p, int runs, int csv) { const int m_len = 32; - unsigned char *pk = 
calloc(p->cpk_bytes, 1); - unsigned char *epk = calloc(p->epk_bytes, 1); - unsigned char *sk = calloc(p->csk_bytes, 1); + unsigned char *pk = calloc(PARAM_cpk_bytes(p), 1); + unsigned char *epk = calloc(PARAM_epk_bytes(p), 1); + unsigned char *sk = calloc(PARAM_csk_bytes(p), 1); sk_t *esk = calloc(sizeof(sk_t), 1); - unsigned char *sig = calloc(p->sig_bytes + m_len, 1); + unsigned char *sig = calloc(PARAM_sig_bytes(p) + m_len, 1); unsigned char *m = calloc(m_len, 1); - unsigned long long len = p->sig_bytes; + unsigned long long len = PARAM_sig_bytes(p); if (csv) { - printf("%s,", p->name); + printf("%s,", PARAM_name(p)); } else { - printf("Benchmarking %s\n", p->name); + printf("Benchmarking %s\n", PARAM_name(p)); } BENCH_CODE_1(runs); @@ -128,7 +128,7 @@ static int bench_sig(const mayo_params_t *p, int runs, int csv) { len = 32; BENCH_CODE_1(runs); - mayo_open(p, m, &len, sig, p->sig_bytes, pk); + mayo_open(p, m, &len, sig, PARAM_sig_bytes(p), pk); BENCH_CODE_2("mayo_verify", csv); if (csv) { diff --git a/test/test_kat.c b/test/test_kat.c index 21ae2d2..b46cd17 100644 --- a/test/test_kat.c +++ b/test/test_kat.c @@ -12,7 +12,7 @@ You are solely responsible for determining the appropriateness of using and dist #include #include #include -#include +#include #include #define MAX_MARKER_LEN 50 @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { } end: #else - rc = test_sig_kat(&MAYO_VARIANT); + rc = test_sig_kat(0); #endif return rc; } @@ -62,14 +62,14 @@ static int test_sig_kat(const mayo_params_t *p) { unsigned long long mlen, smlen, mlen1; int count; int done; - unsigned char pk[p->cpk_bytes], sk[p->csk_bytes]; + unsigned char pk[PARAM_cpk_bytes(p)], sk[PARAM_csk_bytes(p)]; int ret_val; char fn_rsp[64]; FILE *fp_rsp; - unsigned char pk_rsp[p->cpk_bytes], sk_rsp[p->csk_bytes]; + unsigned char pk_rsp[PARAM_cpk_bytes(p)], sk_rsp[PARAM_csk_bytes(p)]; - sprintf(fn_rsp, "../../KAT/PQCsignKAT_%d_%s.rsp", p->csk_bytes, p->name); + sprintf(fn_rsp, 
"../../KAT/PQCsignKAT_%d_%s.rsp", PARAM_csk_bytes(p), PARAM_name(p)); if ( (fp_rsp = fopen(fn_rsp, "r")) == NULL ) { printf("Couldn't open <%s> for read\n", fn_rsp); return KAT_FILE_OPEN_ERROR; @@ -100,8 +100,8 @@ static int test_sig_kat(const mayo_params_t *p) { m = (unsigned char *)calloc(mlen, sizeof(unsigned char)); m1 = (unsigned char *)calloc(mlen, sizeof(unsigned char)); - sm = (unsigned char *)calloc(mlen + p->sig_bytes, sizeof(unsigned char)); - sm_rsp = (unsigned char *)calloc(mlen + p->sig_bytes, sizeof(unsigned char)); + sm = (unsigned char *)calloc(mlen + PARAM_sig_bytes(p), sizeof(unsigned char)); + sm_rsp = (unsigned char *)calloc(mlen + PARAM_sig_bytes(p), sizeof(unsigned char)); if ( !ReadHex(fp_rsp, m, (int)mlen, "msg = ") ) { printf("ERROR: unable to read 'msg' from <%s>\n", fn_rsp); @@ -113,20 +113,20 @@ static int test_sig_kat(const mayo_params_t *p) { printf("crypto_sign_keypair returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } - if ( !ReadHex(fp_rsp, pk_rsp, p->cpk_bytes, "pk = ") ) { + if ( !ReadHex(fp_rsp, pk_rsp, PARAM_cpk_bytes(p), "pk = ") ) { printf("ERROR: unable to read 'pk' from <%s>\n", fn_rsp); return KAT_DATA_ERROR; } - if ( !ReadHex(fp_rsp, sk_rsp, p->csk_bytes, "sk = ") ) { + if ( !ReadHex(fp_rsp, sk_rsp, PARAM_csk_bytes(p), "sk = ") ) { printf("ERROR: unable to read 'sk' from <%s>\n", fn_rsp); return KAT_DATA_ERROR; } - if (memcmp(pk, pk_rsp, p->cpk_bytes) != 0) { + if (memcmp(pk, pk_rsp, PARAM_cpk_bytes(p)) != 0) { printf("ERROR: pk is different from <%s>\n", fn_rsp); return KAT_VERIFICATION_ERROR; } - if (memcmp(sk, sk_rsp, p->csk_bytes) != 0) { + if (memcmp(sk, sk_rsp, PARAM_csk_bytes(p)) != 0) { printf("ERROR: sk is different from <%s>\n", fn_rsp); return KAT_VERIFICATION_ERROR; } diff --git a/test/test_mayo.c b/test/test_mayo.c index a868b73..bf1c78d 100644 --- a/test/test_mayo.c +++ b/test/test_mayo.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include @@ -51,7 +51,7 @@ static int 
test_mayo(const mayo_params_t *p) { randombytes_init(seed, NULL, 256); - printf("Testing Keygen, Sign, Open: %s\n", p->name); + printf("Testing Keygen, Sign, Open: %s\n", PARAM_name(p)); int res = mayo_keypair(p, pk, sk); if (res != MAYO_OK) { @@ -60,10 +60,10 @@ static int test_mayo(const mayo_params_t *p) { } #ifdef ENABLE_CT_TESTING - VALGRIND_MAKE_MEM_DEFINED(pk, p->cpk_bytes); + VALGRIND_MAKE_MEM_DEFINED(pk, PARAM_cpk_bytes(p)); #endif - unsigned long long smlen = p->sig_bytes + 32; + unsigned long long smlen = PARAM_sig_bytes(p) + 32; res = mayo_sign(p, sig, &smlen, msg, 32, sk); if (res != MAYO_OK) { @@ -72,9 +72,9 @@ static int test_mayo(const mayo_params_t *p) { } printf("pk: "); - print_hex(pk, p->cpk_bytes); + print_hex(pk, PARAM_cpk_bytes(p)); printf("sk: "); - print_hex(sk, p->csk_bytes); + print_hex(sk, PARAM_csk_bytes(p)); printf("sm: "); print_hex(sig, smlen); @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) { rc = test_mayo(&MAYO_5); } #else - rc = test_mayo(&MAYO_VARIANT); + rc = test_mayo(0); #endif if (rc != MAYO_OK) { diff --git a/test/test_sample_solution.c b/test/test_sample_solution.c index 9d1a075..82b2ce9 100644 --- a/test/test_sample_solution.c +++ b/test/test_sample_solution.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static int test_sample_solution(void) {