From 765e5d16891e9e6edd1ffe83edff1dd37c3e36ce Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Sat, 31 Aug 2024 21:34:57 +0800 Subject: [PATCH 1/4] types.h: add powerpc macros --- wolfssl/wolfcrypt/types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index 4696b065b6..ac42afef46 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -158,16 +158,16 @@ decouple library dependencies with standard string, memory and so on. #elif !defined(__BCPLUSPLUS__) && !defined(__EMSCRIPTEN__) #if !defined(SIZEOF_LONG_LONG) && !defined(SIZEOF_LONG) #if (defined(__alpha__) || defined(__ia64__) || \ - defined(_ARCH_PPC64) || defined(__mips64) || \ + defined(_ARCH_PPC64) || defined(__ppc64__) || \ defined(__x86_64__) || defined(__s390x__ ) || \ ((defined(sun) || defined(__sun)) && \ (defined(LP64) || defined(_LP64))) || \ (defined(__riscv_xlen) && (__riscv_xlen == 64)) || \ - defined(__aarch64__) || \ + defined(__aarch64__) || defined(__mips64) || \ (defined(__DCC__) && (defined(__LP64) || defined(__LP64__)))) /* long should be 64bit */ #define SIZEOF_LONG 8 - #elif defined(__i386__) || defined(__CORTEX_M3__) + #elif defined(__i386__) || defined(__CORTEX_M3__) || defined(__ppc__) /* long long should be 64bit */ #define SIZEOF_LONG_LONG 8 #endif @@ -230,7 +230,7 @@ decouple library dependencies with standard string, memory and so on. defined(__x86_64__) || defined(_M_X64)) || \ defined(__aarch64__) || defined(__sparc64__) || defined(__s390x__ ) || \ (defined(__riscv_xlen) && (__riscv_xlen == 64)) || defined(_M_ARM64) || \ - defined(__aarch64__) || \ + defined(__aarch64__) || defined(__ppc64__) || \ (defined(__DCC__) && (defined(__LP64) || defined(__LP64__))) #define WC_64BIT_CPU #elif (defined(sun) || defined(__sun)) && \ From b6bfae9c24dd369111825590f09f6c50a8559061 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Sun, 1 Sep 2024 19:44:20 +0800 Subject: [PATCH 2/4] asm.c: fix ppc asm for macOS --- wolfcrypt/src/asm.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/wolfcrypt/src/asm.c b/wolfcrypt/src/asm.c index c360990675..2096ae90d0 100644 --- a/wolfcrypt/src/asm.c +++ b/wolfcrypt/src/asm.c @@ -529,6 +529,27 @@ __asm__( \ #define LOOP_START \ mu = c[x] * mp +#ifdef __APPLE__ + +#define INNERMUL \ +__asm__( \ + " mullw r16,%3,%4 \n\t" \ + " mulhwu r17,%3,%4 \n\t" \ + " addc r16,r16,%2 \n\t" \ + " addze r17,r17 \n\t" \ + " addc %1,r16,%5 \n\t" \ + " addze %0,r17 \n\t" \ +:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "cc"); ++tmpm; + +#define PROPCARRY \ +__asm__( \ + " addc %1,%3,%2 \n\t" \ + " xor %0,%2,%2 \n\t" \ + " addze %0,%2 \n\t" \ +:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc"); + +#else + #define INNERMUL \ __asm__( \ " mullw 16,%3,%4 \n\t" \ @@ -546,6 +567,8 @@ __asm__( \ " addze %0,%2 \n\t" \ :"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc"); +#endif + #elif defined(TFM_PPC64) /* PPC64 */ @@ -555,6 +578,8 @@ __asm__( \ #define LOOP_START \ mu = c[x] * mp +#ifdef __APPLE__ + #define INNERMUL \ __asm__( \ " mulld r16,%3,%4 \n\t" \ @@ -576,6 +601,31 @@ __asm__( \ " addze %0,%0 \n\t" \ :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc"); +#else + +#define INNERMUL \ +__asm__( \ + " mulld 16,%3,%4 \n\t" \ + " mulhdu 17,%3,%4 \n\t" \ + " addc 16,16,%0 \n\t" \ + " addze 17,17 \n\t" \ + " ldx 18,0,%1 \n\t" \ + " addc 16,16,18 \n\t" \ + " addze %0,17 \n\t" \ + " sdx 16,0,%1 \n\t" \ 
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm; + +#define PROPCARRY \ +__asm__( \ + " ldx 16,0,%1 \n\t" \ + " addc 16,16,%0 \n\t" \ + " sdx 16,0,%1 \n\t" \ + " xor %0,%0,%0 \n\t" \ + " addze %0,%0 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc"); + +#endif + /******************************************************************/ #elif defined(TFM_AVR32) From ef2424336ceb7a7e5c3bb0821a4d8b14507d8af9 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Sun, 1 Sep 2024 19:33:29 +0800 Subject: [PATCH 3/4] sp_int.c: fix ppc asm for macOS --- wolfcrypt/src/sp_int.c | 302 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 8c727d738f..34591c62e4 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -3477,6 +3477,156 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo, * CPU: PPC64 */ + #ifdef __APPLE__ + +/* Multiply va by vb and store double size result in: vh | vl */ +#define SP_ASM_MUL(vl, vh, va, vb) \ + __asm__ __volatile__ ( \ + "mulld %[l], %[a], %[b] \n\t" \ + "mulhdu %[h], %[a], %[b] \n\t" \ + : [h] "+r" (vh), [l] "+r" (vl) \ + : [a] "r" (va), [b] "r" (vb) \ + : "memory" \ + ) +/* Multiply va by vb and store double size result in: vo | vh | vl */ +#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mulhdu %[h], %[a], %[b] \n\t" \ + "mulld %[l], %[a], %[b] \n\t" \ + "li %[o], 0 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : \ + ) +/* Multiply va by vb and add double size result into: vo | vh | vl */ +#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[b] \n\t" \ + "mulhdu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result into: vh | vl */ +#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[b] \n\t" \ + "mulhdu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result twice into: vo | vh | vl */ +#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[b] \n\t" \ + "mulhdu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result twice into: vo | vh | vl + * Assumes first add will not overflow vh | vl + */ +#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[b] \n\t" \ + "mulhdu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Square va and store double size result in: vh | vl */ +#define SP_ASM_SQR(vl, vh, va) \ + __asm__ 
__volatile__ ( \ + "mulld %[l], %[a], %[a] \n\t" \ + "mulhdu %[h], %[a], %[a] \n\t" \ + : [h] "+r" (vh), [l] "+r" (vl) \ + : [a] "r" (va) \ + : "memory" \ + ) +/* Square va and add double size result into: vo | vh | vl */ +#define SP_ASM_SQR_ADD(vl, vh, vo, va) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[a] \n\t" \ + "mulhdu r17, %[a], %[a] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va) \ + : "r16", "r17", "cc" \ + ) +/* Square va and add double size result into: vh | vl */ +#define SP_ASM_SQR_ADD_NO(vl, vh, va) \ + __asm__ __volatile__ ( \ + "mulld r16, %[a], %[a] \n\t" \ + "mulhdu r17, %[a], %[a] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "r16", "r17", "cc" \ + ) +/* Add va into: vh | vl */ +#define SP_ASM_ADDC(vl, vh, va) \ + __asm__ __volatile__ ( \ + "addc %[l], %[l], %[a] \n\t" \ + "addze %[h], %[h] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "cc" \ + ) +/* Sub va from: vh | vl */ +#define SP_ASM_SUBB(vl, vh, va) \ + __asm__ __volatile__ ( \ + "subfc %[l], %[a], %[l] \n\t" \ + "li r16, 0 \n\t" \ + "subfe %[h], r16, %[h] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "r16", "cc" \ + ) +/* Add two times vc | vb | va into vo | vh | vl */ +#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \ + __asm__ __volatile__ ( \ + "addc %[l], %[l], %[a] \n\t" \ + "adde %[h], %[h], %[b] \n\t" \ + "adde %[o], %[o], %[c] \n\t" \ + "addc %[l], %[l], %[a] \n\t" \ + "adde %[h], %[h], %[b] \n\t" \ + "adde %[o], %[o], %[c] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \ + : "cc" \ + ) +/* Count leading zeros. 
*/ +#define SP_ASM_LZCNT(va, vn) \ + __asm__ __volatile__ ( \ + "cntlzd %[n], %[a] \n\t" \ + : [n] "=r" (vn) \ + : [a] "r" (va) \ + : \ + ) + + #else /* !defined(__APPLE__) */ + /* Multiply va by vb and store double size result in: vh | vl */ #define SP_ASM_MUL(vl, vh, va, vb) \ __asm__ __volatile__ ( \ @@ -3623,6 +3773,8 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo, : \ ) + #endif /* !defined(__APPLE__) */ + #define SP_INT_ASM_AVAILABLE #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */ @@ -3632,6 +3784,154 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo, * CPU: PPC 32-bit */ + #ifdef __APPLE__ + +/* Multiply va by vb and store double size result in: vh | vl */ +#define SP_ASM_MUL(vl, vh, va, vb) \ + __asm__ __volatile__ ( \ + "mullw %[l], %[a], %[b] \n\t" \ + "mulhwu %[h], %[a], %[b] \n\t" \ + : [h] "+r" (vh), [l] "+r" (vl) \ + : [a] "r" (va), [b] "r" (vb) \ + : "memory" \ + ) +/* Multiply va by vb and store double size result in: vo | vh | vl */ +#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mulhwu %[h], %[a], %[b] \n\t" \ + "mullw %[l], %[a], %[b] \n\t" \ + "li %[o], 0 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + ) +/* Multiply va by vb and add double size result into: vo | vh | vl */ +#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[b] \n\t" \ + "mulhwu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result into: vh | vl */ +#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[b] \n\t" \ + "mulhwu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result twice into: vo | vh | vl */ +#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[b] \n\t" \ + "mulhwu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Multiply va by vb and add double size result twice into: vo | vh | vl + * Assumes first add will not overflow vh | vl + */ +#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[b] \n\t" \ + "mulhwu r17, %[a], %[b] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb) \ + : "r16", "r17", "cc" \ + ) +/* Square va and store double size result in: vh | vl */ +#define SP_ASM_SQR(vl, vh, va) \ + __asm__ __volatile__ ( \ + "mullw %[l], %[a], %[a] \n\t" \ + "mulhwu %[h], %[a], %[a] \n\t" \ + : [h] "+r" (vh), [l] "+r" (vl) \ + : [a] "r" (va) \ + : "memory" \ + ) +/* Square va and add double size result into: vo | vh | vl */ +#define SP_ASM_SQR_ADD(vl, vh, vo, va) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[a] \n\t" \ + "mulhwu r17, 
%[a], %[a] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + "addze %[o], %[o] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va) \ + : "r16", "r17", "cc" \ + ) +/* Square va and add double size result into: vh | vl */ +#define SP_ASM_SQR_ADD_NO(vl, vh, va) \ + __asm__ __volatile__ ( \ + "mullw r16, %[a], %[a] \n\t" \ + "mulhwu r17, %[a], %[a] \n\t" \ + "addc %[l], %[l], r16 \n\t" \ + "adde %[h], %[h], r17 \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "r16", "r17", "cc" \ + ) +/* Add va into: vh | vl */ +#define SP_ASM_ADDC(vl, vh, va) \ + __asm__ __volatile__ ( \ + "addc %[l], %[l], %[a] \n\t" \ + "addze %[h], %[h] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "cc" \ + ) +/* Sub va from: vh | vl */ +#define SP_ASM_SUBB(vl, vh, va) \ + __asm__ __volatile__ ( \ + "subfc %[l], %[a], %[l] \n\t" \ + "li r16, 0 \n\t" \ + "subfe %[h], r16, %[h] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh) \ + : [a] "r" (va) \ + : "r16", "cc" \ + ) +/* Add two times vc | vb | va into vo | vh | vl */ +#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \ + __asm__ __volatile__ ( \ + "addc %[l], %[l], %[a] \n\t" \ + "adde %[h], %[h], %[b] \n\t" \ + "adde %[o], %[o], %[c] \n\t" \ + "addc %[l], %[l], %[a] \n\t" \ + "adde %[h], %[h], %[b] \n\t" \ + "adde %[o], %[o], %[c] \n\t" \ + : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \ + : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \ + : "cc" \ + ) +/* Count leading zeros. */ +#define SP_ASM_LZCNT(va, vn) \ + __asm__ __volatile__ ( \ + "cntlzw %[n], %[a] \n\t" \ + : [n] "=r" (vn) \ + : [a] "r" (va) \ + ) + + #else /* !defined(__APPLE__) */ + /* Multiply va by vb and store double size result in: vh | vl */ #define SP_ASM_MUL(vl, vh, va, vb) \ __asm__ __volatile__ ( \ @@ -3776,6 +4076,8 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo, : [a] "r" (va) \ ) + #endif /* !defined(__APPLE__) */ + #define SP_INT_ASM_AVAILABLE #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 64 */ From 94478cb20843cb9d96b1699a17aa6d03143f54ad Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Sat, 31 Aug 2024 22:11:54 +0800 Subject: [PATCH 4/4] wc_port.h: fix macros for powerpc --- wolfssl/wolfcrypt/wc_port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wolfssl/wolfcrypt/wc_port.h b/wolfssl/wolfcrypt/wc_port.h index d07dbb1b34..0076f7f417 100644 --- a/wolfssl/wolfcrypt/wc_port.h +++ b/wolfssl/wolfcrypt/wc_port.h @@ -1106,7 +1106,7 @@ WOLFSSL_ABI WOLFSSL_API int wolfCrypt_Cleanup(void); #endif /* PowerPC time_t is int */ - #ifdef __PPC__ + #if defined(__PPC__) || defined(__ppc__) #define TIME_T_NOT_64BIT #endif @@ -1274,7 +1274,7 @@ WOLFSSL_ABI WOLFSSL_API int wolfCrypt_Cleanup(void); #define XFENCE() XASM_VOLATILE("isb") #elif defined(__riscv) #define XFENCE() XASM_VOLATILE("fence") - #elif defined(__PPC__) + #elif defined(__PPC__) || defined(__POWERPC__) #define XFENCE() XASM_VOLATILE("isync; sync") #else #define XFENCE() WC_DO_NOTHING
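
Note for reviewers (not part of the patches, nothing below is meant to be applied): the asm.c and sp_int.c hunks all follow one pattern, sketched standalone here under the stated assumptions. Apple's Mach-O assembler accepts only symbolic register names (r0-r31) in PowerPC instructions, while GNU as targeting ELF defaults to bare register numbers (symbolic names need -mregnames), hence the #ifdef __APPLE__ split around every macro that hard-codes scratch registers. The function below mirrors the 32-bit SP_ASM_MUL_ADD shape; the file and function names are illustrative only.

/* ppc_mul_add_sketch.c - illustration only; builds only for PowerPC targets.
 * Apple:  gcc -arch ppc -c ppc_mul_add_sketch.c
 * ELF:    powerpc-linux-gnu-gcc -c ppc_mul_add_sketch.c  (toolchain name may vary)
 */

/* Add the 64-bit product a*b into the 96-bit accumulator o:h:l. */
static void mul_add(unsigned int *l, unsigned int *h, unsigned int *o,
                    unsigned int a, unsigned int b)
{
#if defined(__APPLE__)
    /* Apple's as requires the rN spelling for hard-coded scratch registers. */
    __asm__ __volatile__ (
        "mullw  r16, %[a], %[b] \n\t"   /* low 32 bits of a*b   */
        "mulhwu r17, %[a], %[b] \n\t"   /* high 32 bits of a*b  */
        "addc   %[l], %[l], r16 \n\t"   /* l += lo, set carry   */
        "adde   %[h], %[h], r17 \n\t"   /* h += hi + carry      */
        "addze  %[o], %[o]      \n\t"   /* o += carry           */
        : [l] "+r" (*l), [h] "+r" (*h), [o] "+r" (*o)
        : [a] "r" (a), [b] "r" (b)
        : "r16", "r17", "cc");
#else
    /* GNU as for ELF takes bare register numbers by default. */
    __asm__ __volatile__ (
        "mullw  16, %[a], %[b]  \n\t"
        "mulhwu 17, %[a], %[b]  \n\t"
        "addc   %[l], %[l], 16  \n\t"
        "adde   %[h], %[h], 17  \n\t"
        "addze  %[o], %[o]      \n\t"
        : [l] "+r" (*l), [h] "+r" (*h), [o] "+r" (*o)
        : [a] "r" (a), [b] "r" (b)
        : "16", "17", "cc");
#endif
}

The types.h and wc_port.h hunks handle the preprocessor side of the same difference: the existing checks keyed on _ARCH_PPC64, __PPC__ and friends are extended to also test the __ppc__ / __ppc64__ / __POWERPC__ spellings that Apple's PowerPC compilers predefine, so the 64-bit-long, WC_64BIT_CPU, TIME_T_NOT_64BIT and XFENCE() selections now trigger on Darwin as well.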