From 7cf327f7eb2bf2329a127a9fc1b9f76159146277 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 30 Jan 2025 16:39:53 -0800 Subject: [PATCH] Added Xilinx UltraScale+ MPSoC CSU Support CSU support. Enabled support for offloading SHA3 hashing to CSU hardware using PKA=1. Added support for enabling JTAG at runtime if DEBUG_CSU is set. Requires patching PMUFW to enable register access. See: https://xilinx-wiki.atlassian.net/wiki/spaces/A/pages/2587197506/Zynq+UltraScale+MPSoC+JTAG+Enable+in+U-Boot --- IDE/XilinxSDK/README.md | 32 ++-- arch.mk | 6 + config/examples/zynqmp.config | 9 +- hal/zynq.c | 325 +++++++++++++++++++++++++++++++--- hal/zynq.h | 206 +++++++++++++++++++-- options.mk | 4 +- src/boot_aarch64.c | 7 + 7 files changed, 540 insertions(+), 49 deletions(-) diff --git a/IDE/XilinxSDK/README.md b/IDE/XilinxSDK/README.md index 42b92f9ee..8ada3f519 100644 --- a/IDE/XilinxSDK/README.md +++ b/IDE/XilinxSDK/README.md @@ -78,16 +78,7 @@ Note: If not using Position Independent Code (PIC) the linker script `ldscript.l ## Zynq UltraScale+ ARMv8 Crypto Extensions -To enable ARM assembly speedups for SHA: - -1) Add these build symbols: - -``` -WOLFSSL_ARMASM -WOLFSSL_ARMASM_INLINE -``` - -2) Add these compiler misc flags: `-mcpu=generic+crypto -mstrict-align -DWOLFSSL_AARCH64_NO_SQRMLSH` +By default the ARM assembly speedups for SHA will be enabled. This uses inline assembly in wolfcrypt/src/port/arm/ and the ARMv8 crypto extensions. To disable set `NO_ARM_ASM=1`. ## Generate signing key @@ -154,7 +145,7 @@ the_ROM_image: } ``` -You can also use exception level 3 or 1 depending on your needs. +You can use exception level 3, 2 or 1 depending on your needs. See hal/zynq.h options EL3_SECURE, EL2_HYPERVISOR and EL1_NONSECURE for enabling/disabling entry support for each. Default is support for EL2. From the workspace root: @@ -208,13 +199,12 @@ Hello World Successfully ran Hello World application ``` - ### Adding RSA Authentication 1. 
Generate keys: - * `bootgen.exe -generate_keys auth pem -arch zynqmp -image boot.bif` + * `bootgen.exe -generate_keys auth pem -arch zynqmp -image boot_auth.bif` 2. Create hash for primary key: - * `bootgen.exe -image boot.bif -arch zynqmp -w -o i BOOT.BIN -efuseppkbits ppkf_hash.txt` + * `bootgen.exe -image boot_auth.bif -arch zynqmp -w -o i BOOT.BIN -efuseppkbits ppkf_hash.txt` 3. Import example project for programming eFuses: * New BSP project (program efuses , ZCU102_hw_platform, standalone, CPU: PSU_cortexa53_0) * Goto Xilinx Board Support Packet Settings. @@ -235,11 +225,23 @@ Successfully ran Hello World application ``` 6. Build “boot.bin” image: - * `bootgen -image boot.bif -arch zynqmp -o i BOOT.BIN -w` + * `bootgen -image boot_auth.bif -arch zynqmp -o i BOOT.BIN -w` + +Note: During testing add `[fsbl_config] bh_auth_enable` to allow skipping of the eFuse check of the PPK hash. In production the RSA_EN eFuses must be blown to force checking of the PPK hash. Note: To generate a report of a boot.bin use the `bootgen_utility` or after 2022.1 use `bootgen -read`: `bootgen -arch zynqmp -read BOOT.BIN` + +# CSU Support + +Enabling PMU firmware support for access to the CSU. +In PetaLinux menuconfig under PMU Configuration add compiler flag `-DSECURE_ACCESS_VAL=1`. 
+```sh +petalinux-build -c pmufw +petalinux-build +``` + ## Post Quantum ### PQ XMSS diff --git a/arch.mk b/arch.mk index 3d3e24ef9..8e0579ffd 100644 --- a/arch.mk +++ b/arch.mk @@ -75,6 +75,12 @@ ifeq ($(ARCH),AARCH64) # Support detection and skip of U-Boot legacy header */ CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY CFLAGS+=-DWOLFBOOT_DUALBOOT + + ifeq ($(PKA),1) + # Use HAL for hash (see hal/zynq.c) + PKA_HASH_HAL=1 + CFLAGS+=-DWOLFBOOT_ZYNQMP_CSU + endif else ifeq ($(TARGET),nxp_ls1028a) ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 diff --git a/config/examples/zynqmp.config b/config/examples/zynqmp.config index dd81067ee..39e804941 100644 --- a/config/examples/zynqmp.config +++ b/config/examples/zynqmp.config @@ -8,6 +8,13 @@ SIGN?=RSA4096 HASH?=SHA3 IMAGE_HEADER_SIZE?=1024 +# Hashing Options +# 1. ARMv8+Crypto Assembly: PKA=0 and NO_ARM_ASM=0 (default) +# 2. CSU SHA3 hardware acceleration: PKA=1 and NO_ARM_ASM=1 +# 3. C Only: PKA=0 and NO_ARM_ASM=1 +NO_ARM_ASM?=0 +PKA?=0 + # XMSS/XMSS^MT is a post-quantum, stateful, hash-based signature scheme. # Use the helper script `tools/xmss/xmss_siglen.sh` # to calculate your signature length given an xmss parameter string. 
@@ -36,14 +43,12 @@ CFLAGS_EXTRA+=-DDEBUG_ZYNQ=1 VTOR?=1 CORTEX_M0?=0 NO_ASM?=0 -NO_ARM_ASM?=0 ALLOW_DOWNGRADE?=0 NVM_FLASH_WRITEONCE?=0 V?=0 SPMATH?=1 RAM_CODE?=0 DUALBANK_SWAP?=0 -PKA?=1 WOLFTPM?=0 EXT_FLASH?=1 SPI_FLASH?=0 diff --git a/hal/zynq.c b/hal/zynq.c index 3d1e95b0a..9ea0c78d6 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -71,6 +71,8 @@ typedef struct QspiDev { } QspiDev_t; static QspiDev_t mDev; +static uint32_t pmuVer; +#define PMUFW_MIN_VER 0x10001 /* v1.1*/ /* forward declarations */ static int qspi_wait_ready(QspiDev_t* dev); @@ -82,6 +84,10 @@ static int test_ext_flash(QspiDev_t* dev); /* asm function */ extern void flush_dcache_range(unsigned long start, unsigned long stop); +extern unsigned int current_el(void); + +void hal_delay_ms(uint64_t ms); +uint64_t hal_timer_ms(void); #ifdef DEBUG_UART void uart_init(void) @@ -131,6 +137,246 @@ void uart_write(const char* buf, uint32_t sz) } #endif /* DEBUG_UART */ +/* This struct defines the way the registers are stored on the stack during an + * exception. 
*/ +struct pt_regs { + uint64_t elr; + uint64_t regs[8]; +}; + +/* + * void smc_call(arg0, arg1...arg6) + * + * issue the secure monitor call + * + * x0~x6: input arguments (loaded from args->regs[0..6]; x7 is not loaded) + * x0~x3: output arguments (stored back to args->regs[0..3]) + */ +static void smc_call(struct pt_regs *args) +{ + asm volatile( + "ldr x0, %0\n" + "ldr x1, %1\n" + "ldr x2, %2\n" + "ldr x3, %3\n" + "ldr x4, %4\n" + "ldr x5, %5\n" + "ldr x6, %6\n" + "smc #0\n" + "str x0, %0\n" + "str x1, %1\n" + "str x2, %2\n" + "str x3, %3\n" + : "+m" (args->regs[0]), "+m" (args->regs[1]), + "+m" (args->regs[2]), "+m" (args->regs[3]) + : "m" (args->regs[4]), "m" (args->regs[5]), + "m" (args->regs[6]) + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "memory"); /* firmware may read/write caller buffers */ +} + +#define PM_ARGS_CNT 8 +#define PM_SIP_SVC 0xC2000000 +#define PM_GET_API_VERSION 0x01 +#define PM_SECURE_SHA 0x1A +#define PM_SECURE_RSA 0x1B +#define PM_MMIO_WRITE 0x13 +#define PM_MMIO_READ 0x14 + +/* Secure Monitor Call (SMC) to BL31 Silicon Provider (SIP) service, + * which is the PMU Firmware */ +static int pmu_request(uint32_t api_id, + uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, + uint32_t *ret_payload) +{ + struct pt_regs regs = {0}; /* regs[3..6] are loaded into x3-x6 by smc_call, so they must not hold stack garbage */ + + regs.regs[0] = PM_SIP_SVC | api_id; + regs.regs[1] = ((uint64_t)arg1 << 32) | arg0; + regs.regs[2] = ((uint64_t)arg3 << 32) | arg2; + + smc_call(&regs); + + if (ret_payload != NULL) { + ret_payload[0] = (uint32_t)(regs.regs[0]); + ret_payload[1] = (uint32_t)(regs.regs[0] >> 32); + ret_payload[2] = (uint32_t)(regs.regs[1]); + ret_payload[3] = (uint32_t)(regs.regs[1] >> 32); + ret_payload[4] = (uint32_t)(regs.regs[2]); + ret_payload[5] = (uint32_t)(regs.regs[2] >> 32); + ret_payload[6] = (uint32_t)(regs.regs[3]); + ret_payload[7] = (uint32_t)(regs.regs[3] >> 32); + } + return (ret_payload != NULL) ? 
ret_payload[0] : 0; +} + + +uint32_t pmu_get_version(void) +{ + uint32_t ret_payload[PM_ARGS_CNT]; + memset(ret_payload, 0, sizeof(ret_payload)); + pmu_request(PM_GET_API_VERSION, 0, 0, 0, 0, ret_payload); + return ret_payload[1]; +} + +uint32_t pmu_mmio_read(uint32_t addr) +{ + uint32_t ret_payload[PM_ARGS_CNT]; + memset(ret_payload, 0, sizeof(ret_payload)); + pmu_request(PM_MMIO_READ, addr, 0, 0, 0, ret_payload); + return ret_payload[1]; +} + +uint32_t pmu_mmio_writemask(uint32_t addr, uint32_t mask, uint32_t val) +{ + uint32_t ret_payload[PM_ARGS_CNT]; + memset(ret_payload, 0, sizeof(ret_payload)); + pmu_request(PM_MMIO_WRITE, addr, mask, val, 0, ret_payload); + return ret_payload[0]; /* 0=Success, 30=No Access */ +} + +uint32_t pmu_mmio_write(uint32_t addr, uint32_t val) +{ + return pmu_mmio_writemask(addr, 0xFFFFFFFF, val); +} + +#ifdef WOLFBOOT_ZYNQMP_CSU + +#ifdef WOLFBOOT_HASH_SHA3_384 +#include <wolfssl/wolfcrypt/sha3.h> +#define XSECURE_SHA3_INIT 1U +#define XSECURE_SHA3_UPDATE 2U +#define XSECURE_SHA3_FINAL 4U +static uint32_t secure_sha3(uint64_t addr, uint32_t sz, uint32_t flags) +{ + uint32_t ret_payload[PM_ARGS_CNT]; + memset(ret_payload, 0, sizeof(ret_payload)); + pmu_request(PM_SECURE_SHA, (addr >> 32), (addr & 0xFFFFFFFF), sz, flags, + ret_payload); + return ret_payload[0]; +} + +int wc_InitSha3_384(wc_Sha3* sha, void* heap, int devId) +{ + (void)sha; + (void)heap; + (void)devId; + return secure_sha3(0, 0, XSECURE_SHA3_INIT); +} +int wc_Sha3_384_Update(wc_Sha3* sha, const byte* data, word32 len) +{ + (void)sha; + flush_dcache_range( + (unsigned long)data, + (unsigned long)data + len); + return secure_sha3((uint64_t)data, len, XSECURE_SHA3_UPDATE); +} +int wc_Sha3_384_Final(wc_Sha3* sha, byte* out) +{ + (void)sha; + flush_dcache_range( + (unsigned long)out, + (unsigned long)out + WC_SHA3_384_DIGEST_SIZE); + return secure_sha3((uint64_t)out, 0, XSECURE_SHA3_FINAL); +} +void wc_Sha3_384_Free(wc_Sha3* sha) +{ + (void)sha; +} +#else +# error PKA=1 only supported with HASH=SHA3 
+#endif + +/* CSU PUF registration: fills syndrome[] (*syndromeSz is buffer size in bytes on entry, bytes written on return), *chash and *aux. Returns 0 on success, -1 on timeout, -2 on overrun */ +#define PUF_REG_TIMEOUT 500000 +int csu_puf_register(uint32_t* syndrome, uint32_t* syndromeSz, uint32_t* chash, + uint32_t* aux) +{ + int ret; + uint32_t puf_status, timeout = 0, idx = 0; + +#if defined(DEBUG_CSU) && DEBUG_CSU >= 1 + wolfBoot_printf("CSU Puf Register\n"); +#endif + + pmu_mmio_write(CSU_PUF_CFG0, CSU_PUF_CFG0_INIT); + pmu_mmio_write(CSU_PUF_CFG1, CSU_PUF_CFG1_INIT); + pmu_mmio_write(CSU_PUF_SHUTTER, CSU_PUF_SHUTTER_INIT); + pmu_mmio_write(CSU_PUF_CMD, CSU_PUF_CMD_REGISTRATION); + while (1) { + /* Wait for PUF status done */ + while (((puf_status = pmu_mmio_read(CSU_PUF_STATUS)) + & CSU_PUF_STATUS_SYN_WRD_RDY_MASK) == 0 + && ++timeout < PUF_REG_TIMEOUT); + if (timeout == PUF_REG_TIMEOUT) { + ret = -1; /* timeout */ + break; + } + if (puf_status & CSU_PUF_STATUS_KEY_RDY_MASK) { + *chash = pmu_mmio_read(CSU_PUF_WORD); + *aux = (puf_status & CSU_PUF_STATUS_AUX_MASK) >> 4; + ret = 0; + break; + } + /* A syndrome word is pending and will be stored at syndrome[idx], + * so the caller's buffer must have room for idx + 1 words. */ + if (((idx + 1) * 4) > *syndromeSz) { + ret = -2; /* overrun */ + break; + } + /* Read in the syndrome */ + syndrome[idx++] = pmu_mmio_read(CSU_PUF_WORD); + } + *syndromeSz = idx * 4; + +#if defined(DEBUG_CSU) && DEBUG_CSU >= 1 + wolfBoot_printf("Ret %d, SyndromeSz %d, CHASH 0x%08x, AUX 0x%08x\n", + ret, *syndromeSz, *chash, *aux); + #if DEBUG_CSU >= 2 + for (idx=0; idx<*syndromeSz/4; idx++) { + wolfBoot_printf("%08x", syndrome[idx]); + } + #endif +#endif + + return ret; +} + +#define CSU_PUF_SYNDROME_WORDS 386 +int csu_init(void) +{ + int ret; + uint32_t syndrome[CSU_PUF_SYNDROME_WORDS]; + uint32_t syndromeSz = (uint32_t)sizeof(syndrome); + uint32_t chash=0, aux=0; + uint32_t reg1 = pmu_mmio_read(CSU_IDCODE); + uint32_t reg2 = pmu_mmio_read(CSU_VERSION); + + wolfBoot_printf("CSU ID 0x%08x, Ver 0x%08x\n", + reg1, reg2 & CSU_VERSION_MASK); + +#ifdef DEBUG_CSU + /* Enable JTAG */ + wolfBoot_printf("Enabling JTAG\n"); + pmu_mmio_write(CSU_JTAG_SEC, 0x3F); + pmu_mmio_write(CSU_JTAG_DAP_CFG, 0xFF); + 
pmu_mmio_write(CSU_JTAG_CHAIN_CFG, 0x3); + pmu_mmio_write(CRL_APB_DBG_LPD_CTRL, 0x01002002); + pmu_mmio_write(CRL_APB_RST_LPD_DBG, 0x0); + pmu_mmio_write(CSU_PCAP_PROG, 0x1); + + /* Wait until JTAG is attached */ + while ((reg1 = pmu_mmio_read(CSU_JTAG_CHAIN_STATUS)) == 0); + wolfBoot_printf("JTAG Attached: status 0x%x\n", reg1); + hal_delay_ms(500); /* give time for debugger to break */ +#endif + + ret = csu_puf_register(syndrome, &syndromeSz, &chash, &aux); + + return ret; +} +#endif /* WOLFBOOT_ZYNQMP_CSU */ + #ifdef USE_XQSPIPSU /* Xilinx BSP Driver */ @@ -197,7 +443,7 @@ static int qspi_transfer(QspiDev_t* pDev, /* Dummy */ if (dummySz > 0) { memset(&msgs[msgCnt], 0, sizeof(XQspiPsu_Msg)); - msgs[msgCnt].ByteCount = dummySz; + msgs[msgCnt].ByteCount = dummySz; /* not used */ msgs[msgCnt].BusWidth = busWidth; msgCnt++; } @@ -406,8 +652,11 @@ static int qspi_cs(QspiDev_t* pDev, int csAssert) reg_genfifo |= GQSPI_GEN_FIFO_MODE_SPI; if (csAssert) { reg_genfifo |= (pDev->cs & GQSPI_GEN_FIFO_CS_MASK); + reg_genfifo |= GQSPI_GEN_FIFO_IMM(GQSPI_CS_ASSERT_CLOCKS); + } + else { + reg_genfifo |= GQSPI_GEN_FIFO_IMM(GQSPI_CS_DEASSERT_CLOCKS); } - reg_genfifo |= GQSPI_GEN_FIFO_IMM(GQSPI_CS_ASSERT_CLOCKS); return qspi_gen_fifo_write(reg_genfifo); } @@ -418,11 +667,11 @@ static uint32_t qspi_calc_exp(uint32_t xferSz, uint32_t* reg_genfifo) if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { /* Use exponent mode (DMA max is 2^28) */ for (expval=28; expval>=8; expval--) { - /* find highest bit set */ - if (xferSz & (1 << expval)) { + /* find highest value */ + if (xferSz >= (1UL << expval)) { *reg_genfifo |= GQSPI_GEN_FIFO_EXP_MASK; *reg_genfifo |= GQSPI_GEN_FIFO_IMM(expval); /* IMM=exponent */ - xferSz = (1 << expval); + xferSz = (1UL << expval); break; } } @@ -548,6 +797,14 @@ static int qspi_transfer(QspiDev_t* pDev, (unsigned long)dmarxptr + xferSz); #endif +#if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 2 + #ifndef GQSPI_MODE_IO + wolfBoot_printf("DMA: ptr %p, xferSz %d\n", dmarxptr, 
xferSz); + #else + wolfBoot_printf("IO: ptr %p, xferSz %d\n", rxData, xferSz); + #endif +#endif + /* Submit general FIFO operation */ ret = qspi_gen_fifo_write(reg_genfifo); if (ret != GQSPI_CODE_SUCCESS) { @@ -801,7 +1058,7 @@ static int qspi_exit_4byte_addr(QspiDev_t* dev) #endif /* QSPI functions */ -void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) +void qspi_init(void) { int ret; uint32_t reg_cfg, reg_isr; @@ -814,9 +1071,6 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) XQspiPsu_Config *QspiConfig; #endif - (void)cpu_clock; - (void)flash_freq; - memset(&mDev, 0, sizeof(mDev)); #ifdef USE_XQSPIPSU @@ -865,6 +1119,7 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) GQSPI_ISR = (reg_isr | GQSPI_ISR_WR_TO_CLR_MASK); /* Clear poll timeout counter interrupt */ reg_cfg = GQSPIDMA_ISR; GQSPIDMA_ISR = reg_cfg; /* clear all active interrupts */ + GQSPI_IER = GQSPI_IXR_GEN_FIFO_EMPTY; GQSPI_IDR = GQSPI_IXR_ALL_MASK; /* disable interrupts */ GQSPIDMA_IDR = GQSPIDMA_ISR_ALL_MASK; @@ -873,7 +1128,7 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) /* Initialize clock divisor, write protect hold and start mode */ #ifdef GQSPI_MODE_IO reg_cfg = GQSPI_CFG_MODE_EN_IO; /* Use I/O Transfer Mode */ - reg_cfg |= GQSPI_CFG_START_GEN_FIFO; /* Auto start GFIFO cmd execution */ + reg_cfg |= GQSPI_CFG_START_GEN_FIFO; /* Trigger GFIFO commands to start */ #else reg_cfg = GQSPI_CFG_MODE_EN_DMA; /* Use DMA Transfer Mode */ #endif @@ -900,7 +1155,13 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) * The generic controller should be in clock loopback mode and the clock * tap delay enabled, but the data tap delay disabled. 
*/ /* For EL2 or lower must use IOCTL_SET_TAPDELAY_BYPASS ARG1=2, ARG2=0 */ - IOU_TAPDLY_BYPASS = 0; + if (current_el() <= 2) { + reg_cfg = 0; + pmu_request(PM_MMIO_WRITE, IOU_TAPDLY_BYPASS_ADDR, 0x7, reg_cfg, 0, NULL); + } + else { + IOU_TAPDLY_BYPASS = 0; + } GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; GQSPI_DATA_DLY_ADJ = 0; #endif @@ -913,6 +1174,7 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) /* Reset DMA */ GQSPIDMA_CTRL = GQSPIDMA_CTRL_DEF; GQSPIDMA_CTRL2 = GQSPIDMA_CTRL2_DEF; + GQSPIDMA_IER = GQSPIDMA_ISR_ALL_MASK; GQSPI_EN = 1; /* Enable Device */ #endif /* USE_QNX */ @@ -957,8 +1219,8 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) /* Slave Select */ mDev.mode = GQSPI_QSPI_MODE; #if GQPI_USE_DUAL_PARALLEL == 1 - mDev.bus = GQSPI_GEN_FIFO_BUS_BOTH; - mDev.cs = GQSPI_GEN_FIFO_CS_BOTH; + mDev.bus = GQSPI_GEN_FIFO_BUS_BOTH; /* GQSPI_GEN_FIFO_BUS_LOW or GQSPI_GEN_FIFO_BUS_UP */ + mDev.cs = GQSPI_GEN_FIFO_CS_BOTH; /* GQSPI_GEN_FIFO_CS_LOWER or GQSPI_GEN_FIFO_CS_UPPER */ mDev.stripe = GQSPI_GEN_FIFO_STRIPE; #endif @@ -974,7 +1236,20 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) #endif } -#if 0 +void hal_delay_ms(uint64_t ms) +{ + uint64_t start = hal_timer_ms(); + uint64_t end = start + ms; + + while (1) { + uint64_t cur = hal_timer_ms(); + /* check for timer rollover or expiration */ + if (cur < start || cur >= end) { + break; + } + } +} + uint64_t hal_timer_ms(void) { uint64_t val; @@ -986,25 +1261,33 @@ uint64_t hal_timer_ms(void) val /= cntfrq; return val; } -#endif /* public HAL functions */ void hal_init(void) { - uint32_t cpu_freq = 0; + uint32_t reg; const char* bootMsg = "\nwolfBoot Secure Boot\n"; #ifdef DEBUG_UART uart_init(); #endif wolfBoot_printf(bootMsg); + wolfBoot_printf("Current EL: %d\n", current_el()); -#if 0 - /* This is only allowed for EL-3 */ - asm volatile("msr cntfrq_el0, %0" : : "r" (cpu_freq) : "memory"); -#endif + qspi_init(); - qspi_init(cpu_freq, 0); + pmuVer = pmu_get_version(); + 
wolfBoot_printf("PMUFW Ver: %d.%d\n", + (int)(pmuVer >> 16), (int)(pmuVer & 0xFFFF)); + +#ifdef WOLFBOOT_ZYNQMP_CSU + if (pmuVer >= PMUFW_MIN_VER) { + csu_init(); + } + else { + wolfBoot_printf("Skipping CSU Init (PMUFW not found)\n"); + } +#endif } void hal_prepare_boot(void) diff --git a/hal/zynq.h b/hal/zynq.h index 63669260d..38af59721 100644 --- a/hal/zynq.h +++ b/hal/zynq.h @@ -100,16 +100,17 @@ #define GQSPIDMA_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x820))) /* DST DMA interrupt mask */ #define GQSPIDMA_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) /* General DST DMA control register 2 */ -#define GQSPI_LPBK_DLY_ADJ_USE_LPBK (1UL << 5) -#define GQSPI_LPBK_DLY_ADJ_DIV0(x) (((x) & 0x7) << 0) -#define GQSPI_LPBK_DLY_ADJ_DLY1(x) (((x) & 0x3) << 3) +#define GQSPI_LPBK_DLY_ADJ_USE_LPBK (1UL << 5) +#define GQSPI_LPBK_DLY_ADJ_DIV0(x) (((x) & 0x7) << 0) +#define GQSPI_LPBK_DLY_ADJ_DLY1(x) (((x) & 0x3) << 3) #define GQSPI_DATA_DLY_ADJ_USE_DATA_DLY (1UL << 31) #define GQSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(x) (((x) & 0x7) << 28) /* GQSPI Registers */ /* GQSPI_CFG: Configuration registers */ -#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity outside QSPI word: 0: QSPI clock is quiescent low, 1: QSPI clock is quiescent high */ -#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: the QSPI clock is inactive outside the word, 0: the QSPI clock is active outside the word */ +/* Clock Phase and Polarity. 
Only mode 3 and 0 are support (11b or 00b) */ +#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity: 1: QSPI clock is quiescent high, 0: QSPI clock is quiescent low, */ +#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: QSPI clock is inactive outside the word, 0: QSPI clock is active outside the word */ /* 000: divide by 2, 001: divide by 4, 010: divide by 8, 011: divide by 16, 100: divide by 32, 101: divide by 64, 110: divide by 128, 111: divide by 256 */ @@ -139,8 +140,8 @@ #define GQSPI_IXR_ALL_MASK (GQSPI_IXR_POLL_TIME_EXPIRE | GQSPI_IXR_TX_FIFO_NOT_FULL | \ GQSPI_IXR_TX_FIFO_FULL | GQSPI_IXR_RX_FIFO_NOT_EMPTY | GQSPI_IXR_RX_FIFO_FULL | \ - GQSPI_IXR_GEN_FIFO_EMPTY | GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | \ - GQSPI_IXR_GEN_FIFO_FULL | GQSPI_IXR_RX_FIFO_EMPTY) + GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | GQSPI_IXR_GEN_FIFO_FULL | \ + GQSPI_IXR_RX_FIFO_EMPTY) #define GQSPI_ISR_WR_TO_CLR_MASK 0x00000002U /* GQSPI_GEN_FIFO: FIFO data register */ @@ -175,7 +176,7 @@ #define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) /* GQSPIDMA_CTRL */ -#define GQSPIDMA_CTRL_DEF 0x803FFA00UL +#define GQSPIDMA_CTRL_DEF 0x403FFA00UL #define GQSPIDMA_CTRL2_DEF 0x081BFFF8UL /* GQSPIDMA_STS */ @@ -187,14 +188,15 @@ #define GQSPIDMA_ISR_ALL_MASK 0xFEU /* QSPI Configuration (bare-metal only) */ - #ifndef GQSPI_CLK_REF #define GQSPI_CLK_REF 125000000 /* QSPI Reference Clock */ #endif #ifndef GQSPI_CLK_DIV #define GQSPI_CLK_DIV 2 /* (QSPI_REF_CLK (125MHZ) / (2 << DIV) = BUS): 0=DIV2, 1=DIV4, 2=DIV8 */ #endif -#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles foes in IMM */ +#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) */ +#define GQSPI_CS_DEASSERT_CLOCKS 4 /* CS Hold Time */ + #define GQSPI_FIFO_WORD_SZ 4 #define QQSPI_DMA_ALIGN 64 /* L1 cache size */ #ifndef GQSPI_DMA_TMPSZ @@ -380,4 +382,188 @@ #define GICC_BASE 0xF9020000 +/* Clock - Full Power Domain */ +#define CRL_APB_BASE 0xFF5E0000UL +#define QSPI_REF_CTRL 
(*((volatile uint32_t*)(CRL_APB_BASE + 0x68))) + +#define QSPI_REF_CTRL_SRCSEL_MASK 0x7 +#define QSPI_REF_CTRL_SRCSEL(n) ((n) & QSPI_REF_CTRL_SRCSEL_MASK) +#define QSPI_REF_CTRL_SRCSEL_IOPLL QSPI_REF_CTRL_SRCSEL(0) +#define QSPI_REF_CTRL_SRCSEL_RPLL QSPI_REF_CTRL_SRCSEL(2) +#define QSPI_REF_CTRL_SRCSEL_DPLL QSPI_REF_CTRL_SRCSEL(3) /* DPLL_CLK_TO_LPD */ + +#define QSPI_REF_CTRL_DIVISOR0_MASK (0x3F << 8) +#define QSPI_REF_CTRL_DIVISOR0(n) (((n) << 8) & QSPI_REF_CTRL_DIVISOR0_MASK) + +#define QSPI_REF_CTRL_DIVISOR1_MASK (0x3F << 16) +#define QSPI_REF_CTRL_DIVISOR1(n) (((n) << 16) & QSPI_REF_CTRL_DIVISOR1_MASK) + + +/* Configuration Security Unit (CSU) */ +/* Triple-Redundant MicroBlaze processor */ +/* 128 KB CSU ROM (immutable) */ +/* 32 KB CSU RAM (with ECC) */ +/* Internal clock source */ +/* CSU must be called through PMUFW. */ +/* PMUFW must be built with SECURE_ACCESS_VAL=1 */ +#define CSU_BASE 0xFFCA0000UL + +#define CSU_STATUS (CSU_BASE + 0x0000U) +#define CSU_STATUS_BOOT_ENC (1 << 1) +#define CSU_STATUS_BOOT_AUTH (1 << 0) + +/* See JTAG IDCODE in TRM */ +#define CSU_IDCODE (CSU_BASE + 0x0040U) +/* 2473_8093h=ZU9EG */ +/* 1471_1093h=ZU2CG/EG */ + +#define CSU_VERSION (CSU_BASE + 0x0044U) +/* 0: XCZU9EG-ES1, + * 1: XCZU3EG-ES1, XCZU15EG-ES1, + * 2: XCZU7EV-ES1, XCZU9EG-ES2, XCZU19EG-ES1, + * 3: All devices as of October 2017 (Production Level) */ +#define CSU_VERSION_MASK 0xF + +#define CSU_TAMPER_STATUS (CSU_BASE + 0x5000U) +#define CSU_TAMPER_TRIG (CSU_BASE + 0x0014U) /* set =1 to trigger tamper event for testing */ + +/* SSS - Secure Stream Switch */ +#define CSU_SSS_CFG (CSU_BASE + 0x0008U) +#define CSU_SSS_CFG_PCAP_MASK 0x0000000FU +#define CSU_SSS_CFG_PCAP(n) (((n) << 0) & CSU_SSS_CFG_PCAP_MASK) +#define CSU_SSS_CFG_DMA_MASK 0x000000F0U +#define CSU_SSS_CFG_DMA(n) (((n) << 4) & CSU_SSS_CFG_DMA_MASK) +#define CSU_SSS_CFG_AES_MASK 0x00000F00U +#define CSU_SSS_CFG_AES(n) (((n) << 8) & CSU_SSS_CFG_AES_MASK) +#define CSU_SSS_CFG_SHA_MASK
0x0000F000U +#define CSU_SSS_CFG_SHA(n) (((n) << 12) & CSU_SSS_CFG_SHA_MASK) +/* Data Sources */ +#define CSU_SSS_CFG_SRC_NONE 0x0 +#define CSU_SSS_CFG_SRC_PCAP 0x3 /* Processor Configuration Access Port */ +#define CSU_SSS_CFG_SRC_DMA 0x5 +#define CSU_SSS_CFG_SRC_AES 0xA + +/* AES-GCM 256-bit */ +#define CSU_AES_STATUS (CSU_BASE + 0x1000U) +#define CSU_AES_KEY_SRC (CSU_BASE + 0x1004U) /* AES key source selection */ +#define CSU_AES_KEY_LOAD (CSU_BASE + 0x1008U) /* Loads the key selected by AES_KEY_SRC into the AES (self clearing) */ +#define CSU_AES_START_MSG (CSU_BASE + 0x100CU) /* Starts the decryption process. The IV must be loaded before the AES will decrypt a payload (self clearing) */ +#define CSU_AES_RESET (CSU_BASE + 0x1010U) +#define CSU_AES_KEY_CLEAR (CSU_BASE + 0x1014U) +#define CSU_AES_CFG (CSU_BASE + 0x1018U) /* 0=Dec, 1=Enc */ +#define CSU_AES_KUP_WR (CSU_BASE + 0x101CU) +#define CSU_AES_KUP (CSU_BASE + 0x1020U) /* 32 bytes - through 0x40 */ +#define CSU_AES_IV (CSU_BASE + 0x1040U) /* 16 bytes - through 0x50 */ + +#define CSU_AES_STATUS_OKR_ZEROED (1 << 11) +#define CSU_AES_STATUS_BOOT_ZEROED (1 << 10) +#define CSU_AES_STATUS_KUP_ZEROED (1 << 9) +#define CSU_AES_STATUS_AES_KEY_ZEROED (1 << 8) +#define CSU_AES_STATUS_KEY_INIT_DONE (1 << 4) +#define CSU_AES_STATUS_GCM_TAG_PASS (1 << 3) +#define CSU_AES_STATUS_DONE (1 << 2) +#define CSU_AES_STATUS_READY (1 << 1) +#define CSU_AES_STATUS_BUSY (1 << 0) + +#define CSU_AES_KEY_SRC_DEVICE_KEY 1 /* Device key is selected and locked by the CSU ROM during boot */ +#define CSU_AES_KEY_SRC_KUP 0 /* User provided key source */ + +#define CSU_AES_KEY_CLEAR_KUP (1 << 1) /* Zeroize KUP key */ +#define CSU_AES_KEY_CLEAR_EXP (1 << 0) /* Zeroize expanded key */ + +/* PUF */ +#define CSU_PUF_CMD (CSU_BASE + 0x4000U) +#define CSU_PUF_CFG0 (CSU_BASE + 0x4004U) +#define CSU_PUF_CFG1 (CSU_BASE + 0x4008U) +#define CSU_PUF_SHUTTER (CSU_BASE + 0x400CU) +#define CSU_PUF_STATUS (CSU_BASE + 0x4010U) +#define CSU_PUF_WORD
(CSU_BASE + 0x4018U) + +#define CSU_PUF_CMD_CLEAR 0x6 /* Clear PUF status */ +#define CSU_PUF_CMD_STATUS 0x5 /* Read out regeneration status */ +#define CSU_PUF_CMD_REGENERATION 0x4 /* Key regeneration */ +#define CSU_PUF_CMD_REGISTRATION 0x1 /* Key registration */ + +#define CSU_PUF_CFG0_INIT 0x2 +#define CSU_PUF_CFG1_INIT 0x0C230090 /* 0x00080080 */ +#define CSU_PUF_SHUTTER_INIT 0x0100005E /* 0x01000100 */ + +#define CSU_PUF_STATUS_OVERFLOW_MASK 0x30000000 /* Overflow, if bits are not 0. Reduce SHUT[SOPEN] value. */ +#define CSU_PUF_STATUS_AUX_MASK 0x0FFFFFF0 /* During provisioning, auxiliary syndrome bits are stored here and must be written to the eFuse or boot image. */ +#define CSU_PUF_STATUS_KEY_RDY_MASK 0x00000008 +#define CSU_PUF_STATUS_KEY_ZERO_MASK 0x00000002 /* Indicates that the PUF key has been zeroized */ +#define CSU_PUF_STATUS_SYN_WRD_RDY_MASK 0x00000001 /* Indicates a syndrome word is ready in the PUF_WORD register. */ + +/* SHA3 */ +#define CSU_SHA_START (CSU_BASE + 0x2000U) +#define CSU_SHA_RESET (CSU_BASE + 0x2004U) +#define CSU_SHA_DONE (CSU_BASE + 0x2008U) +#define CSU_SHA_DIGEST (CSU_BASE + 0x2010U) /* 48 bytes (through 0x40) */ +#define CSU_SHA_START (CSU_BASE + 0x2000U) +#define CSU_SHA_START (CSU_BASE + 0x2000U) + +/* CSU DMA */ +/* Addresses and sizes must be word aligned last two bits = 0 */ +/* 128 x 32-bit data FIFO for each channel (two channels) */ +#define CSUDMA_BASE(ch) (0xFFC80000UL + (((ch) & 0x1) * 0x800)) /* channel 1 registers sit 0x800 above channel 0 */ +#define CSUDMA_ADDR(ch) (CSUDMA_BASE(ch) + 0x0000U) /* Mem address (lower 32-bits) */ +#define CSUDMA_ADDR_MSB(ch) (CSUDMA_BASE(ch) + 0x0028U) /* (upper 17 bits) */ +#define CSUDMA_SIZE(ch) (CSUDMA_BASE(ch) + 0x0004U) /* DMA transfer payload size */ +#define CSUDMA_STS(ch) (CSUDMA_BASE(ch) + 0x0008U) +#define CSUDMA_CTRL(ch) (CSUDMA_BASE(ch) + 0x000CU) +#define CSUDMA_CTRL2(ch) (CSUDMA_BASE(ch) + 0x0024U) +#define CSUDMA_CRC(ch) (CSUDMA_BASE(ch) + 0x0010U) +#define CSUDMA_ISTS(ch) (CSUDMA_BASE(ch) + 0x0014U) +#define
CSUDMA_IEN(ch) (CSUDMA_BASE(ch) + 0x0018U) +#define CSUDMA_IDIS(ch) (CSUDMA_BASE(ch) + 0x001CU) +#define CSUDMA_IMASK(ch) (CSUDMA_BASE(ch) + 0x0020U) + +#define CSUDMA_STS_DONE_CNT (0x07 << 13) +#define CSUDMA_STS_SRC_FIFO_LEVEL (0xFF << 5) +#define CSUDMA_STS_RD_OUTSTANDING (0x0F << 1) +#define CSUDMA_STS_BUSY (0x01 << 0) + +#define CSUDMA_CTRL_FIFO_THRESH_MASK (0xF << 2) +#define CSUDMA_CTRL_FIFO_THRESH(n) (((n) << 2) & CSUDMA_CTRL_FIFO_THRESH_MASK) +#define CSUDMA_CTRL_TIMEOUT_VAL_MASK (0xFFF << 10) +#define CSUDMA_CTRL_TIMEOUT_VAL(n) (((n) << 10) & CSUDMA_CTRL_TIMEOUT_VAL_MASK) +#define CSUDMA_CTRL_ENDIANNESS (1 << 23) +#define CSUDMA_CTRL_AXI_BRST_TYPE (1 << 22) + +#define CSUDMA_CTRL2_ARCACHE_MASK (0x7 << 24) +#define CSUDMA_CTRL2_ARCACHE(n) (((n) << 24) & CSUDMA_CTRL2_ARCACHE_MASK) +#define CSUDMA_CTRL2_ROUTE_BIT (1 << 23) +#define CSUDMA_CTRL2_TIMEOUT_EN (1 << 22) +#define CSUDMA_CTRL2_TIMEOUT_PRE_MASK (0xFFF << 4) +#define CSUDMA_CTRL2_TIMEOUT_PRE(n) (((n) << 4) & CSUDMA_CTRL2_TIMEOUT_PRE_MASK) +#define CSUDMA_CTRL2_MAX_OUTS_CMDS_MASK (0xF << 0) +#define CSUDMA_CTRL2_MAX_OUTS_CMDS(n) (((n) << 0) & CSUDMA_CTRL2_MAX_OUTS_CMDS_MASK) + +#define CSUDMA_ISR_INVALID_APB (1 << 6) +#define CSUDMA_ISR_THRESH_HIT (1 << 5) +#define CSUDMA_ISR_TIMEOUT_MEM (1 << 4) +#define CSUDMA_ISR_TIMEOUT_STRM (1 << 3) +#define CSUDMA_ISR_AXI_RDERR (1 << 2) +#define CSUDMA_ISR_DONE (1 << 1) +#define CSUDMA_ISR_MEM_DONE (1 << 0) + +/* CSU DMA Channels */ +#define CSUDMA_CH_SRC 0 +#define CSUDMA_CH_DST 1 + +/* CSU JTAG */ +#define CSU_JTAG_CHAIN_CFG (CSU_BASE + 0x0030U) +#define CSU_JTAG_CHAIN_STATUS (CSU_BASE + 0x0034U) +#define CSU_JTAG_SEC (CSU_BASE + 0x0038U) +#define CSU_JTAG_DAP_CFG (CSU_BASE + 0x003CU) + +#define CSU_PCAP_PROG (CSU_BASE + 0x3000U) + +/* Clock and Reset Control */ +#define CRL_APB_BASE 0xFF5E0000UL +#define CRL_APB_DBG_LPD_CTRL (CRL_APB_BASE + 0x00B0U) +#define CRL_APB_RST_LPD_DBG (CRL_APB_BASE + 0x0240U) + + + #endif /* _ZYNQMP_H_ */ diff --git a/options.mk 
b/options.mk index 43396e0e8..f3c694152 100644 --- a/options.mk +++ b/options.mk @@ -746,7 +746,9 @@ ifeq ($(WOLFBOOT_NO_PARTITIONS),1) endif ifeq ($(HASH),SHA3) - WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/sha3.o + ifeq ($(PKA_HASH_HAL),) + WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/sha3.o + endif CFLAGS+=-D"WOLFBOOT_HASH_SHA3_384" SIGN_OPTIONS+=--sha3 endif diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index dec78f6d9..2af3f436d 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -37,6 +37,13 @@ extern unsigned int _end_data; extern void main(void); extern void gicv2_init_secure(void); +unsigned int current_el(void) +{ + unsigned long el; + asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc"); + return (unsigned int)((el >> 2) & 0x3U); +} + void boot_entry_C(void) { register unsigned int *dst, *src;