From b9c09e345a7c6d469b931316a18ead9e5e4aa3c8 Mon Sep 17 00:00:00 2001
From: Niklas Hauser
Date: Sat, 30 Dec 2023 21:45:44 +0100
Subject: [PATCH] [fiber] Add support for ARM64 targets

---
 src/modm/processing/fiber/context_arm64.cpp   | 127 ++++++++++++++++++
 .../processing/fiber/context_arm64_asm.S.in   |  83 ++++++++++++
 src/modm/processing/fiber/module.lb           |   9 +-
 src/modm/processing/fiber/task.hpp            |  10 +-
 test/Makefile                                 |   4 +
 tools/build_script_generator/common.py        |   7 +-
 tools/devices/hosted/darwin-arm64.xml         |  13 ++
 .../hosted/{darwin.xml => darwin-x86_64.xml}  |   0
 tools/devices/hosted/linux-arm64.xml          |  14 ++
 .../hosted/{linux.xml => linux-x86_64.xml}    |   0
 10 files changed, 257 insertions(+), 10 deletions(-)
 create mode 100644 src/modm/processing/fiber/context_arm64.cpp
 create mode 100644 src/modm/processing/fiber/context_arm64_asm.S.in
 create mode 100644 tools/devices/hosted/darwin-arm64.xml
 rename tools/devices/hosted/{darwin.xml => darwin-x86_64.xml} (100%)
 create mode 100644 tools/devices/hosted/linux-arm64.xml
 rename tools/devices/hosted/{linux.xml => linux-x86_64.xml} (100%)

diff --git a/src/modm/processing/fiber/context_arm64.cpp b/src/modm/processing/fiber/context_arm64.cpp
new file mode 100644
index 0000000000..46157c78f5
--- /dev/null
+++ b/src/modm/processing/fiber/context_arm64.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020, Erik Henriksson
+ * Copyright (c) 2021, 2023, Niklas Hauser
+ *
+ * This file is part of the modm project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+// ----------------------------------------------------------------------------
+
+#include "context.h"
+#include
+
+/* Stack layout (growing downwards), 2 storage words + 20 register words:
+ *
+ * Permanent Storage (written once by modm_context_init):
+ *   Fiber Function
+ *   Fiber Function Argument
+ *
+ * Temporary Prepare (written by modm_context_reset):
+ *   Entry Function (stored in the LR slot of the register file below)
+ *
+ * Register file (saved and restored by modm_context_jump):
+ *   LR
+ *   FP
+ *   x28
+ *   x27
+ *   x26
+ *   x25
+ *   x24
+ *   x23
+ *   x22
+ *   x21
+ *   x20
+ *   x19
+ *   d15
+ *   d14
+ *   d13
+ *   d12
+ *   d11
+ *   d10
+ *   d9
+ *   d8
+ */
+
+namespace
+{
+
+constexpr size_t StackWordsReset = 1;		// entry function word written by modm_context_reset
+constexpr size_t StackWordsStorage = 2;		// fiber function + fiber function argument
+constexpr size_t StackWordsRegisters = 20;	// LR, FP, x19-x28 and d8-d15
+constexpr size_t StackWordsAll = StackWordsStorage + StackWordsRegisters;
+constexpr size_t StackSizeWord = sizeof(uintptr_t);
+constexpr uintptr_t StackWatermark = 0xc0ffee'f00d'facade;
+
+}
+
+extern "C" void modm_context_entry();
+
+void
+modm_context_init(modm_context_t *ctx,
+				  uintptr_t *bottom, uintptr_t *top,
+				  uintptr_t fn, uintptr_t fn_arg)
+{
+	ctx->bottom = bottom;
+	ctx->top = top;
+
+	ctx->sp = top;
+	*--ctx->sp = fn;		// permanent storage: top[-1]
+	*--ctx->sp = fn_arg;	// permanent storage: top[-2]
+}
+
+void
+modm_context_reset(modm_context_t *ctx)
+{
+	*ctx->bottom = StackWatermark;
+
+	ctx->sp = ctx->top - StackWordsStorage;
+	*--ctx->sp = (uintptr_t) modm_context_entry;	// lands in the LR slot of the register file
+	ctx->sp -= StackWordsRegisters - StackWordsReset;	// reserve the remaining register slots
+}
+
+void
+modm_context_watermark(modm_context_t *ctx)
+{
+	// zero the register file on the stack, except its topmost word (the entry function)
+	for (auto *word = ctx->top - StackWordsAll;
+		 word < ctx->top - StackWordsStorage - StackWordsReset; word++)
+		*word = 0;
+
+	// then color the whole stack *below* the register file with the watermark
+	for (auto *word = ctx->bottom; word < ctx->top - StackWordsAll; word++)
+		*word = StackWatermark;
+}
+
+size_t
+modm_context_stack_usage(const modm_context_t *ctx)
+{
+	for (auto *word = ctx->bottom; word < ctx->top; word++)	// scan upwards from the bottom
+		if (StackWatermark != *word)	// first word that is not the watermark
+			return (ctx->top - word) * StackSizeWord;	// usage in bytes
+	return 0;
+}
+
+bool
+modm_context_stack_overflow(const modm_context_t *ctx)
+{
+	return *ctx->bottom != StackWatermark;	// the lowest word no longer holds the watermark
+}
+
+static modm_context_t main_context;
+
+void
+modm_context_start(modm_context_t *to)
+{
+	modm_context_jump(&main_context, to);	// saves the caller's state in main_context
+}
+
+void
+modm_context_end()
+{
+	modm_context_t dummy;	// only receives the saved SP of the ending context
+	modm_context_jump(&dummy, &main_context);
+	__builtin_unreachable();
+}
diff --git a/src/modm/processing/fiber/context_arm64_asm.S.in b/src/modm/processing/fiber/context_arm64_asm.S.in
new file mode 100644
index 0000000000..ce177f0003
--- /dev/null
+++ b/src/modm/processing/fiber/context_arm64_asm.S.in
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023, Niklas Hauser
+ *
+ * This file is part of the modm project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+; ----------------------------------------------------------------------------
+
+.text
+.globl _modm_context_entry ; NOTE(review): '_' prefix is the Mach-O symbol naming; confirm ELF targets link
+.balign 16
+_modm_context_entry:
+
+ldr x0, [sp] ; Load closure data pointer (fn_arg) as first argument
+ldr x1, [sp, #8] ; Load closure function address (fn)
+br x1 ; Jump to closure function
+
+
+/*
+The assembly code below is adapted from the Boost Context library to work
+for Windows, Linux and macOS.
+See https://github.com/boostorg/context/tree/develop/src/asm
+- Windows: jump_arm64_aapcs_pe_armasm.asm
+- Linux: jump_arm64_aapcs_elf_gas.S
+- macOS: jump_arm64_aapcs_macho_gas.S
+
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+.text
+.globl _modm_context_jump
+.balign 16
+_modm_context_jump:
+
+; move stack pointer down by 0xa0 bytes (20 words) for the register file
+sub sp, sp, #0xa0
+
+; save d8 - d15
+stp d8, d9, [sp, #0x00]
+stp d10, d11, [sp, #0x10]
+stp d12, d13, [sp, #0x20]
+stp d14, d15, [sp, #0x30]
+
+; save x19-x30 (fp is x29, lr is x30)
+stp x19, x20, [sp, #0x40]
+stp x21, x22, [sp, #0x50]
+stp x23, x24, [sp, #0x60]
+stp x25, x26, [sp, #0x70]
+stp x27, x28, [sp, #0x80]
+stp fp, lr, [sp, #0x90]
+
+; Store the SP in from->sp (x0 is the first argument)
+mov x19, sp
+str x19, [x0]
+
+; Restore SP from to->sp (x1 is the second argument)
+ldr x19, [x1]
+mov sp, x19
+
+; load d8 - d15
+ldp d8, d9, [sp, #0x00]
+ldp d10, d11, [sp, #0x10]
+ldp d12, d13, [sp, #0x20]
+ldp d14, d15, [sp, #0x30]
+
+; load x19-x30 (fp is x29, lr is x30)
+ldp x19, x20, [sp, #0x40]
+ldp x21, x22, [sp, #0x50]
+ldp x23, x24, [sp, #0x60]
+ldp x25, x26, [sp, #0x70]
+ldp x27, x28, [sp, #0x80]
+ldp fp, lr, [sp, #0x90]
+
+; release the 0xa0 bytes of the GP + FPU register file
+add sp, sp, #0xa0
+
+ret
diff --git a/src/modm/processing/fiber/module.lb b/src/modm/processing/fiber/module.lb
index 22e09dbe3f..9cbf753735 100644
--- a/src/modm/processing/fiber/module.lb
+++ b/src/modm/processing/fiber/module.lb
@@ -26,8 +26,7 @@ def prepare(module, options):
     module.add_query(
         EnvironmentQuery(name="__enabled", factory=is_enabled))
 
-    # No ARM64 support yet!
-    return "arm64" not in options[":target"].get_driver("core")["type"]
+    return True
 
 
 def build(env):
@@ -64,6 +63,12 @@ def build(env):
         env.substitutions["default_stack_size"] = 2**20 # 1MB
         env.template("context_x86_64.cpp.in")
 
+    elif "arm64" in core:
+        env.substitutions["stack_minimum"] = (20 + 2) * 8 # (register + storage words) * 8 bytes
+        env.substitutions["default_stack_size"] = 2**20 # 1MB
+        env.copy("context_arm64.cpp")
+        env.template("context_arm64_asm.S.in")
+
     env.template("context.h.in")
     env.template("stack.hpp.in")
     env.template("scheduler.hpp.in")
diff --git a/src/modm/processing/fiber/task.hpp b/src/modm/processing/fiber/task.hpp
index 5014c619bf..a0270add92 100644
--- a/src/modm/processing/fiber/task.hpp
+++ b/src/modm/processing/fiber/task.hpp
@@ -126,13 +126,13 @@ Task::Task(Stack& stack, T&& closure, Start start)
 	}
 	else
 	{
-		// lambda functions with a closure must be allocated on the stack
-		constexpr size_t closure_size = sizeof(std::decay_t);
+		// lambda functions with a closure must be allocated on the stack ALIGNED: size is rounded up to max(StackAlignment, alignof(closure))
+		constexpr size_t align_mask = std::max(StackAlignment, alignof(std::decay_t)) - 1u;
+		constexpr size_t closure_size = (sizeof(std::decay_t) + align_mask) & ~align_mask;
 		static_assert(Size >= closure_size + StackSizeMinimum,
-				"Stack size must ≥({{min_stack_size}}B + sizeof(closure))!");
+				"Stack size must ≥({{min_stack_size}}B + aligned sizeof(closure))!");
 		// Find a suitable aligned area at the top of stack to allocate the closure
-		uintptr_t ptr = uintptr_t(stack.memory + stack.words) - closure_size;
-		ptr &= ~(std::max(sizeof(uintptr_t), alignof(std::decay_t)) - 1u);
+		const uintptr_t ptr = uintptr_t(stack.memory + stack.words) - closure_size;
 		// construct closure in place
 		::new ((void*)ptr) std::decay_t{std::forward(closure)};
 		// Encapsulate the proper ABI function call into a simpler function
diff --git a/test/Makefile b/test/Makefile
index dbb55d7b99..5d41f9d46b 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -27,8 +27,12 @@ endef
 
 run-hosted-linux:
 	$(call compile-test,hosted,run,-D":target=hosted-linux")
+run-hosted-linux-arm64:
+	$(call compile-test,hosted,run,-D":target=hosted-linux-arm64")
 run-hosted-darwin:
 	$(call compile-test,hosted,run,-D":target=hosted-darwin")
+run-hosted-darwin-arm64:
+	$(call compile-test,hosted,run,-D":target=hosted-darwin-arm64")
 run-hosted-windows:
 	$(call compile-test,hosted,run,-D":target=hosted-windows")
 
diff --git a/tools/build_script_generator/common.py b/tools/build_script_generator/common.py
index cc0d514c0e..9823967505 100644
--- a/tools/build_script_generator/common.py
+++ b/tools/build_script_generator/common.py
@@ -243,9 +243,10 @@ def common_compiler_flags(compiler, target):
         "-finline-limit=10000",
         "-funsigned-bitfields",
     ]
-    flags["ccflags.release"] = [
-        "-Os",
-    ]
+    if target.identifier["platform"] in ["hosted"]:
+        flags["ccflags.release"] = ["-O3"]
+    else:
+        flags["ccflags.release"] = ["-Os"]
     # not a valid profile
     # flags["ccflags.fast"] = [
     #     "-O3",
diff --git a/tools/devices/hosted/darwin-arm64.xml b/tools/devices/hosted/darwin-arm64.xml
new file mode 100644
index 0000000000..5d2d79aa7f
--- /dev/null
+++ b/tools/devices/hosted/darwin-arm64.xml
@@ -0,0 +1,13 @@
+
+
+
+
+    {platform}-{family}-{arch}
+
+
+
+
+
+
+
+
diff --git a/tools/devices/hosted/darwin.xml b/tools/devices/hosted/darwin-x86_64.xml
similarity index 100%
rename from tools/devices/hosted/darwin.xml
rename to tools/devices/hosted/darwin-x86_64.xml
diff --git a/tools/devices/hosted/linux-arm64.xml b/tools/devices/hosted/linux-arm64.xml
new file mode 100644
index 0000000000..c45e779be1
--- /dev/null
+++ b/tools/devices/hosted/linux-arm64.xml
@@ -0,0 +1,14 @@
+
+
+
+
+    {platform}-{family}-{arch}
+
+
+
+
+
+
+
+
+
diff --git a/tools/devices/hosted/linux.xml b/tools/devices/hosted/linux-x86_64.xml
similarity index 100%
rename from tools/devices/hosted/linux.xml
rename to tools/devices/hosted/linux-x86_64.xml