diff options
Diffstat (limited to 'coroutine')
-rw-r--r-- | coroutine/amd64/Context.S | 42 | ||||
-rw-r--r-- | coroutine/amd64/Context.h | 25 | ||||
-rw-r--r-- | coroutine/arm64/Context.S | 67 | ||||
-rw-r--r-- | coroutine/arm64/Context.h | 46 | ||||
-rw-r--r-- | coroutine/asyncify/Context.c | 10 | ||||
-rw-r--r-- | coroutine/asyncify/Context.h | 93 | ||||
-rw-r--r-- | coroutine/loongarch64/Context.S | 73 | ||||
-rw-r--r-- | coroutine/loongarch64/Context.h | 46 | ||||
-rw-r--r-- | coroutine/ppc/Context.S | 90 | ||||
-rw-r--r-- | coroutine/ppc/Context.h | 58 | ||||
-rw-r--r-- | coroutine/ppc64/Context.S | 89 | ||||
-rw-r--r-- | coroutine/ppc64/Context.h | 57 | ||||
-rw-r--r-- | coroutine/universal/Context.S | 6 | ||||
-rw-r--r-- | coroutine/universal/Context.h | 6 | ||||
-rw-r--r-- | coroutine/win64/Context.h | 2 |
15 files changed, 676 insertions, 34 deletions
diff --git a/coroutine/amd64/Context.S b/coroutine/amd64/Context.S index d50732adbc..056c276a31 100644 --- a/coroutine/amd64/Context.S +++ b/coroutine/amd64/Context.S @@ -13,29 +13,35 @@ .globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): - # Save caller state - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - # Save caller stack pointer + # Make space on the stack for 6 registers: + subq $48, %rsp + + # Save caller state: + movq %rbp, 40(%rsp) + movq %rbx, 32(%rsp) + movq %r12, 24(%rsp) + movq %r13, 16(%rsp) + movq %r14, 8(%rsp) + movq %r15, (%rsp) + + # Save caller stack pointer: movq %rsp, (%rdi) - # Restore callee stack pointer + # Restore callee stack pointer: movq (%rsi), %rsp # Restore callee state - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp - - # Put the first argument into the return value + movq 40(%rsp), %rbp + movq 32(%rsp), %rbx + movq 24(%rsp), %r12 + movq 16(%rsp), %r13 + movq 8(%rsp), %r14 + movq (%rsp), %r15 + + # Adjust stack pointer back: + addq $48, %rsp + + # Put the first argument into the return value: movq %rdi, %rax # We pop the return address and jump to it diff --git a/coroutine/amd64/Context.h b/coroutine/amd64/Context.h index f626a47225..44daa4e01a 100644 --- a/coroutine/amd64/Context.h +++ b/coroutine/amd64/Context.h @@ -19,10 +19,29 @@ enum {COROUTINE_REGISTERS = 6}; +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif + struct coroutine_context { void **stack_pointer; void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif }; typedef COROUTINE(* coroutine_start)(struct 
coroutine_context *from, struct coroutine_context *self); @@ -39,6 +58,12 @@ static inline void coroutine_initialize( ) { assert(start && stack && size >= 1024); +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + // Stack grows down. Force 16-byte alignment. char * top = (char*)stack + size; context->stack_pointer = (void**)((uintptr_t)top & ~0xF); diff --git a/coroutine/arm64/Context.S b/coroutine/arm64/Context.S index 07d50d30df..5251ab214d 100644 --- a/coroutine/arm64/Context.S +++ b/coroutine/arm64/Context.S @@ -18,11 +18,25 @@ .align 2 #endif +## NOTE(PAC): We use HINT mnemonics instead of PAC mnemonics to +## keep compatibility with those assemblers that don't support PAC. +## +## See "Providing protection for complex software" for more details about PAC/BTI +## https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software + .global PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # paciasp (it also acts as BTI landing pad, so no need to insert BTI also) + hint #25 +#elif defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT != 0) + # For the case PAC is not enabled but BTI is. 
+ # bti c + hint #34 +#endif # Make space on the stack for caller registers - sub sp, sp, 0xb0 + sub sp, sp, 0xa0 # Save caller registers stp d8, d9, [sp, 0x00] @@ -36,9 +50,6 @@ PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): stp x27, x28, [sp, 0x80] stp x29, x30, [sp, 0x90] - # Save return address - str x30, [sp, 0xa0] - # Save stack pointer to x0 (first argument) mov x2, sp str x2, [x0, 0] @@ -59,15 +70,51 @@ PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): ldp x27, x28, [sp, 0x80] ldp x29, x30, [sp, 0x90] - # Load return address into x4 - ldr x4, [sp, 0xa0] - # Pop stack frame - add sp, sp, 0xb0 + add sp, sp, 0xa0 - # Jump to return address (in x4) - ret x4 +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # autiasp: Authenticate x30 (LR) with SP and key A + hint #29 +#endif + + # Jump to return address (in x30) + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif + +#if __ARM_FEATURE_BTI_DEFAULT != 0 || __ARM_FEATURE_PAC_DEFAULT != 0 +/* See "ELF for the Arm 64-bit Architecture (AArch64)" + https://github.com/ARM-software/abi-aa/blob/2023Q3/aaelf64/aaelf64.rst#program-property */ +# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1<<0) +# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1<<1) + +# if __ARM_FEATURE_BTI_DEFAULT != 0 +# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +# else +# define BTI_FLAG 0 +# endif +# if __ARM_FEATURE_PAC_DEFAULT != 0 +# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +# else +# define PAC_FLAG 0 +# endif + + # The note section format is described by Note Section in Chapter 5 + # of "System V Application Binary Interface, Edition 4.1". 
+ .pushsection .note.gnu.property, "a" + .p2align 3 + .long 0x4 /* Name size ("GNU\0") */ + .long 0x10 /* Descriptor size */ + .long 0x5 /* Type: NT_GNU_PROPERTY_TYPE_0 */ + .asciz "GNU" /* Name */ + # Begin descriptor + .long 0xc0000000 /* Property type: GNU_PROPERTY_AARCH64_FEATURE_1_AND */ + .long 0x4 /* Property size */ + .long (BTI_FLAG|PAC_FLAG) + .long 0x0 /* 8-byte alignment padding */ + # End descriptor + .popsection +#endif diff --git a/coroutine/arm64/Context.h b/coroutine/arm64/Context.h index dbc6ac94fb..eb66fbea0f 100644 --- a/coroutine/arm64/Context.h +++ b/coroutine/arm64/Context.h @@ -17,12 +17,31 @@ #define COROUTINE __attribute__((noreturn)) void -enum {COROUTINE_REGISTERS = 0xb0 / 8}; +enum {COROUTINE_REGISTERS = 0xa0 / 8}; + +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif struct coroutine_context { void **stack_pointer; void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -31,6 +50,20 @@ static inline void coroutine_initialize_main(struct coroutine_context * context) context->stack_pointer = NULL; } +static inline void *ptrauth_sign_instruction_addr(void *addr, void *modifier) { +#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0 + // Sign the given instruction address with the given modifier and key A + register void *r17 __asm("r17") = addr; + register void *r16 __asm("r16") = modifier; + // Use HINT mnemonic instead of PACIA1716 for compatibility with older assemblers. 
+ __asm ("hint #8;" : "+r"(r17) : "r"(r16)); + addr = r17; +#else + // No-op if PAC is not enabled +#endif + return addr; +} + static inline void coroutine_initialize( struct coroutine_context *context, coroutine_start start, @@ -39,14 +72,21 @@ static inline void coroutine_initialize( ) { assert(start && stack && size >= 1024); +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + // Stack grows down. Force 16-byte alignment. char * top = (char*)stack + size; - context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + top = (char *)((uintptr_t)top & ~0xF); + context->stack_pointer = (void**)top; context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); - context->stack_pointer[0xa0 / 8] = (void*)start; + context->stack_pointer[0x98 / 8] = ptrauth_sign_instruction_addr((void*)start, (void*)top); } struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); diff --git a/coroutine/asyncify/Context.c b/coroutine/asyncify/Context.c new file mode 100644 index 0000000000..303e5f4429 --- /dev/null +++ b/coroutine/asyncify/Context.c @@ -0,0 +1,10 @@ +#include "Context.h" + +void coroutine_trampoline(void * _start, void * _context) +{ + coroutine_start start = (coroutine_start)_start; + struct coroutine_context * context = _context; + rb_wasm_set_stack_pointer(context->current_sp); + + start(context->from, context); +} diff --git a/coroutine/asyncify/Context.h b/coroutine/asyncify/Context.h new file mode 100644 index 0000000000..71791a4004 --- /dev/null +++ b/coroutine/asyncify/Context.h @@ -0,0 +1,93 @@ +#ifndef COROUTINE_ASYNCIFY_CONTEXT_H +#define COROUTINE_ASYNCIFY_CONTEXT_H + +/* + This is a coroutine implementation based on Binaryen's Asyncify transformation for WebAssembly. 
+ + This implementation is built on low-level ucontext-like API in wasm/fiber.c + This file is an adapter for the common coroutine interface and for stack manipulation. + wasm/fiber.c doesn't take care of stack to avoid duplicate management with this adapter. + + * See also: wasm/fiber.c +*/ + +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include "wasm/asyncify.h" +#include "wasm/machine.h" +#include "wasm/fiber.h" + +#define COROUTINE void __attribute__((__noreturn__)) + +static const int ASYNCIFY_CORO_DEBUG = 0; + +struct coroutine_context +{ + rb_wasm_fiber_context fc; + void *argument; + struct coroutine_context *from; + + void *current_sp; + void *stack_base; + size_t size; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +COROUTINE coroutine_trampoline(void * _start, void * _context); + +static inline void coroutine_initialize_main(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + // NULL fiber entry means it's the main fiber, and handled specially. + rb_wasm_init_context(&context->fc, NULL, NULL, NULL); + // mark the main fiber has already started + context->fc.is_started = true; +} + +static inline void coroutine_initialize(struct coroutine_context *context, coroutine_start start, void *stack, size_t size) +{ + // Linear stack pointer must be always aligned down to 16 bytes. + // https://github.com/WebAssembly/tool-conventions/blob/c74267a5897c1bdc9aa60adeaf41816387d3cd12/BasicCABI.md#the-linear-stack + uintptr_t sp = ((uintptr_t)stack + size) & ~0xF; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p, stack = %p ... 
%p)\n", __func__, context, stack, (char *)sp); + rb_wasm_init_context(&context->fc, coroutine_trampoline, start, context); + // record the initial stack pointer position to restore it after resumption + context->current_sp = (char *)sp; + context->stack_base = stack; + context->size = size; +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (current = %p, target = %p)\n", __func__, current, target); + struct coroutine_context * previous = target->from; + + target->from = current; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] current->current_sp = %p -> %p\n", __func__, current->current_sp, rb_wasm_get_stack_pointer()); + // record the current stack pointer position to restore it after resumption + current->current_sp = rb_wasm_get_stack_pointer(); + + // suspend the current coroutine and resume another coroutine + + rb_wasm_swapcontext(&current->fc, &target->fc); + + // after the original coroutine resumed + + rb_wasm_set_stack_pointer(current->current_sp); + + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + context->stack_base = NULL; + context->size = 0; + context->from = NULL; +} + +#endif /* COROUTINE_ASYNCIFY_CONTEXT_H */ diff --git a/coroutine/loongarch64/Context.S b/coroutine/loongarch64/Context.S new file mode 100644 index 0000000000..662f5dfb6c --- /dev/null +++ b/coroutine/loongarch64/Context.S @@ -0,0 +1,73 @@ +#define TOKEN_PASTE(x,y) x##y +#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) + +.text +.align 2 + +.global PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) +PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): + + # Make space on the stack for caller registers + addi.d $sp, $sp, -0xa0 + + # Save caller registers + st.d $s0, 
$sp, 0x00 + st.d $s1, $sp, 0x08 + st.d $s2, $sp, 0x10 + st.d $s3, $sp, 0x18 + st.d $s4, $sp, 0x20 + st.d $s5, $sp, 0x28 + st.d $s6, $sp, 0x30 + st.d $s7, $sp, 0x38 + st.d $s8, $sp, 0x40 + st.d $fp, $sp, 0x48 + fst.d $fs0, $sp, 0x50 + fst.d $fs1, $sp, 0x58 + fst.d $fs2, $sp, 0x60 + fst.d $fs3, $sp, 0x68 + fst.d $fs4, $sp, 0x70 + fst.d $fs5, $sp, 0x78 + fst.d $fs6, $sp, 0x80 + fst.d $fs7, $sp, 0x88 + + # Save return address + st.d $ra, $sp, 0x90 + + # Save stack pointer to a0 (first argument) + st.d $sp, $a0, 0x00 + + # Load stack pointer from a1 (second argument) + ld.d $sp, $a1, 0x00 + + # Restore caller registers + ld.d $s0, $sp, 0x00 + ld.d $s1, $sp, 0x08 + ld.d $s2, $sp, 0x10 + ld.d $s3, $sp, 0x18 + ld.d $s4, $sp, 0x20 + ld.d $s5, $sp, 0x28 + ld.d $s6, $sp, 0x30 + ld.d $s7, $sp, 0x38 + ld.d $s8, $sp, 0x40 + ld.d $fp, $sp, 0x48 + fld.d $fs0, $sp, 0x50 + fld.d $fs1, $sp, 0x58 + fld.d $fs2, $sp, 0x60 + fld.d $fs3, $sp, 0x68 + fld.d $fs4, $sp, 0x70 + fld.d $fs5, $sp, 0x78 + fld.d $fs6, $sp, 0x80 + fld.d $fs7, $sp, 0x88 + + # Load return address + ld.d $ra, $sp, 0x90 + + # Pop stack frame + addi.d $sp, $sp, 0xa0 + + # Jump to return address + jr $ra + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/loongarch64/Context.h b/coroutine/loongarch64/Context.h new file mode 100644 index 0000000000..668c9a965e --- /dev/null +++ b/coroutine/loongarch64/Context.h @@ -0,0 +1,46 @@ +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum {COROUTINE_REGISTERS = 0xa0 / 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( 
+ struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0x90 / 8] = (void*)start; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} diff --git a/coroutine/ppc/Context.S b/coroutine/ppc/Context.S new file mode 100644 index 0000000000..e2431a9250 --- /dev/null +++ b/coroutine/ppc/Context.S @@ -0,0 +1,90 @@ +; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. +; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only +; ppc32 version may be re-written compactly with stmw/lwm, but the code won't be faster, see: https://github.com/ruby/ruby/pull/5927#issuecomment-1139730541 + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. + +#define TOKEN_PASTE(x,y) x##y +#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) + +.machine ppc7400 ; = G4, Rosetta +.text + +.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) 
+ subi r1,r1,80 + + ; Get LR + mflr r0 + + ; Save caller registers + stw r31,0(r1) + stw r30,4(r1) + stw r29,8(r1) + stw r28,12(r1) + stw r27,16(r1) + stw r26,20(r1) + stw r25,24(r1) + stw r24,28(r1) + stw r23,32(r1) + stw r22,36(r1) + stw r21,40(r1) + stw r20,44(r1) + stw r19,48(r1) + stw r18,52(r1) + stw r17,56(r1) + stw r16,60(r1) + stw r15,64(r1) + stw r14,68(r1) + stw r13,72(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + stw r0,76(r1) + + ; Save stack pointer to first argument + stw r1,0(r3) + + ; Load stack pointer from second argument + lwz r1,0(r4) + + ; Load return address + lwz r0,76(r1) + + ; Restore caller registers + lwz r13,72(r1) + lwz r14,68(r1) + lwz r15,64(r1) + lwz r16,60(r1) + lwz r17,56(r1) + lwz r18,52(r1) + lwz r19,48(r1) + lwz r20,44(r1) + lwz r21,40(r1) + lwz r22,36(r1) + lwz r23,32(r1) + lwz r24,28(r1) + lwz r25,24(r1) + lwz r26,20(r1) + lwz r27,16(r1) + lwz r28,12(r1) + lwz r29,8(r1) + lwz r30,4(r1) + lwz r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,80 + + ; Jump to return address + blr diff --git a/coroutine/ppc/Context.h b/coroutine/ppc/Context.h new file mode 100644 index 0000000000..8035d08556 --- /dev/null +++ b/coroutine/ppc/Context.h @@ -0,0 +1,58 @@ +#ifndef COROUTINE_PPC_CONTEXT_H +#define COROUTINE_PPC_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_LIMITED_ADDRESS_SPACE + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + 
context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC_CONTEXT_H */ diff --git a/coroutine/ppc64/Context.S b/coroutine/ppc64/Context.S new file mode 100644 index 0000000000..f8561e0e7d --- /dev/null +++ b/coroutine/ppc64/Context.S @@ -0,0 +1,89 @@ +; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. +; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. 
+ +#define TOKEN_PASTE(x,y) x##y +#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) + +.machine ppc64 ; = G5 +.text + +.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) + subi r1,r1,160 + + ; Get LR + mflr r0 + + ; Save caller registers + std r31,0(r1) + std r30,8(r1) + std r29,16(r1) + std r28,24(r1) + std r27,32(r1) + std r26,40(r1) + std r25,48(r1) + std r24,56(r1) + std r23,64(r1) + std r22,72(r1) + std r21,80(r1) + std r20,88(r1) + std r19,96(r1) + std r18,104(r1) + std r17,112(r1) + std r16,120(r1) + std r15,128(r1) + std r14,136(r1) + std r13,144(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + std r0,152(r1) + + ; Save stack pointer to first argument + std r1,0(r3) + + ; Load stack pointer from second argument + ld r1,0(r4) + + ; Load return address + ld r0,152(r1) + + ; Restore caller registers + ld r13,144(r1) + ld r14,136(r1) + ld r15,128(r1) + ld r16,120(r1) + ld r17,112(r1) + ld r18,104(r1) + ld r19,96(r1) + ld r20,88(r1) + ld r21,80(r1) + ld r22,72(r1) + ld r23,64(r1) + ld r24,56(r1) + ld r25,48(r1) + ld r26,40(r1) + ld r27,32(r1) + ld r28,24(r1) + ld r29,16(r1) + ld r30,8(r1) + ld r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,160 + + ; Jump to return address + blr diff --git a/coroutine/ppc64/Context.h b/coroutine/ppc64/Context.h new file mode 100644 index 0000000000..085b475ed5 --- /dev/null +++ b/coroutine/ppc64/Context.h @@ -0,0 +1,57 @@ +#ifndef COROUTINE_PPC64_CONTEXT_H +#define COROUTINE_PPC64_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* 
space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC64_CONTEXT_H */ diff --git a/coroutine/universal/Context.S b/coroutine/universal/Context.S index 0fd8c01e7f..11c80a7927 100644 --- a/coroutine/universal/Context.S +++ b/coroutine/universal/Context.S @@ -3,7 +3,11 @@ # include "coroutine/amd64/Context.S" #elif defined __i386__ # include "coroutine/x86/Context.S" -#elif defined __ppc64__ +#elif defined __ppc__ +# include "coroutine/ppc/Context.S" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64/Context.S" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) # include "coroutine/ppc64le/Context.S" #elif defined __arm64__ # include "coroutine/arm64/Context.S" diff --git a/coroutine/universal/Context.h b/coroutine/universal/Context.h index 9a2ef425db..ec4d2f484a 100644 --- a/coroutine/universal/Context.h +++ 
b/coroutine/universal/Context.h @@ -6,7 +6,11 @@ # include "coroutine/amd64/Context.h" #elif defined __i386__ # include "coroutine/x86/Context.h" -#elif defined __ppc64__ +#elif defined __ppc__ +# include "coroutine/ppc/Context.h" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64/Context.h" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) # include "coroutine/ppc64le/Context.h" #elif defined __arm64__ # include "coroutine/arm64/Context.h" diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h index aaa4caeaf9..d85ebf8e0e 100644 --- a/coroutine/win64/Context.h +++ b/coroutine/win64/Context.h @@ -30,7 +30,7 @@ struct coroutine_context typedef void(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); -void coroutine_trampoline(); +void coroutine_trampoline(void); static inline void coroutine_initialize_main(struct coroutine_context * context) { context->stack_pointer = NULL; |