diff options
Diffstat (limited to 'coroutine')
34 files changed, 1548 insertions, 347 deletions
diff --git a/coroutine/Stack.h b/coroutine/Stack.h deleted file mode 100644 index f0fc703622..0000000000 --- a/coroutine/Stack.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This file is part of the "Coroutine" project and released under the MIT License. - * - * Created by Samuel Williams on 10/11/2020. - * Copyright, 2020, by Samuel Williams. -*/ - -#include COROUTINE_H - -#ifdef COROUTINE_PRIVATE_STACK -#define COROUTINE_STACK_LOCAL(type, name) type *name = ruby_xmalloc(sizeof(type)) -#define COROUTINE_STACK_FREE(name) ruby_xfree(name) -#else -#define COROUTINE_STACK_LOCAL(type, name) type name##_local; type * name = &name##_local -#define COROUTINE_STACK_FREE(name) -#endif diff --git a/coroutine/amd64/Context.S b/coroutine/amd64/Context.S index 051db1c5e8..4b94d31f30 100644 --- a/coroutine/amd64/Context.S +++ b/coroutine/amd64/Context.S @@ -5,42 +5,84 @@ ## Copyright, 2018, by Samuel Williams. ## +/* Important - do _not_ include <cet.h> in this file; doing so will + * cause an incorrect .note.gnu.property section to be emitted. 
We have + * one at the bottom of this file */ + #define TOKEN_PASTE(x,y) x##y -#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) .text -.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) -PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +.globl PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + +#if defined(__CET__) && (__CET__ & 0x01) != 0 + /* IBT landing pad */ + endbr64 +#endif + + # Make space on the stack for 6 registers: + subq $48, %rsp - # Save caller state - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 + # Save caller state: + movq %rbp, 40(%rsp) + movq %rbx, 32(%rsp) + movq %r12, 24(%rsp) + movq %r13, 16(%rsp) + movq %r14, 8(%rsp) + movq %r15, (%rsp) - # Save caller stack pointer + # Save caller stack pointer: movq %rsp, (%rdi) - # Restore callee stack pointer + # Restore callee stack pointer: movq (%rsi), %rsp # Restore callee state - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp - - # Put the first argument into the return value + movq 40(%rsp), %rbp + movq 32(%rsp), %rbx + movq 24(%rsp), %r12 + movq 16(%rsp), %r13 + movq 8(%rsp), %r14 + movq (%rsp), %r15 + + # Adjust stack pointer back: + addq $48, %rsp + + # Put the first argument into the return value: movq %rdi, %rax # We pop the return address and jump to it ret -#if defined(__linux__) && defined(__ELF__) +#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif + +#if defined(__ELF__) + +#if defined(__CET__) && (__CET__ & 0x01) != 0 +# define IBT_FLAG 0x01 +#else +# define IBT_FLAG 0x00 +#endif + +/* We do _NOT_ support CET shadow-stack. Do _not_ add the property for + * this to the Context.o object. 
If you require CET shadow-stack support, + * for now, consider building with --with-coroutine=ucontext */ +#define SHSTK_FLAG 0x00 + +.pushsection .note.gnu.property, "a" +.p2align 3 +.long 0x4 /* Name size ("GNU\0") */ +.long 0x10 /* Descriptor size */ +.long 0x5 /* Type: NT_GNU_PROPERTY_TYPE_0 */ +.asciz "GNU" /* Name */ +# Begin descriptor +.long 0xc0000002 /* Property type: GNU_PROPERTY_X86_FEATURE_1_AND */ +.long 0x4 /* Property size */ +.long (IBT_FLAG | SHSTK_FLAG) +.long 0x0 /* 8-byte alignment padding */ +/* End descriptor */ +.popsection +#endif diff --git a/coroutine/amd64/Context.h b/coroutine/amd64/Context.h index 581525b67e..65aa638304 100644 --- a/coroutine/amd64/Context.h +++ b/coroutine/amd64/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_AMD64_CONTEXT_H +#define COROUTINE_AMD64_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. * @@ -16,9 +19,29 @@ enum {COROUTINE_REGISTERS = 6}; +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif + struct coroutine_context { void **stack_pointer; + void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -35,12 +58,18 @@ static inline void coroutine_initialize( ) { assert(start && stack && size >= 1024); +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + // Stack grows down. Force 16-byte alignment. 
char * top = (char*)stack + size; context->stack_pointer = (void**)((uintptr_t)top & ~0xF); *--context->stack_pointer = NULL; - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)(uintptr_t)start; context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); @@ -52,3 +81,5 @@ static inline void coroutine_destroy(struct coroutine_context * context) { context->stack_pointer = NULL; } + +#endif /* COROUTINE_AMD64_CONTEXT_H */ diff --git a/coroutine/arm32/Context.S b/coroutine/arm32/Context.S index 1850c4c408..945e4f82d5 100644 --- a/coroutine/arm32/Context.S +++ b/coroutine/arm32/Context.S @@ -6,16 +6,15 @@ ## #define TOKEN_PASTE(x,y) x##y -#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) .file "Context.S" .text -.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) +.globl PREFIXED_SYMBOL(coroutine_transfer) .align 2 -.type PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer),%function +.type PREFIXED_SYMBOL(coroutine_transfer),%function .syntax unified -PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +PREFIXED_SYMBOL(coroutine_transfer): # Save caller state (8 registers + return address) push {r4-r11,lr} diff --git a/coroutine/arm32/Context.h b/coroutine/arm32/Context.h index e69f4b2eeb..7529dd2efc 100644 --- a/coroutine/arm32/Context.h +++ b/coroutine/arm32/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_ARM32_CONTEXT_H +#define COROUTINE_ARM32_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. 
* @@ -20,6 +23,7 @@ enum {COROUTINE_REGISTERS = 8}; struct coroutine_context { void **stack_pointer; + void *argument; }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -40,7 +44,7 @@ static inline void coroutine_initialize( char * top = (char*)stack + size; context->stack_pointer = (void**)((uintptr_t)top & ~0xF); - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)(uintptr_t)start; context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); @@ -51,3 +55,5 @@ struct coroutine_context * coroutine_transfer(struct coroutine_context * current static inline void coroutine_destroy(struct coroutine_context * context) { } + +#endif /* COROUTINE_ARM32_CONTEXT_H */ diff --git a/coroutine/arm64/Context.S b/coroutine/arm64/Context.S index 04e3f6d1ef..ce219c0c4d 100644 --- a/coroutine/arm64/Context.S +++ b/coroutine/arm64/Context.S @@ -6,31 +6,74 @@ ## #define TOKEN_PASTE(x,y) x##y -#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) +#if defined(__APPLE__) +#define x29 fp +#define x30 lr +.text +.p2align 2 +#else .text .align 2 +#endif + +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT & 0x02) != 0 +# error "-mbranch-protection flag specified b-key but Context.S does not support this" +#endif + +#if defined(_WIN32) +## Add more space for certain TEB values on each stack +#define TEB_OFFSET 0x20 +#else +#define TEB_OFFSET 0x00 +#endif -.global PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) -PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +## NOTE(PAC): We use HINT mnemonics instead of PAC mnemonics to +## keep compatibility with those assemblers that don't support PAC. 
+## +## See "Providing protection for complex software" for more details about PAC/BTI +## https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # paciasp (it also acts as BTI landing pad, so no need to insert BTI also) + hint #25 +#elif defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT != 0) + # For the case PAC is not enabled but BTI is. + # bti c + hint #34 +#endif # Make space on the stack for caller registers - sub sp, sp, 0xb0 + sub sp, sp, 0xa0 + TEB_OFFSET # Save caller registers - stp d8, d9, [sp, 0x00] - stp d10, d11, [sp, 0x10] - stp d12, d13, [sp, 0x20] - stp d14, d15, [sp, 0x30] - stp x19, x20, [sp, 0x40] - stp x21, x22, [sp, 0x50] - stp x23, x24, [sp, 0x60] - stp x25, x26, [sp, 0x70] - stp x27, x28, [sp, 0x80] - stp x29, x30, [sp, 0x90] - - # Save return address - str x30, [sp, 0xa0] + stp d8, d9, [sp, 0x00 + TEB_OFFSET] + stp d10, d11, [sp, 0x10 + TEB_OFFSET] + stp d12, d13, [sp, 0x20 + TEB_OFFSET] + stp d14, d15, [sp, 0x30 + TEB_OFFSET] + stp x19, x20, [sp, 0x40 + TEB_OFFSET] + stp x21, x22, [sp, 0x50 + TEB_OFFSET] + stp x23, x24, [sp, 0x60 + TEB_OFFSET] + stp x25, x26, [sp, 0x70 + TEB_OFFSET] + stp x27, x28, [sp, 0x80 + TEB_OFFSET] + stp x29, x30, [sp, 0x90 + TEB_OFFSET] + +#if defined(_WIN32) + # Save certain values from Thread Environment Block (TEB) + # x18 points to the TEB on Windows + # Read TeStackBase and TeStackLimit at ksarm64.h from TEB + ldp x5, x6, [x18, #0x08] + # Save them + stp x5, x6, [sp, #0x00] + # Read TeDeallocationStack at ksarm64.h from TEB + ldr x5, [x18, #0x1478] + # Read TeFiberData at ksarm64.h from TEB + ldr x6, [x18, #0x20] + # Save current fiber data and deallocation stack + stp x5, x6, [sp, #0x10] +#endif # Save stack pointer to x0 (first argument) mov x2, sp @@ -40,23 +83,78 @@ 
PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): ldr x3, [x1, 0] mov sp, x3 +#if defined(_WIN32) + # Restore stack base and limit + ldp x5, x6, [sp, #0x00] + # Write TeStackBase and TeStackLimit at ksarm64.h to TEB + stp x5, x6, [x18, #0x08] + # Restore fiber data and deallocation stack + ldp x5, x6, [sp, #0x10] + # Write TeDeallocationStack at ksarm64.h to TEB + str x5, [x18, #0x1478] + # Write TeFiberData at ksarm64.h to TEB + str x6, [x18, #0x20] +#endif + # Restore caller registers - ldp d8, d9, [sp, 0x00] - ldp d10, d11, [sp, 0x10] - ldp d12, d13, [sp, 0x20] - ldp d14, d15, [sp, 0x30] - ldp x19, x20, [sp, 0x40] - ldp x21, x22, [sp, 0x50] - ldp x23, x24, [sp, 0x60] - ldp x25, x26, [sp, 0x70] - ldp x27, x28, [sp, 0x80] - ldp x29, x30, [sp, 0x90] - - # Load return address into x4 - ldr x4, [sp, 0xa0] + ldp d8, d9, [sp, 0x00 + TEB_OFFSET] + ldp d10, d11, [sp, 0x10 + TEB_OFFSET] + ldp d12, d13, [sp, 0x20 + TEB_OFFSET] + ldp d14, d15, [sp, 0x30 + TEB_OFFSET] + ldp x19, x20, [sp, 0x40 + TEB_OFFSET] + ldp x21, x22, [sp, 0x50 + TEB_OFFSET] + ldp x23, x24, [sp, 0x60 + TEB_OFFSET] + ldp x25, x26, [sp, 0x70 + TEB_OFFSET] + ldp x27, x28, [sp, 0x80 + TEB_OFFSET] + ldp x29, x30, [sp, 0x90 + TEB_OFFSET] # Pop stack frame - add sp, sp, 0xb0 + add sp, sp, 0xa0 + TEB_OFFSET + +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # autiasp: Authenticate x30 (LR) with SP and key A + hint #29 +#endif + + # Jump to return address (in x30) + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#if (defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT != 0) || (defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0) +#if defined(__ELF__) +/* See "ELF for the Arm 64-bit Architecture (AArch64)" + https://github.com/ARM-software/abi-aa/blob/2023Q3/aaelf64/aaelf64.rst#program-property */ +# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1<<0) +# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1<<1) + 
+# if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT != 0 +# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +# else +# define BTI_FLAG 0 +# endif +# if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0 +# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +# else +# define PAC_FLAG 0 +# endif - # Jump to return address (in x4) - ret x4 + # The note section format is described by Note Section in Chapter 5 + # of "System V Application Binary Interface, Edition 4.1". + .pushsection .note.gnu.property, "a" + .p2align 3 + .long 0x4 /* Name size ("GNU\0") */ + .long 0x10 /* Descriptor size */ + .long 0x5 /* Type: NT_GNU_PROPERTY_TYPE_0 */ + .asciz "GNU" /* Name */ + # Begin descriptor + .long 0xc0000000 /* Property type: GNU_PROPERTY_AARCH64_FEATURE_1_AND */ + .long 0x4 /* Property size */ + .long (BTI_FLAG|PAC_FLAG) + .long 0x0 /* 8-byte alignment padding */ + # End descriptor + .popsection +#endif +#endif diff --git a/coroutine/arm64/Context.asm b/coroutine/arm64/Context.asm new file mode 100644 index 0000000000..866fa628e7 --- /dev/null +++ b/coroutine/arm64/Context.asm @@ -0,0 +1,81 @@ + TTL coroutine/arm64/Context.asm + + AREA |.drectve|, DRECTVE + + EXPORT |coroutine_transfer| + + AREA |.text$mn|, CODE, ARM64 + +;; Add more space for certain TEB values on each stack +TEB_OFFSET EQU 0x20 + +;; Incomplete implementation +coroutine_transfer PROC + ; Make space on the stack for caller registers + sub sp, sp, 0xa0 + TEB_OFFSET + + ; Save caller registers + stp d8, d9, [sp, 0x00 + TEB_OFFSET] + stp d10, d11, [sp, 0x10 + TEB_OFFSET] + stp d12, d13, [sp, 0x20 + TEB_OFFSET] + stp d14, d15, [sp, 0x30 + TEB_OFFSET] + stp x19, x20, [sp, 0x40 + TEB_OFFSET] + stp x21, x22, [sp, 0x50 + TEB_OFFSET] + stp x23, x24, [sp, 0x60 + TEB_OFFSET] + stp x25, x26, [sp, 0x70 + TEB_OFFSET] + stp x27, x28, [sp, 0x80 + TEB_OFFSET] + stp x29, x30, [sp, 0x90 + TEB_OFFSET] + + ;; Save certain values from Thread Environment Block (TEB) x18 + ;; points to the 
TEB on Windows + ;; Read TeStackBase and TeStackLimit at ksarm64.h from TEB + ldp x5, x6, [x18, #0x08] + ;; Save them + stp x5, x6, [sp, #0x00] + ;; Read TeDeallocationStack at ksarm64.h from TEB + ldr x5, [x18, #0x1478] + ;; Read TeFiberData at ksarm64.h from TEB + ldr x6, [x18, #0x20] + ;; Save current fiber data and deallocation stack + stp x5, x6, [sp, #0x10] + + ; Save stack pointer to x0 (first argument) + mov x2, sp + str x2, [x0, 0] + + ; Load stack pointer from x1 (second argument) + ldr x3, [x1, 0] + mov sp, x3 + + ;; Restore stack base and limit + ldp x5, x6, [sp, #0x00] + ;; Write TeStackBase and TeStackLimit at ksarm64.h to TEB + stp x5, x6, [x18, #0x08] + ;; Restore fiber data and deallocation stack + ldp x5, x6, [sp, #0x10] + ;; Write TeDeallocationStack at ksarm64.h to TEB + str x5, [x18, #0x1478] + ;; Write TeFiberData at ksarm64.h to TEB + str x6, [x18, #0x20] + + ; Restore caller registers + ldp d8, d9, [sp, 0x00 + TEB_OFFSET] + ldp d10, d11, [sp, 0x10 + TEB_OFFSET] + ldp d12, d13, [sp, 0x20 + TEB_OFFSET] + ldp d14, d15, [sp, 0x30 + TEB_OFFSET] + ldp x19, x20, [sp, 0x40 + TEB_OFFSET] + ldp x21, x22, [sp, 0x50 + TEB_OFFSET] + ldp x23, x24, [sp, 0x60 + TEB_OFFSET] + ldp x25, x26, [sp, 0x70 + TEB_OFFSET] + ldp x27, x28, [sp, 0x80 + TEB_OFFSET] + ldp x29, x30, [sp, 0x90 + TEB_OFFSET] + + ; Pop stack frame + add sp, sp, 0xa0 + TEB_OFFSET + + ; Jump to return address (in x30) + ret + + endp + + end diff --git a/coroutine/arm64/Context.h b/coroutine/arm64/Context.h index 34be995a4f..468e4155b2 100644 --- a/coroutine/arm64/Context.h +++ b/coroutine/arm64/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_ARM64_CONTEXT_H +#define COROUTINE_ARM64_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. 
* @@ -12,21 +15,67 @@ #include <stdint.h> #include <string.h> +#if defined __GNUC__ #define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_DECL COROUTINE +#elif defined _MSC_VER +#define COROUTINE __declspec(noreturn) void +#define COROUTINE_DECL void +#endif + +#if defined(_WIN32) +#define TEB_OFFSET 0x20 +#else +#define TEB_OFFSET 0x00 +#endif + +enum {COROUTINE_REGISTERS = (0xa0 + TEB_OFFSET) / 8}; -enum {COROUTINE_REGISTERS = 0xb0 / 8}; +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif struct coroutine_context { void **stack_pointer; + void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif }; -typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); +typedef COROUTINE_DECL(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); static inline void coroutine_initialize_main(struct coroutine_context * context) { context->stack_pointer = NULL; } +static inline void *ptrauth_sign_instruction_addr(void *addr, void *modifier) { +#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0 + // Sign the given instruction address with the given modifier and key A + register void *r17 __asm("r17") = addr; + register void *r16 __asm("r16") = modifier; + // Use HINT mnemonic instead of PACIA1716 for compatibility with older assemblers. 
+ __asm ("hint #8;" : "+r"(r17) : "r"(r16)); + addr = r17; +#else + // No-op if PAC is not enabled +#endif + return addr; +} + static inline void coroutine_initialize( struct coroutine_context *context, coroutine_start start, @@ -35,14 +84,29 @@ static inline void coroutine_initialize( ) { assert(start && stack && size >= 1024); +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + // Stack grows down. Force 16-byte alignment. char * top = (char*)stack + size; - context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + top = (char *)((uintptr_t)top & ~0xF); + context->stack_pointer = (void**)top; context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); - context->stack_pointer[0xa0 / 8] = (void*)start; + void *addr = (void*)(uintptr_t)start; + context->stack_pointer[(0x98 + TEB_OFFSET) / 8] = ptrauth_sign_instruction_addr(addr, (void*)top); +#if defined(_WIN32) + // save top address of stack as base in TEB + context->stack_pointer[0x00 / 8] = (char*)stack + size; + // save bottom address of stack as limit and deallocation stack in TEB + context->stack_pointer[0x08 / 8] = stack; + context->stack_pointer[0x10 / 8] = stack; +#endif } struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); @@ -50,3 +114,5 @@ struct coroutine_context * coroutine_transfer(struct coroutine_context * current static inline void coroutine_destroy(struct coroutine_context * context) { } + +#endif /* COROUTINE_ARM64_CONTEXT_H */ diff --git a/coroutine/asyncify/Context.c b/coroutine/asyncify/Context.c new file mode 100644 index 0000000000..303e5f4429 --- /dev/null +++ b/coroutine/asyncify/Context.c @@ -0,0 +1,10 @@ +#include "Context.h" + +void coroutine_trampoline(void * _start, void * _context) +{ + coroutine_start start = (coroutine_start)_start; + struct coroutine_context * 
context = _context; + rb_wasm_set_stack_pointer(context->current_sp); + + start(context->from, context); +} diff --git a/coroutine/asyncify/Context.h b/coroutine/asyncify/Context.h new file mode 100644 index 0000000000..71791a4004 --- /dev/null +++ b/coroutine/asyncify/Context.h @@ -0,0 +1,93 @@ +#ifndef COROUTINE_ASYNCIFY_CONTEXT_H +#define COROUTINE_ASYNCIFY_CONTEXT_H + +/* + This is a coroutine implementation based on Binaryen's Asyncify transformation for WebAssembly. + + This implementation is built on low-level ucontext-like API in wasm/fiber.c + This file is an adapter for the common coroutine interface and for stack manipulation. + wasm/fiber.c doesn't take care of stack to avoid duplicate management with this adapter. + + * See also: wasm/fiber.c +*/ + +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include "wasm/asyncify.h" +#include "wasm/machine.h" +#include "wasm/fiber.h" + +#define COROUTINE void __attribute__((__noreturn__)) + +static const int ASYNCIFY_CORO_DEBUG = 0; + +struct coroutine_context +{ + rb_wasm_fiber_context fc; + void *argument; + struct coroutine_context *from; + + void *current_sp; + void *stack_base; + size_t size; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +COROUTINE coroutine_trampoline(void * _start, void * _context); + +static inline void coroutine_initialize_main(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + // NULL fiber entry means it's the main fiber, and handled specially. + rb_wasm_init_context(&context->fc, NULL, NULL, NULL); + // mark the main fiber has already started + context->fc.is_started = true; +} + +static inline void coroutine_initialize(struct coroutine_context *context, coroutine_start start, void *stack, size_t size) +{ + // Linear stack pointer must be always aligned down to 16 bytes. 
+ // https://github.com/WebAssembly/tool-conventions/blob/c74267a5897c1bdc9aa60adeaf41816387d3cd12/BasicCABI.md#the-linear-stack + uintptr_t sp = ((uintptr_t)stack + size) & ~0xF; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p, stack = %p ... %p)\n", __func__, context, stack, (char *)sp); + rb_wasm_init_context(&context->fc, coroutine_trampoline, start, context); + // record the initial stack pointer position to restore it after resumption + context->current_sp = (char *)sp; + context->stack_base = stack; + context->size = size; +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (current = %p, target = %p)\n", __func__, current, target); + struct coroutine_context * previous = target->from; + + target->from = current; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] current->current_sp = %p -> %p\n", __func__, current->current_sp, rb_wasm_get_stack_pointer()); + // record the current stack pointer position to restore it after resumption + current->current_sp = rb_wasm_get_stack_pointer(); + + // suspend the current coroutine and resume another coroutine + + rb_wasm_swapcontext(&current->fc, &target->fc); + + // after the original coroutine resumed + + rb_wasm_set_stack_pointer(current->current_sp); + + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + context->stack_base = NULL; + context->size = 0; + context->from = NULL; +} + +#endif /* COROUTINE_ASYNCIFY_CONTEXT_H */ diff --git a/coroutine/copy/Context.c b/coroutine/copy/Context.c deleted file mode 100644 index aa8bb2ff05..0000000000 --- a/coroutine/copy/Context.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * This file is part of the "Coroutine" project and released under the MIT License. 
- * - * Created by Samuel Williams on 24/6/2019. - * Copyright, 2019, by Samuel Williams. -*/ - -#include "Context.h" - -#include <stdint.h> - -// http://gcc.gnu.org/onlinedocs/gcc/Alternate-Keywords.html -#ifndef __GNUC__ -#define __asm__ asm -#endif - -#if defined(__sparc) -__attribute__((noinline)) -// https://marc.info/?l=linux-sparc&m=131914569320660&w=2 -static void coroutine_flush_register_windows(void) { - __asm__ -#ifdef __GNUC__ - __volatile__ -#endif -#if defined(__sparcv9) || defined(__sparc_v9__) || defined(__arch64__) -#ifdef __GNUC__ - ("flushw" : : : "%o7") -#else - ("flushw") -#endif -#else - ("ta 0x03") -#endif - ; -} -#else -static void coroutine_flush_register_windows(void) {} -#endif - -__attribute__((noinline)) -void *coroutine_stack_pointer(void) { - return (void*)( - (char*)__builtin_frame_address(0) - ); -} - -// Save the current stack to a private area. It is likely that when restoring the stack, this stack frame will be incomplete. But that is acceptable since the previous stack frame which called `setjmp` should be correctly restored. 
-__attribute__((noinline)) -int coroutine_save_stack_1(struct coroutine_context * context) { - assert(context->stack); - assert(context->base); - - void *stack_pointer = coroutine_stack_pointer(); - - // At this point, you may need to ensure on architectures that use register windows, that all registers are flushed to the stack, otherwise the copy of the stack will not contain the valid registers: - coroutine_flush_register_windows(); - - // Save stack to private area: - if (stack_pointer < context->base) { - size_t size = (char*)context->base - (char*)stack_pointer; - assert(size <= context->size); - - memcpy(context->stack, stack_pointer, size); - context->used = size; - } else { - size_t size = (char*)stack_pointer - (char*)context->base; - assert(size <= context->size); - - memcpy(context->stack, context->base, size); - context->used = size; - } - - // Initialized: - return 0; -} - -// Copy the current stack to a private memory buffer. -int coroutine_save_stack(struct coroutine_context * context) { - if (_setjmp(context->state)) { - // Restored. - return 1; - } - - // We need to invoke the memory copy from one stack frame deeper than the one that calls setjmp. 
That is because if you don't do this, the setjmp might be restored into an invalid stack frame (truncated, etc): - return coroutine_save_stack_1(context); -} - -__attribute__((noreturn, noinline)) -void coroutine_restore_stack_padded(struct coroutine_context *context, void * buffer) { - void *stack_pointer = coroutine_stack_pointer(); - - assert(context->base); - - // At this point, you may need to ensure on architectures that use register windows, that all registers are flushed to the stack, otherwise when we copy in the new stack, the registers would not be updated: - coroutine_flush_register_windows(); - - // Restore stack from private area: - if (stack_pointer < context->base) { - void * bottom = (char*)context->base - context->used; - assert(bottom > stack_pointer); - - memcpy(bottom, context->stack, context->used); - } else { - void * top = (char*)context->base + context->used; - assert(top < stack_pointer); - - memcpy(context->base, context->stack, context->used); - } - - // Restore registers. The `buffer` is to force the compiler NOT to elide he buffer and `alloca`: - _longjmp(context->state, (int)(1 | (intptr_t)buffer)); -} - -// In order to swap between coroutines, we need to swap the stack and registers. -// `setjmp` and `longjmp` are able to swap registers, but what about swapping stacks? You can use `memcpy` to copy the current stack to a private area and `memcpy` to copy the private stack of the next coroutine to the main stack. -// But if the stack yop are copying in to the main stack is bigger than the currently executing stack, the `memcpy` will clobber the current stack frame (including the context argument). So we use `alloca` to push the current stack frame *beyond* the stack we are about to copy in. This ensures the current stack frame in `coroutine_restore_stack_padded` remains valid for calling `longjmp`. 
-__attribute__((noreturn)) -void coroutine_restore_stack(struct coroutine_context *context) { - void *stack_pointer = coroutine_stack_pointer(); - void *buffer = NULL; - - // We must ensure that the next stack frame is BEYOND the stack we are restoring: - if (stack_pointer < context->base) { - intptr_t offset = (intptr_t)stack_pointer - ((intptr_t)context->base - context->used); - if (offset > 0) buffer = alloca(offset); - } else { - intptr_t offset = ((intptr_t)context->base + context->used) - (intptr_t)stack_pointer; - if (offset > 0) buffer = alloca(offset); - } - - assert(context->used > 0); - - coroutine_restore_stack_padded(context, buffer); -} - -struct coroutine_context *coroutine_transfer(struct coroutine_context *current, struct coroutine_context *target) -{ - struct coroutine_context *previous = target->from; - - // In theory, either this condition holds true, or we should assign the base address to target: - assert(current->base == target->base); - // If you are trying to copy the coroutine to a different thread - // target->base = current->base - - target->from = current; - - assert(current != target); - - // It's possible to come here, even thought the current fiber has been terminated. We are never going to return so we don't bother saving the stack. - - if (current->stack) { - if (coroutine_save_stack(current) == 0) { - coroutine_restore_stack(target); - } - } else { - coroutine_restore_stack(target); - } - - target->from = previous; - - return target; -} diff --git a/coroutine/copy/Context.h b/coroutine/copy/Context.h deleted file mode 100644 index 6038f00cf6..0000000000 --- a/coroutine/copy/Context.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * This file is part of the "Coroutine" project and released under the MIT License. - * - * Created by Samuel Williams on 27/6/2019. - * Copyright, 2019, by Samuel Williams. 
-*/ - -#pragma once - -#include <assert.h> -#include <stddef.h> -#include <setjmp.h> -#include <string.h> -#include <stdlib.h> - -/* OpenBSD supports alloca, but does not include alloca.h */ -#ifndef __OpenBSD__ -#include <alloca.h> -#endif - -#define COROUTINE __attribute__((noreturn)) void - -#if INTPTR_MAX <= INT32_MAX -#define COROUTINE_LIMITED_ADDRESS_SPACE -#endif - -// This stack copying implementation which uses a private stack for each coroutine, including the main one. -#define COROUTINE_PRIVATE_STACK - -struct coroutine_context -{ - // Private stack: - void *stack; - size_t size, used; - - // The top (or bottom) of the currently executing stack: - void *base; - - jmp_buf state; - - struct coroutine_context *from; -}; - -typedef COROUTINE(*coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); - -int coroutine_save_stack(struct coroutine_context * context); -COROUTINE coroutine_restore_stack(struct coroutine_context *context); - -// @param stack The private stack area memory allocation (pointer to lowest address). -// @param size The size of the private stack area. -// @param base A stack pointer to the base of the main stack. On x86 hardware, this is the upper extent of the region that will be copied to the private stack. -static inline void coroutine_initialize_main(struct coroutine_context *context, void *stack, size_t size, void *base) { - assert(stack); - assert(size >= 1024); - - context->stack = stack; - context->size = size; - context->used = 0; - - assert(base); - context->base = base; - - context->from = NULL; -} - -// @param start The start function to invoke. 
-static inline void coroutine_initialize( - struct coroutine_context *context, - coroutine_start start, - void *stack, - size_t size, - void *base -) { - assert(start); - - coroutine_initialize_main(context, stack, size, base); - - if (coroutine_save_stack(context)) { - start(context->from, context); - } -} - -struct coroutine_context *coroutine_transfer(struct coroutine_context *current, register struct coroutine_context *target); - -static inline void coroutine_destroy(struct coroutine_context *context) -{ - context->stack = NULL; - context->size = 0; - context->from = NULL; -} diff --git a/coroutine/emscripten/Context.c b/coroutine/emscripten/Context.c new file mode 100644 index 0000000000..75c088daaa --- /dev/null +++ b/coroutine/emscripten/Context.c @@ -0,0 +1,8 @@ +#include "Context.h" + +void coroutine_trampoline(void * _context) +{ + struct coroutine_context * context = _context; + + context->entry_func(context->from, context); +} diff --git a/coroutine/emscripten/Context.h b/coroutine/emscripten/Context.h new file mode 100644 index 0000000000..361e241685 --- /dev/null +++ b/coroutine/emscripten/Context.h @@ -0,0 +1,77 @@ +#ifndef COROUTINE_EMSCRIPTEN_CONTEXT_H +#define COROUTINE_EMSCRIPTEN_CONTEXT_H 1 + +/* An experimental coroutine wrapper for emscripten + * Contact on Yusuke Endoh if you encounter any problem about this + */ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <emscripten/fiber.h> + +#define COROUTINE __attribute__((noreturn)) void + +#if INTPTR_MAX <= INT32_MAX +#define COROUTINE_LIMITED_ADDRESS_SPACE +#endif + +struct coroutine_context; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +struct coroutine_context +{ + emscripten_fiber_t state; + coroutine_start entry_func; + struct coroutine_context * from; + void *argument; +}; + +COROUTINE coroutine_trampoline(void * _context); + +#define MAIN_ASYNCIFY_STACK_SIZE 65536 +static inline void 
coroutine_initialize_main(struct coroutine_context * context) { + static char asyncify_stack[MAIN_ASYNCIFY_STACK_SIZE]; + emscripten_fiber_init_from_current_context(&context->state, asyncify_stack, MAIN_ASYNCIFY_STACK_SIZE); +} +#undef MAIN_ASYNCIFY_STACK_SIZE + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + uintptr_t addr = (uintptr_t)stack; + size_t offset = addr & 0xF; + void *c_stack = (void*)((addr + 0xF) & ~0xF); + size -= offset; + size_t c_stack_size = (size / 2) & ~0xF; + void *asyncify_stack = (void*)((uintptr_t)c_stack + c_stack_size); + size_t asyncify_stack_size = size - c_stack_size; + context->entry_func = start; + + emscripten_fiber_init(&context->state, coroutine_trampoline, context, c_stack, c_stack_size, asyncify_stack, asyncify_stack_size); +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + struct coroutine_context * previous = target->from; + + target->from = current; + emscripten_fiber_swap(&current->state, &target->state); + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->from = NULL; +} + +#endif /* COROUTINE_EMSCRIPTEN_CONTEXT_H */ diff --git a/coroutine/loongarch64/Context.S b/coroutine/loongarch64/Context.S new file mode 100644 index 0000000000..6e10cd032b --- /dev/null +++ b/coroutine/loongarch64/Context.S @@ -0,0 +1,72 @@ +#define TOKEN_PASTE(x,y) x##y + +.text +.align 2 + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + + # Make space on the stack for caller registers + addi.d $sp, $sp, -0xa0 + + # Save caller registers + st.d $s0, $sp, 0x00 + st.d $s1, $sp, 0x08 + st.d $s2, $sp, 0x10 + st.d $s3, $sp, 0x18 + st.d $s4, $sp, 0x20 + st.d $s5, $sp, 0x28 + st.d $s6, $sp, 0x30 + st.d $s7, $sp, 0x38 + 
st.d $s8, $sp, 0x40 + st.d $fp, $sp, 0x48 + fst.d $fs0, $sp, 0x50 + fst.d $fs1, $sp, 0x58 + fst.d $fs2, $sp, 0x60 + fst.d $fs3, $sp, 0x68 + fst.d $fs4, $sp, 0x70 + fst.d $fs5, $sp, 0x78 + fst.d $fs6, $sp, 0x80 + fst.d $fs7, $sp, 0x88 + + # Save return address + st.d $ra, $sp, 0x90 + + # Save stack pointer to a0 (first argument) + st.d $sp, $a0, 0x00 + + # Load stack pointer from a1 (second argument) + ld.d $sp, $a1, 0x00 + + # Restore caller registers + ld.d $s0, $sp, 0x00 + ld.d $s1, $sp, 0x08 + ld.d $s2, $sp, 0x10 + ld.d $s3, $sp, 0x18 + ld.d $s4, $sp, 0x20 + ld.d $s5, $sp, 0x28 + ld.d $s6, $sp, 0x30 + ld.d $s7, $sp, 0x38 + ld.d $s8, $sp, 0x40 + ld.d $fp, $sp, 0x48 + fld.d $fs0, $sp, 0x50 + fld.d $fs1, $sp, 0x58 + fld.d $fs2, $sp, 0x60 + fld.d $fs3, $sp, 0x68 + fld.d $fs4, $sp, 0x70 + fld.d $fs5, $sp, 0x78 + fld.d $fs6, $sp, 0x80 + fld.d $fs7, $sp, 0x88 + + # Load return address + ld.d $ra, $sp, 0x90 + + # Pop stack frame + addi.d $sp, $sp, 0xa0 + + # Jump to return address + jr $ra + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/loongarch64/Context.h b/coroutine/loongarch64/Context.h new file mode 100644 index 0000000000..82b85b36e9 --- /dev/null +++ b/coroutine/loongarch64/Context.h @@ -0,0 +1,46 @@ +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum {COROUTINE_REGISTERS = 0xa0 / 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. 
Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0x90 / 8] = (void*)(uintptr_t)start; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} diff --git a/coroutine/ppc/Context.S b/coroutine/ppc/Context.S new file mode 100644 index 0000000000..f44b2419b4 --- /dev/null +++ b/coroutine/ppc/Context.S @@ -0,0 +1,89 @@ +; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. +; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only +; ppc32 version may be re-written compactly with stmw/lwm, but the code won't be faster, see: https://github.com/ruby/ruby/pull/5927#issuecomment-1139730541 + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. + +#define TOKEN_PASTE(x,y) x##y + +.machine ppc7400 ; = G4, Rosetta +.text + +.globl PREFIXED_SYMBOL(coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) 
+ subi r1,r1,80 + + ; Get LR + mflr r0 + + ; Save caller registers + stw r31,0(r1) + stw r30,4(r1) + stw r29,8(r1) + stw r28,12(r1) + stw r27,16(r1) + stw r26,20(r1) + stw r25,24(r1) + stw r24,28(r1) + stw r23,32(r1) + stw r22,36(r1) + stw r21,40(r1) + stw r20,44(r1) + stw r19,48(r1) + stw r18,52(r1) + stw r17,56(r1) + stw r16,60(r1) + stw r15,64(r1) + stw r14,68(r1) + stw r13,72(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + stw r0,76(r1) + + ; Save stack pointer to first argument + stw r1,0(r3) + + ; Load stack pointer from second argument + lwz r1,0(r4) + + ; Load return address + lwz r0,76(r1) + + ; Restore caller registers + lwz r13,72(r1) + lwz r14,68(r1) + lwz r15,64(r1) + lwz r16,60(r1) + lwz r17,56(r1) + lwz r18,52(r1) + lwz r19,48(r1) + lwz r20,44(r1) + lwz r21,40(r1) + lwz r22,36(r1) + lwz r23,32(r1) + lwz r24,28(r1) + lwz r25,24(r1) + lwz r26,20(r1) + lwz r27,16(r1) + lwz r28,12(r1) + lwz r29,8(r1) + lwz r30,4(r1) + lwz r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,80 + + ; Jump to return address + blr diff --git a/coroutine/ppc/Context.h b/coroutine/ppc/Context.h new file mode 100644 index 0000000000..8035d08556 --- /dev/null +++ b/coroutine/ppc/Context.h @@ -0,0 +1,58 @@ +#ifndef COROUTINE_PPC_CONTEXT_H +#define COROUTINE_PPC_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_LIMITED_ADDRESS_SPACE + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + 
context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC_CONTEXT_H */ diff --git a/coroutine/ppc64/Context.S b/coroutine/ppc64/Context.S new file mode 100644 index 0000000000..20a47c61c6 --- /dev/null +++ b/coroutine/ppc64/Context.S @@ -0,0 +1,88 @@ +; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. +; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. + +#define TOKEN_PASTE(x,y) x##y + +.machine ppc64 ; = G5 +.text + +.globl PREFIXED_SYMBOL(coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) 
+ subi r1,r1,160 + + ; Get LR + mflr r0 + + ; Save caller registers + std r31,0(r1) + std r30,8(r1) + std r29,16(r1) + std r28,24(r1) + std r27,32(r1) + std r26,40(r1) + std r25,48(r1) + std r24,56(r1) + std r23,64(r1) + std r22,72(r1) + std r21,80(r1) + std r20,88(r1) + std r19,96(r1) + std r18,104(r1) + std r17,112(r1) + std r16,120(r1) + std r15,128(r1) + std r14,136(r1) + std r13,144(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + std r0,152(r1) + + ; Save stack pointer to first argument + std r1,0(r3) + + ; Load stack pointer from second argument + ld r1,0(r4) + + ; Load return address + ld r0,152(r1) + + ; Restore caller registers + ld r13,144(r1) + ld r14,136(r1) + ld r15,128(r1) + ld r16,120(r1) + ld r17,112(r1) + ld r18,104(r1) + ld r19,96(r1) + ld r20,88(r1) + ld r21,80(r1) + ld r22,72(r1) + ld r23,64(r1) + ld r24,56(r1) + ld r25,48(r1) + ld r26,40(r1) + ld r27,32(r1) + ld r28,24(r1) + ld r29,16(r1) + ld r30,8(r1) + ld r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,160 + + ; Jump to return address + blr diff --git a/coroutine/ppc64/Context.h b/coroutine/ppc64/Context.h new file mode 100644 index 0000000000..085b475ed5 --- /dev/null +++ b/coroutine/ppc64/Context.h @@ -0,0 +1,57 @@ +#ifndef COROUTINE_PPC64_CONTEXT_H +#define COROUTINE_PPC64_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + 
+static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC64_CONTEXT_H */ diff --git a/coroutine/ppc64le/Context.S b/coroutine/ppc64le/Context.S index 61be9efcf0..f7bcae2c3a 100644 --- a/coroutine/ppc64le/Context.S +++ b/coroutine/ppc64le/Context.S @@ -1,14 +1,13 @@ #define TOKEN_PASTE(x,y) x##y -#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) .text .align 2 -.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) -.type PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer), @function -PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +.globl PREFIXED_SYMBOL(coroutine_transfer) +.type PREFIXED_SYMBOL(coroutine_transfer), @function +PREFIXED_SYMBOL(coroutine_transfer): # Make space on the stack for caller registers - addi 1,1,-152 + addi 1,1,-160 # Save caller registers std 14,0(1) @@ -34,6 +33,10 @@ PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): mflr 0 std 0,144(1) + # Save caller special register + mfcr 0 + std 0, 152(1) + # Save stack pointer to first argument std 1,0(3) @@ -64,8 +67,14 @@ PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): ld 0,144(1) mtlr 0 + # Load special registers + ld 0,152(1) + # Restore cr register cr2, cr3 and cr4 (field index 3,4,5) + # (field index is 1-based, field 
1 = cr0) using a mask (32|16|8 = 56) + mtcrf 56,0 + # Pop stack frame - addi 1,1,152 + addi 1,1,160 # Jump to return address blr diff --git a/coroutine/ppc64le/Context.h b/coroutine/ppc64le/Context.h index e36f9c3583..63ea9f19ff 100644 --- a/coroutine/ppc64le/Context.h +++ b/coroutine/ppc64le/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_PPC64LE_CONTEXT_H +#define COROUTINE_PPC64LE_CONTEXT_H 1 + #pragma once #include <assert.h> @@ -9,13 +12,14 @@ enum { COROUTINE_REGISTERS = - 19 /* 18 general purpose registers (r14-r31) and 1 return address */ + 20 /* 18 general purpose registers (r14-r31), 1 special register (cr) and 1 return address */ + 4 /* space for fiber_entry() to store the link register */ }; struct coroutine_context { void **stack_pointer; + void *argument; }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -49,3 +53,5 @@ static inline void coroutine_destroy(struct coroutine_context * context) { context->stack_pointer = NULL; } + +#endif /* COROUTINE_PPC64LE_CONTEXT_H */ diff --git a/coroutine/pthread/Context.c b/coroutine/pthread/Context.c new file mode 100644 index 0000000000..38774cda0b --- /dev/null +++ b/coroutine/pthread/Context.c @@ -0,0 +1,272 @@ +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 24/6/2021. + * Copyright, 2021, by Samuel Williams. 
+*/ + +#include "Context.h" +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> + +static const int DEBUG = 0; + +static +int check(const char * message, int result) { + if (result) { + switch (result) { + case EDEADLK: + if (DEBUG) fprintf(stderr, "deadlock detected result=%d errno=%d\n", result, errno); + break; + default: + if (DEBUG) fprintf(stderr, "error detected result=%d errno=%d\n", result, errno); + perror(message); + } + } + + assert(result == 0); + + return result; +} + +void coroutine_initialize_main(struct coroutine_context * context) { + context->id = pthread_self(); + + check("coroutine_initialize_main:pthread_cond_init", + pthread_cond_init(&context->schedule, NULL) + ); + + context->shared = (struct coroutine_shared*)malloc(sizeof(struct coroutine_shared)); + assert(context->shared); + + context->shared->main = context; + context->shared->count = 1; + + if (DEBUG) { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); + + check("coroutine_initialize_main:pthread_mutex_init", + pthread_mutex_init(&context->shared->guard, &attr) + ); + } else { + check("coroutine_initialize_main:pthread_mutex_init", + pthread_mutex_init(&context->shared->guard, NULL) + ); + } +} + +static +void coroutine_release(struct coroutine_context *context) { + if (context->shared) { + size_t count = (context->shared->count -= 1); + + if (count == 0) { + if (DEBUG) fprintf(stderr, "coroutine_release:pthread_mutex_destroy(%p)\n", &context->shared->guard); + pthread_mutex_destroy(&context->shared->guard); + free(context->shared); + } + + context->shared = NULL; + + if (DEBUG) fprintf(stderr, "coroutine_release:pthread_cond_destroy(%p)\n", &context->schedule); + pthread_cond_destroy(&context->schedule); + } +} + +void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // We will create the 
thread when we first transfer, but save the details now: + context->shared = NULL; + context->start = start; + context->stack = stack; + context->size = size; +} + +static +int is_locked(pthread_mutex_t * mutex) { + int result = pthread_mutex_trylock(mutex); + + // If we could successfully lock the mutex: + if (result == 0) { + pthread_mutex_unlock(mutex); + // We could lock the mutex, so it wasn't locked: + return 0; + } else { + // Otherwise we couldn't lock it because it's already locked: + return 1; + } +} + +static +void coroutine_guard_unlock(void * _context) +{ + struct coroutine_context * context = _context; + + if (DEBUG) fprintf(stderr, "coroutine_guard_unlock:pthread_mutex_unlock\n"); + + check("coroutine_guard_unlock:pthread_mutex_unlock", + pthread_mutex_unlock(&context->shared->guard) + ); +} + +static +void coroutine_wait(struct coroutine_context *context) +{ + if (DEBUG) fprintf(stderr, "coroutine_wait:pthread_mutex_lock(guard=%p is_locked=%d)\n", &context->shared->guard, is_locked(&context->shared->guard)); + check("coroutine_wait:pthread_mutex_lock", + pthread_mutex_lock(&context->shared->guard) + ); + + if (DEBUG) fprintf(stderr, "coroutine_wait:pthread_mutex_unlock(guard)\n"); + pthread_mutex_unlock(&context->shared->guard); +} + +static +void coroutine_trampoline_cleanup(void *_context) { + struct coroutine_context * context = _context; + coroutine_release(context); +} + +void * coroutine_trampoline(void * _context) +{ + struct coroutine_context * context = _context; + assert(context->shared); + + pthread_cleanup_push(coroutine_trampoline_cleanup, context); + + coroutine_wait(context); + + context->start(context->from, context); + + pthread_cleanup_pop(1); + + return NULL; +} + +static +int coroutine_create_thread(struct coroutine_context *context) +{ + int result; + + pthread_attr_t attr; + result = pthread_attr_init(&attr); + if (result != 0) { + return result; + } + + result = pthread_attr_setstack(&attr, context->stack, 
(size_t)context->size); + if (result != 0) { + pthread_attr_destroy(&attr); + return result; + } + + result = pthread_cond_init(&context->schedule, NULL); + if (result != 0) { + pthread_attr_destroy(&attr); + return result; + } + + result = pthread_create(&context->id, &attr, coroutine_trampoline, context); + if (result != 0) { + pthread_attr_destroy(&attr); + if (DEBUG) fprintf(stderr, "coroutine_create_thread:pthread_cond_destroy(%p)\n", &context->schedule); + pthread_cond_destroy(&context->schedule); + return result; + } + + context->shared->count += 1; + + return result; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + assert(current->shared); + + struct coroutine_context * previous = target->from; + target->from = current; + + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_mutex_lock(guard=%p is_locked=%d)\n", &current->shared->guard, is_locked(&current->shared->guard)); + pthread_mutex_lock(&current->shared->guard); + pthread_cleanup_push(coroutine_guard_unlock, current); + + // First transfer: + if (target->shared == NULL) { + target->shared = current->shared; + + if (DEBUG) fprintf(stderr, "coroutine_transfer:coroutine_create_thread...\n"); + if (coroutine_create_thread(target)) { + if (DEBUG) fprintf(stderr, "coroutine_transfer:coroutine_create_thread failed\n"); + target->shared = NULL; + target->from = previous; + return NULL; + } + } else { + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cond_signal(target)\n"); + pthread_cond_signal(&target->schedule); + } + + // A side effect of acting upon a cancellation request while in a condition wait is that the mutex is (in effect) re-acquired before calling the first cancellation cleanup handler. If cancelled, pthread_cond_wait immediately invokes cleanup handlers. 
+ if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cond_wait(schedule=%p, guard=%p, is_locked=%d)\n", &current->schedule, &current->shared->guard, is_locked(&current->shared->guard)); + check("coroutine_transfer:pthread_cond_wait", + pthread_cond_wait(&current->schedule, &current->shared->guard) + ); + + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cleanup_pop\n"); + pthread_cleanup_pop(1); + +#ifdef __FreeBSD__ + // Apparently required for FreeBSD: + pthread_testcancel(); +#endif + + target->from = previous; + + return target; +} + +static +void coroutine_join(struct coroutine_context * context) { + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_cancel\n"); + int result = pthread_cancel(context->id); + if (result == -1 && errno == ESRCH) { + // The thread may be dead due to fork, so it cannot be joined and this doesn't represent a real error: + return; + } + + check("coroutine_join:pthread_cancel", result); + + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_join\n"); + check("coroutine_join:pthread_join", + pthread_join(context->id, NULL) + ); + + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_join done\n"); +} + +void coroutine_destroy(struct coroutine_context * context) +{ + if (DEBUG) fprintf(stderr, "coroutine_destroy\n"); + + assert(context); + + // We are already destroyed or never created: + if (context->shared == NULL) return; + + if (context == context->shared->main) { + context->shared->main = NULL; + coroutine_release(context); + } else { + coroutine_join(context); + assert(context->shared == NULL); + } +} diff --git a/coroutine/pthread/Context.h b/coroutine/pthread/Context.h new file mode 100644 index 0000000000..6d551ee9df --- /dev/null +++ b/coroutine/pthread/Context.h @@ -0,0 +1,63 @@ +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 24/6/2021. + * Copyright, 2021, by Samuel Williams. 
+*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <pthread.h> + +#define COROUTINE void + +#define COROUTINE_PTHREAD_CONTEXT + +#ifdef HAVE_STDINT_H +#include <stdint.h> +#if INTPTR_MAX <= INT32_MAX +#define COROUTINE_LIMITED_ADDRESS_SPACE +#endif +#endif + +struct coroutine_context; + +struct coroutine_shared +{ + pthread_mutex_t guard; + struct coroutine_context * main; + + size_t count; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +struct coroutine_context +{ + struct coroutine_shared * shared; + + coroutine_start start; + void *argument; + + void *stack; + size_t size; + + pthread_t id; + pthread_cond_t schedule; + struct coroutine_context * from; +}; + +void coroutine_initialize_main(struct coroutine_context * context); + +void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +); + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +void coroutine_destroy(struct coroutine_context * context); diff --git a/coroutine/riscv64/Context.S b/coroutine/riscv64/Context.S new file mode 100644 index 0000000000..8e7fc74ffc --- /dev/null +++ b/coroutine/riscv64/Context.S @@ -0,0 +1,86 @@ +#define TOKEN_PASTE(x,y) x##y + +.text +.align 2 + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + + # Make space on the stack for caller registers + addi sp, sp, -0xd0 + + # Save caller registers + sd s0, 0x00(sp) + sd s1, 0x08(sp) + sd s2, 0x10(sp) + sd s3, 0x18(sp) + sd s4, 0x20(sp) + sd s5, 0x28(sp) + sd s6, 0x30(sp) + sd s7, 0x38(sp) + sd s8, 0x40(sp) + sd s9, 0x48(sp) + sd s10, 0x50(sp) + sd s11, 0x58(sp) + fsd fs0, 0x60(sp) + fsd fs1, 0x68(sp) + fsd fs2, 0x70(sp) + fsd fs3, 0x78(sp) + fsd fs4, 0x80(sp) + fsd fs5, 0x88(sp) + fsd fs6, 0x90(sp) + fsd fs7, 0x98(sp) + fsd fs8, 0xa0(sp) + fsd fs9, 0xa8(sp) + fsd fs10, 0xb0(sp) + fsd 
fs11, 0xb8(sp) + + # Save return address + sd ra, 0xc0(sp) + + # Save stack pointer to a0 (first argument) + mv a2, sp + sd a2, (a0) + + # Load stack pointer from a1 (second argument) + ld a3, (a1) + mv sp, a3 + + # Restore caller registers + ld s0, 0x00(sp) + ld s1, 0x08(sp) + ld s2, 0x10(sp) + ld s3, 0x18(sp) + ld s4, 0x20(sp) + ld s5, 0x28(sp) + ld s6, 0x30(sp) + ld s7, 0x38(sp) + ld s8, 0x40(sp) + ld s9, 0x48(sp) + ld s10, 0x50(sp) + ld s11, 0x58(sp) + fld fs0, 0x60(sp) + fld fs1, 0x68(sp) + fld fs2, 0x70(sp) + fld fs3, 0x78(sp) + fld fs4, 0x80(sp) + fld fs5, 0x88(sp) + fld fs6, 0x90(sp) + fld fs7, 0x98(sp) + fld fs8, 0xa0(sp) + fld fs9, 0xa8(sp) + fld fs10, 0xb0(sp) + fld fs11, 0xb8(sp) + + # Load return address + ld ra, 0xc0(sp) + + # Pop stack frame + addi sp, sp, 0xd0 + + # Jump to return address + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/riscv64/Context.h b/coroutine/riscv64/Context.h new file mode 100644 index 0000000000..3660fb5577 --- /dev/null +++ b/coroutine/riscv64/Context.h @@ -0,0 +1,46 @@ +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum {COROUTINE_REGISTERS = 0xd0 / 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. 
+ char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0xc0 / 8] = (void*)(uintptr_t)start; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} diff --git a/coroutine/ucontext/Context.c b/coroutine/ucontext/Context.c index 9ba76ee628..5d728d554d 100644 --- a/coroutine/ucontext/Context.c +++ b/coroutine/ucontext/Context.c @@ -11,6 +11,7 @@ #if defined(__sun) && !defined(__EXTENSIONS__) #define __EXTENSIONS__ #endif + #include "Context.h" void coroutine_trampoline(void * _start, void * _context) diff --git a/coroutine/ucontext/Context.h b/coroutine/ucontext/Context.h index 2b2a593793..d338d8de60 100644 --- a/coroutine/ucontext/Context.h +++ b/coroutine/ucontext/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_UCONTEXT_CONTEXT_H +#define COROUTINE_UCONTEXT_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. 
* @@ -13,14 +16,18 @@ #define COROUTINE __attribute__((noreturn)) void +#ifdef HAVE_STDINT_H +#include <stdint.h> #if INTPTR_MAX <= INT32_MAX #define COROUTINE_LIMITED_ADDRESS_SPACE #endif +#endif struct coroutine_context { ucontext_t state; struct coroutine_context * from; + void *argument; }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -68,3 +75,5 @@ static inline void coroutine_destroy(struct coroutine_context * context) context->state.uc_stack.ss_size = 0; context->from = NULL; } + +#endif /* COROUTINE_UCONTEXT_CONTEXT_H */ diff --git a/coroutine/universal/Context.S b/coroutine/universal/Context.S new file mode 100644 index 0000000000..11c80a7927 --- /dev/null +++ b/coroutine/universal/Context.S @@ -0,0 +1,16 @@ +#if 0 +#elif defined __x86_64__ +# include "coroutine/amd64/Context.S" +#elif defined __i386__ +# include "coroutine/x86/Context.S" +#elif defined __ppc__ +# include "coroutine/ppc/Context.S" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64/Context.S" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64le/Context.S" +#elif defined __arm64__ +# include "coroutine/arm64/Context.S" +#else +# error "Unsupported CPU" +#endif diff --git a/coroutine/universal/Context.h b/coroutine/universal/Context.h new file mode 100644 index 0000000000..ec4d2f484a --- /dev/null +++ b/coroutine/universal/Context.h @@ -0,0 +1,21 @@ +#ifndef COROUTINE_UNIVERSAL_CONTEXT_H +#define COROUTINE_UNIVERSAL_CONTEXT_H 1 + +#if 0 +#elif defined __x86_64__ +# include "coroutine/amd64/Context.h" +#elif defined __i386__ +# include "coroutine/x86/Context.h" +#elif defined __ppc__ +# include "coroutine/ppc/Context.h" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64/Context.h" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64le/Context.h" +#elif defined __arm64__ +# include "coroutine/arm64/Context.h" +#else 
+# error "Unsupported CPU" +#endif + +#endif /* COROUTINE_UNIVERSAL_CONTEXT_H */ diff --git a/coroutine/win32/Context.h b/coroutine/win32/Context.h index e20342a98d..c1c8fbcd13 100644 --- a/coroutine/win32/Context.h +++ b/coroutine/win32/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_WIN32_CONTEXT_H +#define COROUTINE_WIN32_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. * @@ -13,6 +16,7 @@ #include <string.h> #define COROUTINE __declspec(noreturn) void __fastcall +#define COROUTINE_DECL void __fastcall #define COROUTINE_LIMITED_ADDRESS_SPACE /* This doesn't include thread information block */ @@ -21,6 +25,7 @@ enum {COROUTINE_REGISTERS = 4}; struct coroutine_context { void **stack_pointer; + void *argument; }; typedef void(__fastcall * coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); @@ -41,7 +46,7 @@ static inline void coroutine_initialize( char * top = (char*)stack + size; context->stack_pointer = (void**)((uintptr_t)top & ~0xF); - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)(uintptr_t)start; /* Windows Thread Information Block */ *--context->stack_pointer = (void*)0xFFFFFFFF; /* fs:[0] */ @@ -57,3 +62,5 @@ struct coroutine_context * __fastcall coroutine_transfer(struct coroutine_contex static inline void coroutine_destroy(struct coroutine_context * context) { } + +#endif /* COROUTINE_WIN32_CONTEXT_H */ diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h index e28e6a6ff0..23b21b5c1c 100644 --- a/coroutine/win64/Context.h +++ b/coroutine/win64/Context.h @@ -1,3 +1,6 @@ +#ifndef COROUTINE_WIN64_CONTEXT_H +#define COROUTINE_WIN64_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. 
* @@ -13,6 +16,7 @@ #include <string.h> #define COROUTINE __declspec(noreturn) void +#define COROUTINE_DECL void enum { COROUTINE_REGISTERS = 8, @@ -22,11 +26,12 @@ enum { struct coroutine_context { void **stack_pointer; + void *argument; }; typedef void(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); -void coroutine_trampoline(); +void coroutine_trampoline(void); static inline void coroutine_initialize_main(struct coroutine_context * context) { context->stack_pointer = NULL; @@ -49,7 +54,7 @@ static inline void coroutine_initialize( /* Return address */ *--context->stack_pointer = 0; - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)(uintptr_t)start; *--context->stack_pointer = (void*)coroutine_trampoline; /* Windows Thread Information Block */ @@ -67,3 +72,5 @@ struct coroutine_context * coroutine_transfer(struct coroutine_context * current static inline void coroutine_destroy(struct coroutine_context * context) { } + +#endif /* COROUTINE_WIN64_CONTEXT_H */ diff --git a/coroutine/x86/Context.S b/coroutine/x86/Context.S index 6983f21c3b..b04e71aa1c 100644 --- a/coroutine/x86/Context.S +++ b/coroutine/x86/Context.S @@ -6,12 +6,11 @@ ## #define TOKEN_PASTE(x,y) x##y -#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name) .text -.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer) -PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): +.globl PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): # Save caller registers pushl %ebp @@ -37,6 +36,6 @@ PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer): # Jump to the address on the stack ret -#if defined(__linux__) && defined(__ELF__) +#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif diff --git a/coroutine/x86/Context.h b/coroutine/x86/Context.h index dfdde24f5d..f33b338eab 100644 --- a/coroutine/x86/Context.h +++ b/coroutine/x86/Context.h @@ -1,3 +1,6 @@ +#ifndef 
COROUTINE_X86_CONTEXT_H +#define COROUTINE_X86_CONTEXT_H 1 + /* * This file is part of the "Coroutine" project and released under the MIT License. * @@ -20,6 +23,7 @@ enum {COROUTINE_REGISTERS = 4}; struct coroutine_context { void **stack_pointer; + void *argument; }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self) __attribute__((fastcall)); @@ -41,7 +45,7 @@ static inline void coroutine_initialize( context->stack_pointer = (void**)((uintptr_t)top & ~0xF); *--context->stack_pointer = NULL; - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)(uintptr_t)start; context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); @@ -53,3 +57,5 @@ static inline void coroutine_destroy(struct coroutine_context * context) { context->stack_pointer = NULL; } + +#endif /* COROUTINE_X86_CONTEXT_H */ |
