diff options
Diffstat (limited to 'coroutine')
35 files changed, 2470 insertions, 0 deletions
diff --git a/coroutine/amd64/Context.S b/coroutine/amd64/Context.S new file mode 100644 index 0000000000..4b94d31f30 --- /dev/null +++ b/coroutine/amd64/Context.S @@ -0,0 +1,88 @@ +## +## This file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. +## + +/* Important - do _not_ include <cet.h> in this file; doing so will + * cause an incorrect .note.gnu.property section to be emitted. We have + * one at the bottom of this file */ + +#define TOKEN_PASTE(x,y) x##y + +.text + +.globl PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + +#if defined(__CET__) && (__CET__ & 0x01) != 0 + /* IBT landing pad */ + endbr64 +#endif + + # Make space on the stack for 6 registers: + subq $48, %rsp + + # Save caller state: + movq %rbp, 40(%rsp) + movq %rbx, 32(%rsp) + movq %r12, 24(%rsp) + movq %r13, 16(%rsp) + movq %r14, 8(%rsp) + movq %r15, (%rsp) + + # Save caller stack pointer: + movq %rsp, (%rdi) + + # Restore callee stack pointer: + movq (%rsi), %rsp + + # Restore callee state + movq 40(%rsp), %rbp + movq 32(%rsp), %rbx + movq 24(%rsp), %r12 + movq 16(%rsp), %r13 + movq 8(%rsp), %r14 + movq (%rsp), %r15 + + # Adjust stack pointer back: + addq $48, %rsp + + # Put the first argument into the return value: + movq %rdi, %rax + + # We pop the return address and jump to it + ret + +#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#if defined(__ELF__) + +#if defined(__CET__) && (__CET__ & 0x01) != 0 +# define IBT_FLAG 0x01 +#else +# define IBT_FLAG 0x00 +#endif + +/* We do _NOT_ support CET shadow-stack. Do _not_ add the property for + * this to the Context.o object. 
If you require CET shadow-stack support, + * for now, consider building with --with-coroutine=ucontext */ +#define SHSTK_FLAG 0x00 + +.pushsection .note.gnu.property, "a" +.p2align 3 +.long 0x4 /* Name size ("GNU\0") */ +.long 0x10 /* Descriptor size */ +.long 0x5 /* Type: NT_GNU_PROPERTY_TYPE_0 */ +.asciz "GNU" /* Name */ +# Begin descriptor +.long 0xc0000002 /* Property type: GNU_PROPERTY_X86_FEATURE_1_AND */ +.long 0x4 /* Property size */ +.long (IBT_FLAG | SHSTK_FLAG) +.long 0x0 /* 8-byte alignment padding */ +/* End descriptor */ +.popsection +#endif diff --git a/coroutine/amd64/Context.h b/coroutine/amd64/Context.h new file mode 100644 index 0000000000..65aa638304 --- /dev/null +++ b/coroutine/amd64/Context.h @@ -0,0 +1,85 @@ +#ifndef COROUTINE_AMD64_CONTEXT_H +#define COROUTINE_AMD64_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 10/5/2018. + * Copyright, 2018, by Samuel Williams. +*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum {COROUTINE_REGISTERS = 6}; + +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif + +struct coroutine_context +{ + void **stack_pointer; + void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct 
coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + *--context->stack_pointer = NULL; + *--context->stack_pointer = (void*)(uintptr_t)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_AMD64_CONTEXT_H */ diff --git a/coroutine/arm32/Context.S b/coroutine/arm32/Context.S new file mode 100644 index 0000000000..945e4f82d5 --- /dev/null +++ b/coroutine/arm32/Context.S @@ -0,0 +1,32 @@ +## +## This file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. 
+## + +#define TOKEN_PASTE(x,y) x##y + +.file "Context.S" +.text +.globl PREFIXED_SYMBOL(coroutine_transfer) +.align 2 +.type PREFIXED_SYMBOL(coroutine_transfer),%function +.syntax unified + +PREFIXED_SYMBOL(coroutine_transfer): + # Save caller state (8 registers + return address) + push {r4-r11,lr} + + # Save caller stack pointer + str sp, [r0] + + # Restore callee stack pointer + ldr sp, [r1] + + # Restore callee state (8 registers + program counter) + pop {r4-r11,pc} + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/arm32/Context.h b/coroutine/arm32/Context.h new file mode 100644 index 0000000000..7529dd2efc --- /dev/null +++ b/coroutine/arm32/Context.h @@ -0,0 +1,59 @@ +#ifndef COROUTINE_ARM32_CONTEXT_H +#define COROUTINE_ARM32_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 10/5/2018. + * Copyright, 2018, by Samuel Williams. +*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_LIMITED_ADDRESS_SPACE + +enum {COROUTINE_REGISTERS = 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. 
+ char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + *--context->stack_pointer = (void*)(uintptr_t)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} + +#endif /* COROUTINE_ARM32_CONTEXT_H */ diff --git a/coroutine/arm64/Context.S b/coroutine/arm64/Context.S new file mode 100644 index 0000000000..ce219c0c4d --- /dev/null +++ b/coroutine/arm64/Context.S @@ -0,0 +1,160 @@ +## +## This file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. +## + +#define TOKEN_PASTE(x,y) x##y + +#if defined(__APPLE__) +#define x29 fp +#define x30 lr +.text +.p2align 2 +#else +.text +.align 2 +#endif + +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT & 0x02) != 0 +# error "-mbranch-protection flag specified b-key but Context.S does not support this" +#endif + +#if defined(_WIN32) +## Add more space for certain TEB values on each stack +#define TEB_OFFSET 0x20 +#else +#define TEB_OFFSET 0x00 +#endif + +## NOTE(PAC): We use HINT mnemonics instead of PAC mnemonics to +## keep compatibility with those assemblers that don't support PAC. 
+## +## See "Providing protection for complex software" for more details about PAC/BTI +## https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # paciasp (it also acts as BTI landing pad, so no need to insert BTI also) + hint #25 +#elif defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT != 0) + # For the case PAC is not enabled but BTI is. + # bti c + hint #34 +#endif + # Make space on the stack for caller registers + sub sp, sp, 0xa0 + TEB_OFFSET + + # Save caller registers + stp d8, d9, [sp, 0x00 + TEB_OFFSET] + stp d10, d11, [sp, 0x10 + TEB_OFFSET] + stp d12, d13, [sp, 0x20 + TEB_OFFSET] + stp d14, d15, [sp, 0x30 + TEB_OFFSET] + stp x19, x20, [sp, 0x40 + TEB_OFFSET] + stp x21, x22, [sp, 0x50 + TEB_OFFSET] + stp x23, x24, [sp, 0x60 + TEB_OFFSET] + stp x25, x26, [sp, 0x70 + TEB_OFFSET] + stp x27, x28, [sp, 0x80 + TEB_OFFSET] + stp x29, x30, [sp, 0x90 + TEB_OFFSET] + +#if defined(_WIN32) + # Save certain values from Thread Environment Block (TEB) + # x18 points to the TEB on Windows + # Read TeStackBase and TeStackLimit at ksarm64.h from TEB + ldp x5, x6, [x18, #0x08] + # Save them + stp x5, x6, [sp, #0x00] + # Read TeDeallocationStack at ksarm64.h from TEB + ldr x5, [x18, #0x1478] + # Read TeFiberData at ksarm64.h from TEB + ldr x6, [x18, #0x20] + # Save current fiber data and deallocation stack + stp x5, x6, [sp, #0x10] +#endif + + # Save stack pointer to x0 (first argument) + mov x2, sp + str x2, [x0, 0] + + # Load stack pointer from x1 (second argument) + ldr x3, [x1, 0] + mov sp, x3 + +#if defined(_WIN32) + # Restore stack base and limit + ldp x5, x6, [sp, #0x00] + # Write TeStackBase and TeStackLimit at ksarm64.h to TEB + stp x5, x6, [x18, #0x08] + # Restore fiber data and deallocation stack + ldp x5, x6, [sp, #0x10] + # Write 
TeDeallocationStack at ksarm64.h to TEB + str x5, [x18, #0x1478] + # Write TeFiberData at ksarm64.h to TEB + str x6, [x18, #0x20] +#endif + + # Restore caller registers + ldp d8, d9, [sp, 0x00 + TEB_OFFSET] + ldp d10, d11, [sp, 0x10 + TEB_OFFSET] + ldp d12, d13, [sp, 0x20 + TEB_OFFSET] + ldp d14, d15, [sp, 0x30 + TEB_OFFSET] + ldp x19, x20, [sp, 0x40 + TEB_OFFSET] + ldp x21, x22, [sp, 0x50 + TEB_OFFSET] + ldp x23, x24, [sp, 0x60 + TEB_OFFSET] + ldp x25, x26, [sp, 0x70 + TEB_OFFSET] + ldp x27, x28, [sp, 0x80 + TEB_OFFSET] + ldp x29, x30, [sp, 0x90 + TEB_OFFSET] + + # Pop stack frame + add sp, sp, 0xa0 + TEB_OFFSET + +#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT != 0) + # autiasp: Authenticate x30 (LR) with SP and key A + hint #29 +#endif + + # Jump to return address (in x30) + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#if (defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT != 0) || (defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0) +#if defined(__ELF__) +/* See "ELF for the Arm 64-bit Architecture (AArch64)" + https://github.com/ARM-software/abi-aa/blob/2023Q3/aaelf64/aaelf64.rst#program-property */ +# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1<<0) +# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1<<1) + +# if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT != 0 +# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +# else +# define BTI_FLAG 0 +# endif +# if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0 +# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +# else +# define PAC_FLAG 0 +# endif + + # The note section format is described by Note Section in Chapter 5 + # of "System V Application Binary Interface, Edition 4.1". 
+ .pushsection .note.gnu.property, "a" + .p2align 3 + .long 0x4 /* Name size ("GNU\0") */ + .long 0x10 /* Descriptor size */ + .long 0x5 /* Type: NT_GNU_PROPERTY_TYPE_0 */ + .asciz "GNU" /* Name */ + # Begin descriptor + .long 0xc0000000 /* Property type: GNU_PROPERTY_AARCH64_FEATURE_1_AND */ + .long 0x4 /* Property size */ + .long (BTI_FLAG|PAC_FLAG) + .long 0x0 /* 8-byte alignment padding */ + # End descriptor + .popsection +#endif +#endif diff --git a/coroutine/arm64/Context.asm b/coroutine/arm64/Context.asm new file mode 100644 index 0000000000..866fa628e7 --- /dev/null +++ b/coroutine/arm64/Context.asm @@ -0,0 +1,81 @@ + TTL coroutine/arm64/Context.asm + + AREA |.drectve|, DRECTVE + + EXPORT |coroutine_transfer| + + AREA |.text$mn|, CODE, ARM64 + +;; Add more space for certain TEB values on each stack +TEB_OFFSET EQU 0x20 + +;; Incomplete implementation +coroutine_transfer PROC + ; Make space on the stack for caller registers + sub sp, sp, 0xa0 + TEB_OFFSET + + ; Save caller registers + stp d8, d9, [sp, 0x00 + TEB_OFFSET] + stp d10, d11, [sp, 0x10 + TEB_OFFSET] + stp d12, d13, [sp, 0x20 + TEB_OFFSET] + stp d14, d15, [sp, 0x30 + TEB_OFFSET] + stp x19, x20, [sp, 0x40 + TEB_OFFSET] + stp x21, x22, [sp, 0x50 + TEB_OFFSET] + stp x23, x24, [sp, 0x60 + TEB_OFFSET] + stp x25, x26, [sp, 0x70 + TEB_OFFSET] + stp x27, x28, [sp, 0x80 + TEB_OFFSET] + stp x29, x30, [sp, 0x90 + TEB_OFFSET] + + ;; Save certain values from Thread Environment Block (TEB) x18 + ;; points to the TEB on Windows + ;; Read TeStackBase and TeStackLimit at ksarm64.h from TEB + ldp x5, x6, [x18, #0x08] + ;; Save them + stp x5, x6, [sp, #0x00] + ;; Read TeDeallocationStack at ksarm64.h from TEB + ldr x5, [x18, #0x1478] + ;; Read TeFiberData at ksarm64.h from TEB + ldr x6, [x18, #0x20] + ;; Save current fiber data and deallocation stack + stp x5, x6, [sp, #0x10] + + ; Save stack pointer to x0 (first argument) + mov x2, sp + str x2, [x0, 0] + + ; Load stack pointer from x1 (second argument) + ldr x3, [x1, 
0] + mov sp, x3 + + ;; Restore stack base and limit + ldp x5, x6, [sp, #0x00] + ;; Write TeStackBase and TeStackLimit at ksarm64.h to TEB + stp x5, x6, [x18, #0x08] + ;; Restore fiber data and deallocation stack + ldp x5, x6, [sp, #0x10] + ;; Write TeDeallocationStack at ksarm64.h to TEB + str x5, [x18, #0x1478] + ;; Write TeFiberData at ksarm64.h to TEB + str x6, [x18, #0x20] + + ; Restore caller registers + ldp d8, d9, [sp, 0x00 + TEB_OFFSET] + ldp d10, d11, [sp, 0x10 + TEB_OFFSET] + ldp d12, d13, [sp, 0x20 + TEB_OFFSET] + ldp d14, d15, [sp, 0x30 + TEB_OFFSET] + ldp x19, x20, [sp, 0x40 + TEB_OFFSET] + ldp x21, x22, [sp, 0x50 + TEB_OFFSET] + ldp x23, x24, [sp, 0x60 + TEB_OFFSET] + ldp x25, x26, [sp, 0x70 + TEB_OFFSET] + ldp x27, x28, [sp, 0x80 + TEB_OFFSET] + ldp x29, x30, [sp, 0x90 + TEB_OFFSET] + + ; Pop stack frame + add sp, sp, 0xa0 + TEB_OFFSET + + ; Jump to return address (in x30) + ret + + endp + + end diff --git a/coroutine/arm64/Context.h b/coroutine/arm64/Context.h new file mode 100644 index 0000000000..468e4155b2 --- /dev/null +++ b/coroutine/arm64/Context.h @@ -0,0 +1,118 @@ +#ifndef COROUTINE_ARM64_CONTEXT_H +#define COROUTINE_ARM64_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 10/5/2018. + * Copyright, 2018, by Samuel Williams. 
+*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#if defined __GNUC__ +#define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_DECL COROUTINE +#elif defined _MSC_VER +#define COROUTINE __declspec(noreturn) void +#define COROUTINE_DECL void +#endif + +#if defined(_WIN32) +#define TEB_OFFSET 0x20 +#else +#define TEB_OFFSET 0x00 +#endif + +enum {COROUTINE_REGISTERS = (0xa0 + TEB_OFFSET) / 8}; + +#if defined(__SANITIZE_ADDRESS__) + #define COROUTINE_SANITIZE_ADDRESS +#elif defined(__has_feature) + #if __has_feature(address_sanitizer) + #define COROUTINE_SANITIZE_ADDRESS + #endif +#endif + +#if defined(COROUTINE_SANITIZE_ADDRESS) +#include <sanitizer/common_interface_defs.h> +#include <sanitizer/asan_interface.h> +#endif + +struct coroutine_context +{ + void **stack_pointer; + void *argument; + +#if defined(COROUTINE_SANITIZE_ADDRESS) + void *fake_stack; + void *stack_base; + size_t stack_size; +#endif +}; + +typedef COROUTINE_DECL(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void *ptrauth_sign_instruction_addr(void *addr, void *modifier) { +#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT != 0 + // Sign the given instruction address with the given modifier and key A + register void *r17 __asm("r17") = addr; + register void *r16 __asm("r16") = modifier; + // Use HINT mnemonic instead of PACIA1716 for compatibility with older assemblers. 
+ __asm ("hint #8;" : "+r"(r17) : "r"(r16)); + addr = r17; +#else + // No-op if PAC is not enabled +#endif + return addr; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + +#if defined(COROUTINE_SANITIZE_ADDRESS) + context->fake_stack = NULL; + context->stack_base = stack; + context->stack_size = size; +#endif + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + top = (char *)((uintptr_t)top & ~0xF); + context->stack_pointer = (void**)top; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + void *addr = (void*)(uintptr_t)start; + context->stack_pointer[(0x98 + TEB_OFFSET) / 8] = ptrauth_sign_instruction_addr(addr, (void*)top); +#if defined(_WIN32) + // save top address of stack as base in TEB + context->stack_pointer[0x00 / 8] = (char*)stack + size; + // save bottom address of stack as limit and deallocation stack in TEB + context->stack_pointer[0x08 / 8] = stack; + context->stack_pointer[0x10 / 8] = stack; +#endif +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} + +#endif /* COROUTINE_ARM64_CONTEXT_H */ diff --git a/coroutine/asyncify/Context.c b/coroutine/asyncify/Context.c new file mode 100644 index 0000000000..303e5f4429 --- /dev/null +++ b/coroutine/asyncify/Context.c @@ -0,0 +1,10 @@ +#include "Context.h" + +void coroutine_trampoline(void * _start, void * _context) +{ + coroutine_start start = (coroutine_start)_start; + struct coroutine_context * context = _context; + rb_wasm_set_stack_pointer(context->current_sp); + + start(context->from, context); +} diff --git a/coroutine/asyncify/Context.h b/coroutine/asyncify/Context.h new file mode 100644 index 
0000000000..71791a4004 --- /dev/null +++ b/coroutine/asyncify/Context.h @@ -0,0 +1,93 @@ +#ifndef COROUTINE_ASYNCIFY_CONTEXT_H +#define COROUTINE_ASYNCIFY_CONTEXT_H + +/* + This is a coroutine implementation based on Binaryen's Asyncify transformation for WebAssembly. + + This implementation is built on low-level ucontext-like API in wasm/fiber.c + This file is an adapter for the common coroutine interface and for stack manipulation. + wasm/fiber.c doesn't take care of stack to avoid duplicate management with this adapter. + + * See also: wasm/fiber.c +*/ + +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include "wasm/asyncify.h" +#include "wasm/machine.h" +#include "wasm/fiber.h" + +#define COROUTINE void __attribute__((__noreturn__)) + +static const int ASYNCIFY_CORO_DEBUG = 0; + +struct coroutine_context +{ + rb_wasm_fiber_context fc; + void *argument; + struct coroutine_context *from; + + void *current_sp; + void *stack_base; + size_t size; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +COROUTINE coroutine_trampoline(void * _start, void * _context); + +static inline void coroutine_initialize_main(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + // NULL fiber entry means it's the main fiber, and handled specially. + rb_wasm_init_context(&context->fc, NULL, NULL, NULL); + // mark the main fiber has already started + context->fc.is_started = true; +} + +static inline void coroutine_initialize(struct coroutine_context *context, coroutine_start start, void *stack, size_t size) +{ + // Linear stack pointer must be always aligned down to 16 bytes. 
+ // https://github.com/WebAssembly/tool-conventions/blob/c74267a5897c1bdc9aa60adeaf41816387d3cd12/BasicCABI.md#the-linear-stack + uintptr_t sp = ((uintptr_t)stack + size) & ~0xF; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p, stack = %p ... %p)\n", __func__, context, stack, (char *)sp); + rb_wasm_init_context(&context->fc, coroutine_trampoline, start, context); + // record the initial stack pointer position to restore it after resumption + context->current_sp = (char *)sp; + context->stack_base = stack; + context->size = size; +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (current = %p, target = %p)\n", __func__, current, target); + struct coroutine_context * previous = target->from; + + target->from = current; + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] current->current_sp = %p -> %p\n", __func__, current->current_sp, rb_wasm_get_stack_pointer()); + // record the current stack pointer position to restore it after resumption + current->current_sp = rb_wasm_get_stack_pointer(); + + // suspend the current coroutine and resume another coroutine + + rb_wasm_swapcontext(&current->fc, &target->fc); + + // after the original coroutine resumed + + rb_wasm_set_stack_pointer(current->current_sp); + + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + if (ASYNCIFY_CORO_DEBUG) fprintf(stderr, "[%s] entry (context = %p)\n", __func__, context); + context->stack_base = NULL; + context->size = 0; + context->from = NULL; +} + +#endif /* COROUTINE_ASYNCIFY_CONTEXT_H */ diff --git a/coroutine/emscripten/Context.c b/coroutine/emscripten/Context.c new file mode 100644 index 0000000000..75c088daaa --- /dev/null +++ b/coroutine/emscripten/Context.c @@ -0,0 +1,8 @@ +#include "Context.h" + +void coroutine_trampoline(void * _context) +{ + struct 
coroutine_context * context = _context; + + context->entry_func(context->from, context); +} diff --git a/coroutine/emscripten/Context.h b/coroutine/emscripten/Context.h new file mode 100644 index 0000000000..361e241685 --- /dev/null +++ b/coroutine/emscripten/Context.h @@ -0,0 +1,77 @@ +#ifndef COROUTINE_EMSCRIPTEN_CONTEXT_H +#define COROUTINE_EMSCRIPTEN_CONTEXT_H 1 + +/* An experimental coroutine wrapper for emscripten + * Contact Yusuke Endoh if you encounter any problems with this + */ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <emscripten/fiber.h> + +#define COROUTINE __attribute__((noreturn)) void + +#if INTPTR_MAX <= INT32_MAX +#define COROUTINE_LIMITED_ADDRESS_SPACE +#endif + +struct coroutine_context; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +struct coroutine_context +{ + emscripten_fiber_t state; + coroutine_start entry_func; + struct coroutine_context * from; + void *argument; +}; + +COROUTINE coroutine_trampoline(void * _context); + +#define MAIN_ASYNCIFY_STACK_SIZE 65536 +static inline void coroutine_initialize_main(struct coroutine_context * context) { + static char asyncify_stack[MAIN_ASYNCIFY_STACK_SIZE]; + emscripten_fiber_init_from_current_context(&context->state, asyncify_stack, MAIN_ASYNCIFY_STACK_SIZE); +} +#undef MAIN_ASYNCIFY_STACK_SIZE + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + uintptr_t addr = (uintptr_t)stack; + size_t offset = addr & 0xF; + void *c_stack = (void*)((addr + 0xF) & ~0xF); + size -= offset; + size_t c_stack_size = (size / 2) & ~0xF; + void *asyncify_stack = (void*)((uintptr_t)c_stack + c_stack_size); + size_t asyncify_stack_size = size - c_stack_size; + context->entry_func = start; + + emscripten_fiber_init(&context->state, coroutine_trampoline, context, c_stack, c_stack_size, asyncify_stack, 
asyncify_stack_size); +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + struct coroutine_context * previous = target->from; + + target->from = current; + emscripten_fiber_swap(&current->state, &target->state); + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->from = NULL; +} + +#endif /* COROUTINE_EMSCRIPTEN_CONTEXT_H */ diff --git a/coroutine/loongarch64/Context.S b/coroutine/loongarch64/Context.S new file mode 100644 index 0000000000..6e10cd032b --- /dev/null +++ b/coroutine/loongarch64/Context.S @@ -0,0 +1,72 @@ +#define TOKEN_PASTE(x,y) x##y + +.text +.align 2 + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + + # Make space on the stack for caller registers + addi.d $sp, $sp, -0xa0 + + # Save caller registers + st.d $s0, $sp, 0x00 + st.d $s1, $sp, 0x08 + st.d $s2, $sp, 0x10 + st.d $s3, $sp, 0x18 + st.d $s4, $sp, 0x20 + st.d $s5, $sp, 0x28 + st.d $s6, $sp, 0x30 + st.d $s7, $sp, 0x38 + st.d $s8, $sp, 0x40 + st.d $fp, $sp, 0x48 + fst.d $fs0, $sp, 0x50 + fst.d $fs1, $sp, 0x58 + fst.d $fs2, $sp, 0x60 + fst.d $fs3, $sp, 0x68 + fst.d $fs4, $sp, 0x70 + fst.d $fs5, $sp, 0x78 + fst.d $fs6, $sp, 0x80 + fst.d $fs7, $sp, 0x88 + + # Save return address + st.d $ra, $sp, 0x90 + + # Save stack pointer to a0 (first argument) + st.d $sp, $a0, 0x00 + + # Load stack pointer from a1 (second argument) + ld.d $sp, $a1, 0x00 + + # Restore caller registers + ld.d $s0, $sp, 0x00 + ld.d $s1, $sp, 0x08 + ld.d $s2, $sp, 0x10 + ld.d $s3, $sp, 0x18 + ld.d $s4, $sp, 0x20 + ld.d $s5, $sp, 0x28 + ld.d $s6, $sp, 0x30 + ld.d $s7, $sp, 0x38 + ld.d $s8, $sp, 0x40 + ld.d $fp, $sp, 0x48 + fld.d $fs0, $sp, 0x50 + fld.d $fs1, $sp, 0x58 + fld.d $fs2, $sp, 0x60 + fld.d $fs3, $sp, 0x68 + fld.d $fs4, $sp, 0x70 + fld.d $fs5, $sp, 0x78 + fld.d $fs6, $sp, 0x80 + fld.d $fs7, $sp, 0x88 + + # Load return 
address + ld.d $ra, $sp, 0x90 + + # Pop stack frame + addi.d $sp, $sp, 0xa0 + + # Jump to return address + jr $ra + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/loongarch64/Context.h b/coroutine/loongarch64/Context.h new file mode 100644 index 0000000000..82b85b36e9 --- /dev/null +++ b/coroutine/loongarch64/Context.h @@ -0,0 +1,46 @@ +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum {COROUTINE_REGISTERS = 0xa0 / 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0x90 / 8] = (void*)(uintptr_t)start; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} diff --git a/coroutine/ppc/Context.S b/coroutine/ppc/Context.S new file mode 100644 index 0000000000..f44b2419b4 --- /dev/null +++ b/coroutine/ppc/Context.S @@ -0,0 +1,89 @@ +; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. 
+; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only +; ppc32 version may be re-written compactly with stmw/lwm, but the code won't be faster, see: https://github.com/ruby/ruby/pull/5927#issuecomment-1139730541 + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. + +#define TOKEN_PASTE(x,y) x##y + +.machine ppc7400 ; = G4, Rosetta +.text + +.globl PREFIXED_SYMBOL(coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) + subi r1,r1,80 + + ; Get LR + mflr r0 + + ; Save caller registers + stw r31,0(r1) + stw r30,4(r1) + stw r29,8(r1) + stw r28,12(r1) + stw r27,16(r1) + stw r26,20(r1) + stw r25,24(r1) + stw r24,28(r1) + stw r23,32(r1) + stw r22,36(r1) + stw r21,40(r1) + stw r20,44(r1) + stw r19,48(r1) + stw r18,52(r1) + stw r17,56(r1) + stw r16,60(r1) + stw r15,64(r1) + stw r14,68(r1) + stw r13,72(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + stw r0,76(r1) + + ; Save stack pointer to first argument + stw r1,0(r3) + + ; Load stack pointer from second argument + lwz r1,0(r4) + + ; Load return address + lwz r0,76(r1) + + ; Restore caller registers + lwz r13,72(r1) + lwz r14,68(r1) + lwz r15,64(r1) + lwz r16,60(r1) + lwz r17,56(r1) + lwz r18,52(r1) + lwz r19,48(r1) + lwz r20,44(r1) + lwz r21,40(r1) + lwz r22,36(r1) + lwz r23,32(r1) + lwz r24,28(r1) + lwz r25,24(r1) + lwz r26,20(r1) + lwz r27,16(r1) + lwz r28,12(r1) + lwz r29,8(r1) + lwz r30,4(r1) + lwz r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,80 + + ; Jump to return 
address + blr diff --git a/coroutine/ppc/Context.h b/coroutine/ppc/Context.h new file mode 100644 index 0000000000..8035d08556 --- /dev/null +++ b/coroutine/ppc/Context.h @@ -0,0 +1,58 @@ +#ifndef COROUTINE_PPC_CONTEXT_H +#define COROUTINE_PPC_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void +#define COROUTINE_LIMITED_ADDRESS_SPACE + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC_CONTEXT_H */ diff --git a/coroutine/ppc64/Context.S b/coroutine/ppc64/Context.S new file mode 100644 index 0000000000..20a47c61c6 --- /dev/null +++ b/coroutine/ppc64/Context.S @@ -0,0 +1,88 @@ +; Based on the code by Samuel Williams. 
Created by Sergey Fedorov on 04/06/2022. +; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine. +; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S +; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S +; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only + +; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF. +; To add support for AIX, *BSD or *Linux, please make separate implementations. + +#define TOKEN_PASTE(x,y) x##y + +.machine ppc64 ; = G5 +.text + +.globl PREFIXED_SYMBOL(coroutine_transfer) +.align 2 + +PREFIXED_SYMBOL(coroutine_transfer): + ; Make space on the stack for caller registers + ; (Should we rather use red zone? See libphobos example.) + subi r1,r1,160 + + ; Get LR + mflr r0 + + ; Save caller registers + std r31,0(r1) + std r30,8(r1) + std r29,16(r1) + std r28,24(r1) + std r27,32(r1) + std r26,40(r1) + std r25,48(r1) + std r24,56(r1) + std r23,64(r1) + std r22,72(r1) + std r21,80(r1) + std r20,88(r1) + std r19,96(r1) + std r18,104(r1) + std r17,112(r1) + std r16,120(r1) + std r15,128(r1) + std r14,136(r1) + std r13,144(r1) + + ; Save return address + ; Possibly should rather be saved into linkage area, see libphobos and IBM docs + std r0,152(r1) + + ; Save stack pointer to first argument + std r1,0(r3) + + ; Load stack pointer from second argument + ld r1,0(r4) + + ; Load return address + ld r0,152(r1) + + ; Restore caller registers + ld r13,144(r1) + ld r14,136(r1) + ld r15,128(r1) + ld r16,120(r1) + ld r17,112(r1) + ld r18,104(r1) + ld r19,96(r1) + ld r20,88(r1) + ld r21,80(r1) + ld r22,72(r1) + ld r23,64(r1) + ld r24,56(r1) + ld r25,48(r1) + ld r26,40(r1) + ld r27,32(r1) + ld r28,24(r1) + ld r29,16(r1) + ld r30,8(r1) + ld r31,0(r1) + + ; Set LR + mtlr r0 + + ; Pop stack frame + addi r1,r1,160 + + ; Jump to return address + blr diff --git 
a/coroutine/ppc64/Context.h b/coroutine/ppc64/Context.h new file mode 100644 index 0000000000..085b475ed5 --- /dev/null +++ b/coroutine/ppc64/Context.h @@ -0,0 +1,57 @@ +#ifndef COROUTINE_PPC64_CONTEXT_H +#define COROUTINE_PPC64_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum { + COROUTINE_REGISTERS = + 20 /* 19 general purpose registers (r13-r31) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. 
+ char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + /* Skip a global prologue that sets the TOC register */ + context->stack_pointer[19] = ((char*)start) + 8; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#endif /* COROUTINE_PPC64_CONTEXT_H */ diff --git a/coroutine/ppc64le/Context.S b/coroutine/ppc64le/Context.S new file mode 100644 index 0000000000..f7bcae2c3a --- /dev/null +++ b/coroutine/ppc64le/Context.S @@ -0,0 +1,84 @@ +#define TOKEN_PASTE(x,y) x##y + +.text +.align 2 + +.globl PREFIXED_SYMBOL(coroutine_transfer) +.type PREFIXED_SYMBOL(coroutine_transfer), @function +PREFIXED_SYMBOL(coroutine_transfer): + # Make space on the stack for caller registers + addi 1,1,-160 + + # Save caller registers + std 14,0(1) + std 15,8(1) + std 16,16(1) + std 17,24(1) + std 18,32(1) + std 19,40(1) + std 20,48(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + # Save return address + mflr 0 + std 0,144(1) + + # Save caller special register + mfcr 0 + std 0, 152(1) + + # Save stack pointer to first argument + std 1,0(3) + + # Load stack pointer from second argument + ld 1,0(4) + + # Restore caller registers + ld 14,0(1) + ld 15,8(1) + ld 16,16(1) + ld 17,24(1) + ld 18,32(1) + ld 19,40(1) + ld 20,48(1) + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + + # Load return address + ld 0,144(1) + mtlr 0 + + # Load special registers + ld 0,152(1) + # Restore cr register cr2, cr3 and cr4 
(field index 3,4,5) + # (field index is 1-based, field 1 = cr0) using a mask (32|16|8 = 56) + mtcrf 56,0 + + # Pop stack frame + addi 1,1,160 + + # Jump to return address + blr + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/ppc64le/Context.h b/coroutine/ppc64le/Context.h new file mode 100644 index 0000000000..63ea9f19ff --- /dev/null +++ b/coroutine/ppc64le/Context.h @@ -0,0 +1,57 @@ +#ifndef COROUTINE_PPC64LE_CONTEXT_H +#define COROUTINE_PPC64LE_CONTEXT_H 1 + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum { + COROUTINE_REGISTERS = + 20 /* 18 general purpose registers (r14-r31), 1 special register (cr) and 1 return address */ + + 4 /* space for fiber_entry() to store the link register */ +}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. 
/* Set to a non-zero value to trace every pthread operation on stderr. */
static const int DEBUG = 0;

/*
 * Validate the return value of a pthread call.
 *
 * message: label printed in front of the error description.
 * result:  value returned by the pthread function (0 means success).
 *
 * Returns `result` unchanged. A non-zero `result` is reported on stderr
 * (EDEADLK only when DEBUG is enabled) and then trips the assertion below,
 * so in builds with assertions enabled this only returns on success.
 */
static
int check(const char * message, int result) {
    if (result) {
        // pthread functions report failure through their return value and do
        // not set errno. Publish the code in errno so that perror() below
        // describes the failure that actually happened:
        errno = result;

        switch (result) {
            case EDEADLK:
                if (DEBUG) fprintf(stderr, "deadlock detected result=%d errno=%d\n", result, errno);
                break;
            default:
                if (DEBUG) fprintf(stderr, "error detected result=%d errno=%d\n", result, errno);
                perror(message);
        }
    }

    assert(result == 0);

    return result;
}
PTHREAD_MUTEX_ERRORCHECK); + + check("coroutine_initialize_main:pthread_mutex_init", + pthread_mutex_init(&context->shared->guard, &attr) + ); + } else { + check("coroutine_initialize_main:pthread_mutex_init", + pthread_mutex_init(&context->shared->guard, NULL) + ); + } +} + +static +void coroutine_release(struct coroutine_context *context) { + if (context->shared) { + size_t count = (context->shared->count -= 1); + + if (count == 0) { + if (DEBUG) fprintf(stderr, "coroutine_release:pthread_mutex_destroy(%p)\n", &context->shared->guard); + pthread_mutex_destroy(&context->shared->guard); + free(context->shared); + } + + context->shared = NULL; + + if (DEBUG) fprintf(stderr, "coroutine_release:pthread_cond_destroy(%p)\n", &context->schedule); + pthread_cond_destroy(&context->schedule); + } +} + +void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // We will create the thread when we first transfer, but save the details now: + context->shared = NULL; + context->start = start; + context->stack = stack; + context->size = size; +} + +static +int is_locked(pthread_mutex_t * mutex) { + int result = pthread_mutex_trylock(mutex); + + // If we could successfully lock the mutex: + if (result == 0) { + pthread_mutex_unlock(mutex); + // We could lock the mutex, so it wasn't locked: + return 0; + } else { + // Otherwise we couldn't lock it because it's already locked: + return 1; + } +} + +static +void coroutine_guard_unlock(void * _context) +{ + struct coroutine_context * context = _context; + + if (DEBUG) fprintf(stderr, "coroutine_guard_unlock:pthread_mutex_unlock\n"); + + check("coroutine_guard_unlock:pthread_mutex_unlock", + pthread_mutex_unlock(&context->shared->guard) + ); +} + +static +void coroutine_wait(struct coroutine_context *context) +{ + if (DEBUG) fprintf(stderr, "coroutine_wait:pthread_mutex_lock(guard=%p is_locked=%d)\n", &context->shared->guard, 
is_locked(&context->shared->guard)); + check("coroutine_wait:pthread_mutex_lock", + pthread_mutex_lock(&context->shared->guard) + ); + + if (DEBUG) fprintf(stderr, "coroutine_wait:pthread_mutex_unlock(guard)\n"); + pthread_mutex_unlock(&context->shared->guard); +} + +static +void coroutine_trampoline_cleanup(void *_context) { + struct coroutine_context * context = _context; + coroutine_release(context); +} + +void * coroutine_trampoline(void * _context) +{ + struct coroutine_context * context = _context; + assert(context->shared); + + pthread_cleanup_push(coroutine_trampoline_cleanup, context); + + coroutine_wait(context); + + context->start(context->from, context); + + pthread_cleanup_pop(1); + + return NULL; +} + +static +int coroutine_create_thread(struct coroutine_context *context) +{ + int result; + + pthread_attr_t attr; + result = pthread_attr_init(&attr); + if (result != 0) { + return result; + } + + result = pthread_attr_setstack(&attr, context->stack, (size_t)context->size); + if (result != 0) { + pthread_attr_destroy(&attr); + return result; + } + + result = pthread_cond_init(&context->schedule, NULL); + if (result != 0) { + pthread_attr_destroy(&attr); + return result; + } + + result = pthread_create(&context->id, &attr, coroutine_trampoline, context); + if (result != 0) { + pthread_attr_destroy(&attr); + if (DEBUG) fprintf(stderr, "coroutine_create_thread:pthread_cond_destroy(%p)\n", &context->schedule); + pthread_cond_destroy(&context->schedule); + return result; + } + + context->shared->count += 1; + + return result; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + assert(current->shared); + + struct coroutine_context * previous = target->from; + target->from = current; + + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_mutex_lock(guard=%p is_locked=%d)\n", ¤t->shared->guard, is_locked(¤t->shared->guard)); + pthread_mutex_lock(¤t->shared->guard); + 
pthread_cleanup_push(coroutine_guard_unlock, current); + + // First transfer: + if (target->shared == NULL) { + target->shared = current->shared; + + if (DEBUG) fprintf(stderr, "coroutine_transfer:coroutine_create_thread...\n"); + if (coroutine_create_thread(target)) { + if (DEBUG) fprintf(stderr, "coroutine_transfer:coroutine_create_thread failed\n"); + target->shared = NULL; + target->from = previous; + return NULL; + } + } else { + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cond_signal(target)\n"); + pthread_cond_signal(&target->schedule); + } + + // A side effect of acting upon a cancellation request while in a condition wait is that the mutex is (in effect) re-acquired before calling the first cancellation cleanup handler. If cancelled, pthread_cond_wait immediately invokes cleanup handlers. + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cond_wait(schedule=%p, guard=%p, is_locked=%d)\n", ¤t->schedule, ¤t->shared->guard, is_locked(¤t->shared->guard)); + check("coroutine_transfer:pthread_cond_wait", + pthread_cond_wait(¤t->schedule, ¤t->shared->guard) + ); + + if (DEBUG) fprintf(stderr, "coroutine_transfer:pthread_cleanup_pop\n"); + pthread_cleanup_pop(1); + +#ifdef __FreeBSD__ + // Apparently required for FreeBSD: + pthread_testcancel(); +#endif + + target->from = previous; + + return target; +} + +static +void coroutine_join(struct coroutine_context * context) { + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_cancel\n"); + int result = pthread_cancel(context->id); + if (result == -1 && errno == ESRCH) { + // The thread may be dead due to fork, so it cannot be joined and this doesn't represent a real error: + return; + } + + check("coroutine_join:pthread_cancel", result); + + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_join\n"); + check("coroutine_join:pthread_join", + pthread_join(context->id, NULL) + ); + + if (DEBUG) fprintf(stderr, "coroutine_join:pthread_join done\n"); +} + +void coroutine_destroy(struct coroutine_context 
* context) +{ + if (DEBUG) fprintf(stderr, "coroutine_destroy\n"); + + assert(context); + + // We are already destroyed or never created: + if (context->shared == NULL) return; + + if (context == context->shared->main) { + context->shared->main = NULL; + coroutine_release(context); + } else { + coroutine_join(context); + assert(context->shared == NULL); + } +} diff --git a/coroutine/pthread/Context.h b/coroutine/pthread/Context.h new file mode 100644 index 0000000000..6d551ee9df --- /dev/null +++ b/coroutine/pthread/Context.h @@ -0,0 +1,63 @@ +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 24/6/2021. + * Copyright, 2021, by Samuel Williams. +*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <pthread.h> + +#define COROUTINE void + +#define COROUTINE_PTHREAD_CONTEXT + +#ifdef HAVE_STDINT_H +#include <stdint.h> +#if INTPTR_MAX <= INT32_MAX +#define COROUTINE_LIMITED_ADDRESS_SPACE +#endif +#endif + +struct coroutine_context; + +struct coroutine_shared +{ + pthread_mutex_t guard; + struct coroutine_context * main; + + size_t count; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +struct coroutine_context +{ + struct coroutine_shared * shared; + + coroutine_start start; + void *argument; + + void *stack; + size_t size; + + pthread_t id; + pthread_cond_t schedule; + struct coroutine_context * from; +}; + +void coroutine_initialize_main(struct coroutine_context * context); + +void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +); + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +void coroutine_destroy(struct coroutine_context * context); diff --git a/coroutine/riscv64/Context.S b/coroutine/riscv64/Context.S new file mode 100644 index 0000000000..8e7fc74ffc --- /dev/null 
+++ b/coroutine/riscv64/Context.S @@ -0,0 +1,86 @@ +#define TOKEN_PASTE(x,y) x##y + +.text +.align 2 + +.global PREFIXED_SYMBOL(coroutine_transfer) +PREFIXED_SYMBOL(coroutine_transfer): + + # Make space on the stack for caller registers + addi sp, sp, -0xd0 + + # Save caller registers + sd s0, 0x00(sp) + sd s1, 0x08(sp) + sd s2, 0x10(sp) + sd s3, 0x18(sp) + sd s4, 0x20(sp) + sd s5, 0x28(sp) + sd s6, 0x30(sp) + sd s7, 0x38(sp) + sd s8, 0x40(sp) + sd s9, 0x48(sp) + sd s10, 0x50(sp) + sd s11, 0x58(sp) + fsd fs0, 0x60(sp) + fsd fs1, 0x68(sp) + fsd fs2, 0x70(sp) + fsd fs3, 0x78(sp) + fsd fs4, 0x80(sp) + fsd fs5, 0x88(sp) + fsd fs6, 0x90(sp) + fsd fs7, 0x98(sp) + fsd fs8, 0xa0(sp) + fsd fs9, 0xa8(sp) + fsd fs10, 0xb0(sp) + fsd fs11, 0xb8(sp) + + # Save return address + sd ra, 0xc0(sp) + + # Save stack pointer to a0 (first argument) + mv a2, sp + sd a2, (a0) + + # Load stack pointer from a1 (second argument) + ld a3, (a1) + mv sp, a3 + + # Restore caller registers + ld s0, 0x00(sp) + ld s1, 0x08(sp) + ld s2, 0x10(sp) + ld s3, 0x18(sp) + ld s4, 0x20(sp) + ld s5, 0x28(sp) + ld s6, 0x30(sp) + ld s7, 0x38(sp) + ld s8, 0x40(sp) + ld s9, 0x48(sp) + ld s10, 0x50(sp) + ld s11, 0x58(sp) + fld fs0, 0x60(sp) + fld fs1, 0x68(sp) + fld fs2, 0x70(sp) + fld fs3, 0x78(sp) + fld fs4, 0x80(sp) + fld fs5, 0x88(sp) + fld fs6, 0x90(sp) + fld fs7, 0x98(sp) + fld fs8, 0xa0(sp) + fld fs9, 0xa8(sp) + fld fs10, 0xb0(sp) + fld fs11, 0xb8(sp) + + # Load return address + ld ra, 0xc0(sp) + + # Pop stack frame + addi sp, sp, 0xd0 + + # Jump to return address + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/coroutine/riscv64/Context.h b/coroutine/riscv64/Context.h new file mode 100644 index 0000000000..3660fb5577 --- /dev/null +++ b/coroutine/riscv64/Context.h @@ -0,0 +1,46 @@ +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __attribute__((noreturn)) void + +enum 
{COROUTINE_REGISTERS = 0xd0 / 8}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0xc0 / 8] = (void*)(uintptr_t)start; +} + +struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} diff --git a/coroutine/ucontext/Context.c b/coroutine/ucontext/Context.c new file mode 100644 index 0000000000..5d728d554d --- /dev/null +++ b/coroutine/ucontext/Context.c @@ -0,0 +1,23 @@ +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 24/6/2019. + * Copyright, 2019, by Samuel Williams. +*/ + +/* According to Solaris' ucontext.h, makecontext, etc. are removed in SUSv4. + * To enable the prototype declarations, we need to define __EXTENSIONS__. 
+ */ +#if defined(__sun) && !defined(__EXTENSIONS__) +#define __EXTENSIONS__ +#endif + +#include "Context.h" + +void coroutine_trampoline(void * _start, void * _context) +{ + coroutine_start start = (coroutine_start)_start; + struct coroutine_context * context = _context; + + start(context->from, context); +} diff --git a/coroutine/ucontext/Context.h b/coroutine/ucontext/Context.h new file mode 100644 index 0000000000..d338d8de60 --- /dev/null +++ b/coroutine/ucontext/Context.h @@ -0,0 +1,79 @@ +#ifndef COROUTINE_UCONTEXT_CONTEXT_H +#define COROUTINE_UCONTEXT_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 24/6/2019. + * Copyright, 2019, by Samuel Williams. +*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <ucontext.h> + +#define COROUTINE __attribute__((noreturn)) void + +#ifdef HAVE_STDINT_H +#include <stdint.h> +#if INTPTR_MAX <= INT32_MAX +#define COROUTINE_LIMITED_ADDRESS_SPACE +#endif +#endif + +struct coroutine_context +{ + ucontext_t state; + struct coroutine_context * from; + void *argument; +}; + +typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +COROUTINE coroutine_trampoline(void * _start, void * _context); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->from = NULL; + getcontext(&context->state); +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + coroutine_initialize_main(context); + + context->state.uc_stack.ss_size = size; + // Despite what it's called, this is not actually a stack pointer. It points to the address of the stack allocation (the lowest address). 
+ context->state.uc_stack.ss_sp = (char*)stack; + context->state.uc_stack.ss_flags = 0; + context->state.uc_link = NULL; + + makecontext(&context->state, (void(*)(void))coroutine_trampoline, 2, (void*)start, (void*)context); +} + +static inline struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) +{ + struct coroutine_context * previous = target->from; + + target->from = current; + swapcontext(¤t->state, &target->state); + target->from = previous; + + return target; +} + +static inline void coroutine_destroy(struct coroutine_context * context) +{ + context->state.uc_stack.ss_sp = NULL; + context->state.uc_stack.ss_size = 0; + context->from = NULL; +} + +#endif /* COROUTINE_UCONTEXT_CONTEXT_H */ diff --git a/coroutine/universal/Context.S b/coroutine/universal/Context.S new file mode 100644 index 0000000000..11c80a7927 --- /dev/null +++ b/coroutine/universal/Context.S @@ -0,0 +1,16 @@ +#if 0 +#elif defined __x86_64__ +# include "coroutine/amd64/Context.S" +#elif defined __i386__ +# include "coroutine/x86/Context.S" +#elif defined __ppc__ +# include "coroutine/ppc/Context.S" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64/Context.S" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64le/Context.S" +#elif defined __arm64__ +# include "coroutine/arm64/Context.S" +#else +# error "Unsupported CPU" +#endif diff --git a/coroutine/universal/Context.h b/coroutine/universal/Context.h new file mode 100644 index 0000000000..ec4d2f484a --- /dev/null +++ b/coroutine/universal/Context.h @@ -0,0 +1,21 @@ +#ifndef COROUTINE_UNIVERSAL_CONTEXT_H +#define COROUTINE_UNIVERSAL_CONTEXT_H 1 + +#if 0 +#elif defined __x86_64__ +# include "coroutine/amd64/Context.h" +#elif defined __i386__ +# include "coroutine/x86/Context.h" +#elif defined __ppc__ +# include "coroutine/ppc/Context.h" +#elif defined __ppc64__ && defined(WORDS_BIGENDIAN) +# include 
"coroutine/ppc64/Context.h" +#elif defined __ppc64__ && !defined(WORDS_BIGENDIAN) +# include "coroutine/ppc64le/Context.h" +#elif defined __arm64__ +# include "coroutine/arm64/Context.h" +#else +# error "Unsupported CPU" +#endif + +#endif /* COROUTINE_UNIVERSAL_CONTEXT_H */ diff --git a/coroutine/win32/Context.S b/coroutine/win32/Context.S new file mode 100644 index 0000000000..d14bf435e8 --- /dev/null +++ b/coroutine/win32/Context.S @@ -0,0 +1,47 @@ +## +## This file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Lars Kanis on 06/06/2019. +## + +.text + +# Using fastcall is a big win (and it's the same as how x64 works). +# In coroutine transfer, the arguments are passed in ecx and edx. We don't need +# to touch these in order to pass them to the destination coroutine. + +.globl @coroutine_transfer@8 +@coroutine_transfer@8: + # Save the thread information block: + pushl %fs:0 + pushl %fs:4 + pushl %fs:8 + + # Save caller registers: + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + + # Save caller stack pointer: + movl %esp, (%ecx) + + # Restore callee stack pointer: + movl (%edx), %esp + + # Restore callee stack: + popl %esi + popl %edi + popl %ebx + popl %ebp + + # Restore the thread information block: + popl %fs:8 + popl %fs:4 + popl %fs:0 + + # Save the first argument as the return value: + movl %ecx, %eax + + # Jump to the address on the stack: + ret diff --git a/coroutine/win32/Context.asm b/coroutine/win32/Context.asm new file mode 100644 index 0000000000..f8f431239b --- /dev/null +++ b/coroutine/win32/Context.asm @@ -0,0 +1,55 @@ +;; +;; This file is part of the "Coroutine" project and released under the MIT License. +;; +;; Created by Samuel Williams on 10/5/2018. +;; Copyright, 2018, by Samuel Williams. +;; + +.386 +.model flat + +.code + +assume fs:nothing + +; Using fastcall is a big win (and it's the same has how x64 works). +; In coroutine transfer, the arguments are passed in ecx and edx. 
We don't need +; to touch these in order to pass them to the destination coroutine. + +@coroutine_transfer@8 proc + ; Save the thread information block: + push fs:[0] + push fs:[4] + push fs:[8] + + ; Save caller registers: + push ebp + push ebx + push edi + push esi + + ; Save caller stack pointer: + mov dword ptr [ecx], esp + + ; Restore callee stack pointer: + mov esp, dword ptr [edx] + + ; Restore callee stack: + pop esi + pop edi + pop ebx + pop ebp + + ; Restore the thread information block: + pop fs:[8] + pop fs:[4] + pop fs:[0] + + ; Save the first argument as the return value: + mov eax, dword ptr ecx + + ; Jump to the address on the stack: + ret +@coroutine_transfer@8 endp + +end diff --git a/coroutine/win32/Context.h b/coroutine/win32/Context.h new file mode 100644 index 0000000000..c1c8fbcd13 --- /dev/null +++ b/coroutine/win32/Context.h @@ -0,0 +1,66 @@ +#ifndef COROUTINE_WIN32_CONTEXT_H +#define COROUTINE_WIN32_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 10/5/2018. + * Copyright, 2018, by Samuel Williams. +*/ + +#pragma once + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define COROUTINE __declspec(noreturn) void __fastcall +#define COROUTINE_DECL void __fastcall +#define COROUTINE_LIMITED_ADDRESS_SPACE + +/* This doesn't include thread information block */ +enum {COROUTINE_REGISTERS = 4}; + +struct coroutine_context +{ + void **stack_pointer; + void *argument; +}; + +typedef void(__fastcall * coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); + +static inline void coroutine_initialize_main(struct coroutine_context * context) { + context->stack_pointer = NULL; +} + +static inline void coroutine_initialize( + struct coroutine_context *context, + coroutine_start start, + void *stack, + size_t size +) { + assert(start && stack && size >= 1024); + + // Stack grows down. 
Force 16-byte alignment. + char * top = (char*)stack + size; + context->stack_pointer = (void**)((uintptr_t)top & ~0xF); + + *--context->stack_pointer = (void*)(uintptr_t)start; + + /* Windows Thread Information Block */ + *--context->stack_pointer = (void*)0xFFFFFFFF; /* fs:[0] */ + *--context->stack_pointer = (void*)top; /* fs:[4] */ + *--context->stack_pointer = (void*)stack; /* fs:[8] */ + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +struct coroutine_context * __fastcall coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target); + +static inline void coroutine_destroy(struct coroutine_context * context) +{ +} + +#endif /* COROUTINE_WIN32_CONTEXT_H */ diff --git a/coroutine/win64/Context.S b/coroutine/win64/Context.S new file mode 100644 index 0000000000..e0ee38e006 --- /dev/null +++ b/coroutine/win64/Context.S @@ -0,0 +1,77 @@ +## +## This file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 4/11/2018. +## Copyright, 2018, by Samuel Williams. 
+## + +.text + +.globl coroutine_transfer +coroutine_transfer: + # Save the thread information block: + pushq %gs:8 + pushq %gs:16 + + # Save caller registers: + pushq %rbp + pushq %rbx + pushq %rdi + pushq %rsi + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movaps %xmm15, -168(%rsp) + movaps %xmm14, -152(%rsp) + movaps %xmm13, -136(%rsp) + movaps %xmm12, -120(%rsp) + movaps %xmm11, -104(%rsp) + movaps %xmm10, -88(%rsp) + movaps %xmm9, -72(%rsp) + movaps %xmm8, -56(%rsp) + movaps %xmm7, -40(%rsp) + movaps %xmm6, -24(%rsp) + + # Save caller stack pointer: + mov %rsp, (%rcx) + + # Restore callee stack pointer: + mov (%rdx), %rsp + + movaps -24(%rsp), %xmm6 + movaps -40(%rsp), %xmm7 + movaps -56(%rsp), %xmm8 + movaps -72(%rsp), %xmm9 + movaps -88(%rsp), %xmm10 + movaps -104(%rsp), %xmm11 + movaps -120(%rsp), %xmm12 + movaps -136(%rsp), %xmm13 + movaps -152(%rsp), %xmm14 + movaps -168(%rsp), %xmm15 + + # Restore callee stack: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rsi + popq %rdi + popq %rbx + popq %rbp + + # Restore the thread information block: + popq %gs:16 + popq %gs:8 + + # Put the first argument into the return value: + mov %rcx, %rax + + # We pop the return address and jump to it: + ret + +.globl coroutine_trampoline +coroutine_trampoline: + # Do not remove this. This forces 16-byte alignment when entering the coroutine. + ret diff --git a/coroutine/win64/Context.asm b/coroutine/win64/Context.asm new file mode 100644 index 0000000000..8c4dea1c93 --- /dev/null +++ b/coroutine/win64/Context.asm @@ -0,0 +1,79 @@ +;; +;; This file is part of the "Coroutine" project and released under the MIT License. +;; +;; Created by Samuel Williams on 10/5/2018. +;; Copyright, 2018, by Samuel Williams. 
+;; + +.code + +coroutine_transfer proc + ; Save the thread information block: + push qword ptr gs:[8] + push qword ptr gs:[16] + + ; Save caller registers: + push rbp + push rbx + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + + movaps [rsp - 24], xmm6 + movaps [rsp - 40], xmm7 + movaps [rsp - 56], xmm8 + movaps [rsp - 72], xmm9 + movaps [rsp - 88], xmm10 + movaps [rsp - 104], xmm11 + movaps [rsp - 120], xmm12 + movaps [rsp - 136], xmm13 + movaps [rsp - 152], xmm14 + movaps [rsp - 168], xmm15 + + ; Save caller stack pointer: + mov [rcx], rsp + + ; Restore callee stack pointer: + mov rsp, [rdx] + + movaps xmm15, [rsp - 168] + movaps xmm14, [rsp - 152] + movaps xmm13, [rsp - 136] + movaps xmm12, [rsp - 120] + movaps xmm11, [rsp - 104] + movaps xmm10, [rsp - 88] + movaps xmm9, [rsp - 72] + movaps xmm8, [rsp - 56] + movaps xmm7, [rsp - 40] + movaps xmm6, [rsp - 24] + + ; Restore callee stack: + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbx + pop rbp + + ; Restore the thread information block: + pop qword ptr gs:[16] + pop qword ptr gs:[8] + + ; Put the first argument into the return value: + mov rax, rcx + + ; We pop the return address and jump to it: + ret +coroutine_transfer endp + +coroutine_trampoline proc + ; Do not remove this. This forces 16-byte alignment when entering the coroutine. + ret +coroutine_trampoline endp + +end diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h new file mode 100644 index 0000000000..23b21b5c1c --- /dev/null +++ b/coroutine/win64/Context.h @@ -0,0 +1,76 @@ +#ifndef COROUTINE_WIN64_CONTEXT_H +#define COROUTINE_WIN64_CONTEXT_H 1 + +/* + * This file is part of the "Coroutine" project and released under the MIT License. + * + * Created by Samuel Williams on 10/5/2018. + * Copyright, 2018, by Samuel Williams. 
+*/
+
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#define COROUTINE __declspec(noreturn) void
+#define COROUTINE_DECL void
+/* Sizes, in pointer-sized slots, of the two regions coroutine_initialize() zero-fills:
+ * COROUTINE_REGISTERS slots starting at the saved stack pointer, and
+ * COROUTINE_XMM_REGISTERS (1+10*2 = 21) slots immediately below it. */
+enum {
+	COROUTINE_REGISTERS = 8,
+	COROUTINE_XMM_REGISTERS = 1+10*2,
+};
+/* Per-coroutine state. The functions in this header only ever write stack_pointer;
+ * nothing here reads or writes argument. */
+struct coroutine_context
+{
+	void **stack_pointer;
+	void *argument;
+};
+/* Type of a coroutine entry function; coroutine_initialize() stores its address in the initial frame. */
+typedef void(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self);
+/* Declared here, defined outside this header; coroutine_initialize() stores its address just below start's. */
+void coroutine_trampoline(void);
+/* Sets context->stack_pointer to NULL; no stack is attached to the context here. */
+static inline void coroutine_initialize_main(struct coroutine_context * context) {
+	context->stack_pointer = NULL;
+}
+/* Builds the initial frame for a new coroutine at the high end of [stack, stack + size).
+ * On return context->stack_pointer addresses, from low to high addresses:
+ *   COROUTINE_REGISTERS zeroed slots, the value of stack, the value of top,
+ *   the address of coroutine_trampoline, the address of start, one zero slot,
+ *   and four reserved slots ending at top rounded down to a multiple of 16.
+ * The COROUTINE_XMM_REGISTERS slots below stack_pointer are zeroed as well.
+ * Arguments are checked by assert() only: start and stack non-null, size >= 1024. */
+static inline void coroutine_initialize(
+	struct coroutine_context *context,
+	coroutine_start start,
+	void *stack,
+	size_t size
+) {
+	assert(start && stack && size >= 1024);
+
+	// Stack grows down. Force 16-byte alignment.
+	char * top = (char*)stack + size;
+	context->stack_pointer = (void**)((uintptr_t)top & ~0xF); /* clears the low four address bits */
+
+	/* Win64 ABI requires space for arguments */
+	context->stack_pointer -= 4; /* four pointer-sized slots, left unwritten */
+
+	/* Return address */
+	*--context->stack_pointer = 0;
+	*--context->stack_pointer = (void*)(uintptr_t)start;
+	*--context->stack_pointer = (void*)coroutine_trampoline;
+
+	/* Windows Thread Information Block */
+	/* *--context->stack_pointer = 0; */ /* gs:[0x00] is not used */
+	*--context->stack_pointer = (void*)top; /* gs:[0x08] */
+	*--context->stack_pointer = (void*)stack; /* gs:[0x10] */
+
+	context->stack_pointer -= COROUTINE_REGISTERS;
+	memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+	memset(context->stack_pointer - COROUTINE_XMM_REGISTERS, 0, sizeof(void*) * COROUTINE_XMM_REGISTERS);
+}
+/* Prototype only; the definition is not in this header. NOTE(review): presumably saves the running
+ * context into current and resumes target - confirm against the assembly implementation. */
+struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target);
+/* Empty body: nothing in context is released or cleared. */
+static inline void coroutine_destroy(struct coroutine_context * context)
+{
+}
+
+#endif /* COROUTINE_WIN64_CONTEXT_H */
diff --git a/coroutine/x86/Context.S b/coroutine/x86/Context.S
new file mode 100644
index 0000000000..b04e71aa1c
---
/dev/null
+++ b/coroutine/x86/Context.S
@@ -0,0 +1,41 @@
+##
+## This file is part of the "Coroutine" project and released under the MIT License.
+##
+## Created by Samuel Williams on 3/11/2018.
+## Copyright, 2018, by Samuel Williams.
+##
+
+#define TOKEN_PASTE(x,y) x##y
+
+.text
+	# coroutine_transfer: push four registers, store %esp through the pointer in %ecx, load %esp through the pointer in %edx, pop four registers and return. NOTE(review): PREFIXED_SYMBOL is not defined in this file - presumably supplied by the build.
+.globl PREFIXED_SYMBOL(coroutine_transfer)
+PREFIXED_SYMBOL(coroutine_transfer):
+
+	# Save caller registers (four pushes, mirrored by the four pops below)
+	pushl %ebp
+	pushl %ebx
+	pushl %edi
+	pushl %esi
+
+	# Save caller stack pointer: write %esp into the word %ecx points at
+	movl %esp, (%ecx)
+
+	# Restore callee stack pointer: load %esp from the word %edx points at
+	movl (%edx), %esp
+
+	# Restore callee stack: pop in the reverse of the push order above
+	popl %esi
+	popl %edi
+	popl %ebx
+	popl %ebp
+
+	# Save the first argument as the return value (%ecx is never written in this routine, so it still holds its entry value)
+	movl %ecx, %eax
+
+	# Jump to the address on the stack (the one stored on the stack just switched to)
+	ret
+
+#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/coroutine/x86/Context.h b/coroutine/x86/Context.h
new file mode 100644
index 0000000000..f33b338eab
--- /dev/null
+++ b/coroutine/x86/Context.h
@@ -0,0 +1,61 @@
+#ifndef COROUTINE_X86_CONTEXT_H
+#define COROUTINE_X86_CONTEXT_H 1
+
+/*
+ * This file is part of the "Coroutine" project and released under the MIT License.
+ *
+ * Created by Samuel Williams on 3/11/2018.
+ * Copyright, 2018, by Samuel Williams.
+*/
+
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#define COROUTINE __attribute__((noreturn, fastcall)) void
+#define COROUTINE_LIMITED_ADDRESS_SPACE
+/* Number of pointer-sized slots coroutine_initialize() reserves and zero-fills at the saved stack pointer. */
+enum {COROUTINE_REGISTERS = 4};
+/* Per-coroutine state. The functions in this header only ever write stack_pointer;
+ * nothing here reads or writes argument. */
+struct coroutine_context
+{
+	void **stack_pointer;
+	void *argument;
+};
+/* Type of a coroutine entry function (declared noreturn and fastcall via COROUTINE);
+ * coroutine_initialize() stores its address in the initial frame. */
+typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self) __attribute__((fastcall));
+/* Sets context->stack_pointer to NULL; no stack is attached to the context here. */
+static inline void coroutine_initialize_main(struct coroutine_context * context) {
+	context->stack_pointer = NULL;
+}
+/* Builds the initial frame for a new coroutine at the high end of [stack, stack + size).
+ * On return context->stack_pointer addresses, from low to high addresses:
+ *   COROUTINE_REGISTERS zeroed slots, the address of start, and one NULL slot
+ *   ending at top rounded down to a multiple of 16.
+ * Arguments are checked by assert() only: start and stack non-null, size >= 1024. */
+static inline void coroutine_initialize(
+	struct coroutine_context *context,
+	coroutine_start start,
+	void *stack,
+	size_t size
+) {
+	assert(start && stack && size >= 1024);
+
+	// Stack grows down. Force 16-byte alignment.
+	char * top = (char*)stack + size;
+	context->stack_pointer = (void**)((uintptr_t)top & ~0xF); /* clears the low four address bits */
+
+	*--context->stack_pointer = NULL; /* slot directly above start's address */
+	*--context->stack_pointer = (void*)(uintptr_t)start;
+
+	context->stack_pointer -= COROUTINE_REGISTERS;
+	memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+}
+/* Prototype only (fastcall); the definition is not in this header. NOTE(review): presumably saves the
+ * running context into current and resumes target - confirm against the assembly implementation. */
+struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target) __attribute__((fastcall));
+/* Clears context->stack_pointer; the stack memory itself is not touched or freed here. */
+static inline void coroutine_destroy(struct coroutine_context * context)
+{
+	context->stack_pointer = NULL;
+}
+
+#endif /* COROUTINE_X86_CONTEXT_H */
|
