summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- configure.ac 14
-rw-r--r-- coroutine/win32/Context.asm 2
-rw-r--r-- coroutine/win32/Context.h 10
-rw-r--r-- coroutine/win64/Context.asm 37
-rw-r--r-- coroutine/win64/Context.h 24
5 files changed, 68 insertions, 19 deletions
diff --git a/configure.ac b/configure.ac
index 4d74c7737e..4dec975960 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2325,8 +2325,14 @@ AS_IF([test "${universal_binary-no}" = yes ], [
AC_DEFINE_UNQUOTED(STACK_GROW_DIRECTION, $dir)
])
-AC_MSG_CHECKING(native fiber implementation)
+AC_MSG_CHECKING(native coroutine implementation for $target_cpu-$target_os)
AS_CASE(["$target_cpu-$target_os"],
+ [x*64-darwin], [
+ AC_MSG_RESULT(amd64)
+ COROUTINE_H=coroutine/amd64/Context.h
+ AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"])
+ AC_LIBOBJ([coroutine/amd64/Context])
+ ],
[x*64-linux], [
AC_MSG_RESULT(amd64)
COROUTINE_H=coroutine/amd64/Context.h
@@ -2339,6 +2345,12 @@ AS_CASE(["$target_cpu-$target_os"],
AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"])
AC_LIBOBJ([coroutine/x86/Context])
]
+ [x64-mingw32], [
+ AC_MSG_RESULT(x86)
+ COROUTINE_H=coroutine/win64/Context.h
+ AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"])
+ AC_LIBOBJ([coroutine/win64/Context])
+ ]
[*], [
AC_MSG_RESULT(no)
]
diff --git a/coroutine/win32/Context.asm b/coroutine/win32/Context.asm
index b024b22fc7..22b56c0568 100644
--- a/coroutine/win32/Context.asm
+++ b/coroutine/win32/Context.asm
@@ -9,6 +9,8 @@
.code
+assume fs:nothing
+
; Using fastcall is a big win (and it's the same as how x64 works).
; In coroutine transfer, the arguments are passed in ecx and edx. We don't need
; to touch these in order to pass them to the destination coroutine.
diff --git a/coroutine/win32/Context.h b/coroutine/win32/Context.h
index ba6b1fd07b..95b4ccdba1 100644
--- a/coroutine/win32/Context.h
+++ b/coroutine/win32/Context.h
@@ -24,7 +24,7 @@ struct coroutine_context
void **stack_pointer;
};
-typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self);
static inline void coroutine_initialize(
coroutine_context *context,
@@ -40,12 +40,12 @@ static inline void coroutine_initialize(
return;
}
+ *--context->stack_pointer = (void*)start;
+
/* Windows Thread Information Block */
*--context->stack_pointer = 0; /* fs:[0] */
- *--context->stack_pointer = stack_pointer + stack_size; /* fs:[4] */
- *--context->stack_pointer = (void*)stack_pointer; /* fs:[8] */
-
- *--context->stack_pointer = (void*)start;
+ *--context->stack_pointer = (void*)stack_pointer; /* fs:[4] */
+ *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* fs:[8] */
context->stack_pointer -= COROUTINE_REGISTERS;
memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
diff --git a/coroutine/win64/Context.asm b/coroutine/win64/Context.asm
index 4031c613fe..59673ffa3e 100644
--- a/coroutine/win64/Context.asm
+++ b/coroutine/win64/Context.asm
@@ -9,9 +9,8 @@
coroutine_transfer proc
; Save the thread information block:
- push gs:[0x00]
- push gs:[0x08]
- push gs:[0x10]
+ push qword ptr gs:[8]
+ push qword ptr gs:[16]
; Save caller registers:
push rbp
@@ -23,12 +22,34 @@ coroutine_transfer proc
push r14
push r15
+ movaps [rsp - 24], xmm6
+ movaps [rsp - 40], xmm7
+ movaps [rsp - 56], xmm8
+ movaps [rsp - 72], xmm9
+ movaps [rsp - 88], xmm10
+ movaps [rsp - 104], xmm11
+ movaps [rsp - 120], xmm12
+ movaps [rsp - 136], xmm13
+ movaps [rsp - 152], xmm14
+ movaps [rsp - 168], xmm15
+
; Save caller stack pointer:
mov [rcx], rsp
; Restore callee stack pointer:
mov rsp, [rdx]
+ movaps xmm15, [rsp - 168]
+ movaps xmm14, [rsp - 152]
+ movaps xmm13, [rsp - 136]
+ movaps xmm12, [rsp - 120]
+ movaps xmm11, [rsp - 104]
+ movaps xmm10, [rsp - 88]
+ movaps xmm9, [rsp - 72]
+ movaps xmm8, [rsp - 56]
+ movaps xmm7, [rsp - 40]
+ movaps xmm6, [rsp - 24]
+
; Restore callee stack:
pop r15
pop r14
@@ -40,9 +61,8 @@ coroutine_transfer proc
pop rbp
; Restore the thread information block:
- pop gs:[0x10]
- pop gs:[0x08]
- pop gs:[0x00]
+ pop qword ptr gs:[16]
+ pop qword ptr gs:[8]
; Put the first argument into the return value:
mov rax, rcx
@@ -51,4 +71,9 @@ coroutine_transfer proc
ret
coroutine_transfer endp
+coroutine_trampoline proc
+ ; Do not remove this. This forces 16-byte alignment when entering the coroutine.
+ ret
+coroutine_trampoline endp
+
end
diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h
index 32d6e1038b..db003a2ee6 100644
--- a/coroutine/win64/Context.h
+++ b/coroutine/win64/Context.h
@@ -17,13 +17,16 @@ extern "C" {
#define COROUTINE __declspec(noreturn) void
const size_t COROUTINE_REGISTERS = 8;
+const size_t COROUTINE_XMM_REGISTERS = 1+10*2;
struct coroutine_context
{
void **stack_pointer;
};
-typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+
+void coroutine_trampoline();
static inline void coroutine_initialize(
coroutine_context *context,
@@ -31,7 +34,8 @@ static inline void coroutine_initialize(
void *stack_pointer,
size_t stack_size
) {
- context->stack_pointer = (void**)stack_pointer;
+ /* Force 16-byte alignment */
+ context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF);
if (!start) {
assert(!context->stack_pointer);
@@ -39,16 +43,22 @@ static inline void coroutine_initialize(
return;
}
- /* Windows Thread Information Block */
- *--context->stack_pointer = 0; /* gs:[0x00] */
- *--context->stack_pointer = stack_pointer + stack_size; /* gs:[0x08] */
- *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x10] */
-
+ /* Win64 ABI requires space for arguments */
+ context->stack_pointer -= 4;
+ /* Return address */
+ *--context->stack_pointer = 0;
*--context->stack_pointer = (void*)start;
+ *--context->stack_pointer = (void*)coroutine_trampoline;
+
+ /* Windows Thread Information Block */
+ /* *--context->stack_pointer = 0; */ /* gs:[0x00] is not used */
+ *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x08] */
+ *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* gs:[0x10] */
context->stack_pointer -= COROUTINE_REGISTERS;
memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+ memset(context->stack_pointer - COROUTINE_XMM_REGISTERS, 0, sizeof(void*) * COROUTINE_XMM_REGISTERS);
}
coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);