diff options
Diffstat (limited to 'thread_pthread_mn.c')
| -rw-r--r-- | thread_pthread_mn.c | 184 |
1 files changed, 110 insertions, 74 deletions
diff --git a/thread_pthread_mn.c b/thread_pthread_mn.c index cc0dae3b70..c9e6649832 100644 --- a/thread_pthread_mn.c +++ b/thread_pthread_mn.c @@ -3,26 +3,23 @@ #if USE_MN_THREADS static void timer_thread_unregister_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags); +static void timer_thread_wakeup_thread_locked(struct rb_thread_sched *sched, rb_thread_t *th, uint32_t event_serial); static bool timer_thread_cancel_waiting(rb_thread_t *th) { bool canceled = false; - if (th->sched.waiting_reason.flags) { - rb_native_mutex_lock(&timer_th.waiting_lock); - { - if (th->sched.waiting_reason.flags) { - canceled = true; - ccan_list_del_init(&th->sched.waiting_reason.node); - if (th->sched.waiting_reason.flags & (thread_sched_waiting_io_read | thread_sched_waiting_io_write)) { - timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd, th->sched.waiting_reason.flags); - } - th->sched.waiting_reason.flags = thread_sched_waiting_none; - } + rb_native_mutex_lock(&timer_th.waiting_lock); + { + if (th->sched.waiting_reason.flags) { + canceled = true; + ccan_list_del_init(&th->sched.waiting_reason.node); + timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd, th->sched.waiting_reason.flags); + th->sched.waiting_reason.flags = thread_sched_waiting_none; } - rb_native_mutex_unlock(&timer_th.waiting_lock); } + rb_native_mutex_unlock(&timer_th.waiting_lock); return canceled; } @@ -41,10 +38,10 @@ ubf_event_waiting(void *ptr) th->unblock.func = NULL; th->unblock.arg = NULL; - bool canceled = timer_thread_cancel_waiting(th); - thread_sched_lock(sched, th); { + bool canceled = timer_thread_cancel_waiting(th); + if (sched->running == th) { RUBY_DEBUG_LOG("not waiting yet"); } @@ -58,7 +55,7 @@ ubf_event_waiting(void *ptr) thread_sched_unlock(sched, th); } -static bool timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel); +static bool timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel, uint32_t event_serial); // return true if timed out static bool @@ -68,20 +65,30 @@ thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, volatile bool timedout = false, need_cancel = false; - if (timer_thread_register_waiting(th, fd, events, rel)) { - RUBY_DEBUG_LOG("wait fd:%d", fd); + uint32_t event_serial = ++th->sched.event_serial; // overflow is okay - RB_VM_SAVE_MACHINE_CONTEXT(th); - setup_ubf(th, ubf_event_waiting, (void *)th); - RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED, th); + thread_sched_lock(sched, th); + { + // NOTE: there's a lock ordering inversion here with the ubf call, but it's benign. + if (ubf_set(th, ubf_event_waiting, (void *)th, NULL)) { + thread_sched_unlock(sched, th); + return false; + } + + if (timer_thread_register_waiting(th, fd, events, rel, event_serial)) { + RUBY_DEBUG_LOG("wait fd:%d", fd); + + RB_VM_SAVE_MACHINE_CONTEXT(th); + + RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED, th); - thread_sched_lock(sched, th); - { if (th->sched.waiting_reason.flags == thread_sched_waiting_none) { - // already awaken + th->sched.event_serial++; + // timer thread has dequeued us already, but it won't try to wake us because we bumped our serial } else if (RUBY_VM_INTERRUPTED(th->ec)) { + th->sched.event_serial++; // make sure timer thread doesn't try to wake us need_cancel = true; } else { @@ -95,21 +102,22 @@ thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, } timedout = th->sched.waiting_reason.data.result == 0; - } - thread_sched_unlock(sched, th); - - if (need_cancel) { - timer_thread_cancel_waiting(th); - } - setup_ubf(th, NULL, NULL); // TODO: maybe it is already NULL? + if (need_cancel) { + timer_thread_cancel_waiting(th); + } - th->status = THREAD_RUNNABLE; - } - else { - RUBY_DEBUG_LOG("can not wait fd:%d", fd); - return false; + th->status = THREAD_RUNNABLE; + } + else { + RUBY_DEBUG_LOG("can not wait fd:%d", fd); + timedout = false; + } } + thread_sched_unlock(sched, th); + + // if ubf triggered between sched unlock and ubf clear, sched->running == th here + ubf_clear(th, false); VM_ASSERT(sched->running == th); @@ -184,12 +192,7 @@ nt_thread_stack_size(void) static struct nt_stack_chunk_header * nt_alloc_thread_stack_chunk(void) { - int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE; -#if defined(MAP_STACK) && !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__) - mmap_flags |= MAP_STACK; -#endif - - const char *m = (void *)mmap(NULL, MSTACK_CHUNK_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + const char *m = (void *)mmap(NULL, MSTACK_CHUNK_SIZE, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (m == MAP_FAILED) { return NULL; } @@ -208,6 +211,12 @@ nt_alloc_thread_stack_chunk(void) VM_ASSERT(stack_count <= UINT16_MAX); + // Enable read/write for the header pages + if (mprotect((void *)m, (size_t)header_page_cnt * MSTACK_PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) { + munmap((void *)m, MSTACK_CHUNK_SIZE); + return NULL; + } + struct nt_stack_chunk_header *ch = (struct nt_stack_chunk_header *)m; ch->start_page = header_page_cnt; @@ -236,7 +245,7 @@ nt_stack_chunk_get_msf(const rb_vm_t *vm, const char *mstack) return (struct nt_machine_stack_footer *)&mstack[msz - sizeof(struct nt_machine_stack_footer)]; } -static void * +static void nt_stack_chunk_get_stack(const rb_vm_t *vm, struct nt_stack_chunk_header *ch, size_t idx, void **vm_stack, void **machine_stack) { // TODO: only support stack going down @@ -261,8 +270,6 @@ nt_stack_chunk_get_stack(const rb_vm_t *vm, struct nt_stack_chunk_header *ch, si *vm_stack = (void *)vstack; *machine_stack = (void *)mstack; - - return (void *)guard_page; } RBIMPL_ATTR_MAYBE_UNUSED() @@ -286,17 +293,6 @@ nt_stack_chunk_dump(void) } static int -nt_guard_page(const char *p, size_t len) -{ - if (mprotect((void *)p, len, PROT_NONE) != -1) { - return 0; - } - else { - return errno; - } -} - -static int nt_alloc_stack(rb_vm_t *vm, void **vm_stack, void **machine_stack) { int err = 0; @@ -314,8 +310,26 @@ nt_alloc_stack(rb_vm_t *vm, void **vm_stack, void **machine_stack) RUBY_DEBUG_LOG("uninitialized_stack_count:%d", ch->uninitialized_stack_count); size_t idx = ch->stack_count - ch->uninitialized_stack_count--; - void *guard_page = nt_stack_chunk_get_stack(vm, ch, idx, vm_stack, machine_stack); - err = nt_guard_page(guard_page, MSTACK_PAGE_SIZE); + + // The chunk was mapped PROT_NONE; enable the VM stack and + // machine stack pages, leaving the guard page as PROT_NONE. + char *stack_start = nt_stack_chunk_get_stack_start(ch, idx); + size_t vm_stack_size = vm->default_params.thread_vm_stack_size; + size_t mstack_size = nt_thread_stack_size() - vm_stack_size - MSTACK_PAGE_SIZE; + char *mstack_start = stack_start + vm_stack_size + MSTACK_PAGE_SIZE; + + int mstack_flags = MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE; +#if defined(MAP_STACK) && !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__) + mstack_flags |= MAP_STACK; +#endif + + if (mprotect(stack_start, vm_stack_size, PROT_READ | PROT_WRITE) != 0 || + mmap(mstack_start, mstack_size, PROT_READ | PROT_WRITE, mstack_flags, -1, 0) == MAP_FAILED) { + err = errno; + } + else { + nt_stack_chunk_get_stack(vm, ch, idx, vm_stack, machine_stack); + } } else { nt_free_stack_chunks = ch->prev_free_chunk; @@ -397,11 +411,15 @@ native_thread_check_and_create_shared(rb_vm_t *vm) rb_native_mutex_lock(&vm->ractor.sched.lock); { - unsigned int snt_cnt = vm->ractor.sched.snt_cnt; - if (!vm->ractor.main_ractor->threads.sched.enable_mn_threads) snt_cnt++; // do not need snt for main ractor + unsigned int schedulable_ractor_cnt = vm->ractor.cnt; + RUBY_ASSERT(schedulable_ractor_cnt >= 1); + + if (!vm->ractor.main_ractor->threads.sched.enable_mn_threads) + schedulable_ractor_cnt--; // do not need snt for main ractor + unsigned int snt_cnt = vm->ractor.sched.snt_cnt; if (((int)snt_cnt < MINIMUM_SNT) || - (snt_cnt < vm->ractor.cnt && + (snt_cnt < schedulable_ractor_cnt && snt_cnt < vm->ractor.sched.max_cpu)) { RUBY_DEBUG_LOG("added snt:%u dnt:%u ractor_cnt:%u grq_cnt:%u", @@ -450,7 +468,7 @@ co_start(struct coroutine_context *from, struct coroutine_context *self) // RUBY_DEBUG_LOG("th:%u", rb_th_serial(th)); - thread_sched_set_lock_owner(sched, th); + thread_sched_set_locked(sched, th); thread_sched_add_running_thread(TH_SCHED(th), th); thread_sched_unlock(sched, th); { @@ -475,13 +493,11 @@ co_start(struct coroutine_context *from, struct coroutine_context *self) coroutine_transfer0(self, nt->nt_context, true); } else { - rb_vm_t *vm = th->vm; - bool has_ready_ractor = vm->ractor.sched.grq_cnt > 0; // at least this ractor is not queued rb_thread_t *next_th = sched->running; - if (!has_ready_ractor && next_th && !next_th->nt) { + if (next_th && !next_th->nt) { // switch to the next thread - thread_sched_set_lock_owner(sched, NULL); + thread_sched_set_unlocked(sched, NULL); th->sched.finished = true; thread_sched_switch0(th->sched.context, next_th, nt, true); } @@ -515,6 +531,7 @@ native_thread_create_shared(rb_thread_t *th) th->ec->machine.stack_start = (void *)((uintptr_t)machine_stack + machine_stack_size); th->ec->machine.stack_maxsize = machine_stack_size; // TODO th->sched.context_stack = machine_stack; + th->sched.context_stack_size = machine_stack_size; th->sched.context = ruby_xmalloc(sizeof(struct coroutine_context)); coroutine_initialize(th->sched.context, co_start, machine_stack, machine_stack_size); @@ -610,11 +627,17 @@ kqueue_wait(rb_vm_t *vm) struct timespec *timeout = NULL; int timeout_ms = timer_thread_set_timeout(vm); - if (timeout_ms >= 0) { + if (timeout_ms > 0) { calculated_timeout.tv_sec = timeout_ms / 1000; calculated_timeout.tv_nsec = (timeout_ms % 1000) * 1000000; timeout = &calculated_timeout; } + else if (timeout_ms == 0) { + // Relying on the absence of other members of struct timespec is not strictly portable, + // and kevent needs a 0-valued timespec to mean immediate timeout. + memset(&calculated_timeout, 0, sizeof(struct timespec)); + timeout = &calculated_timeout; + } return kevent(timer_th.event_fd, NULL, 0, timer_th.finished_events, KQUEUE_EVENTS_MAX, timeout); } @@ -676,7 +699,7 @@ kqueue_already_registered(int fd) // return false if the fd is not waitable or not need to wait. static bool -timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel) +timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel, uint32_t event_serial) { RUBY_DEBUG_LOG("th:%u fd:%d flag:%d rel:%lu", rb_th_serial(th), fd, flags, rel ? (unsigned long)*rel : 0); @@ -803,6 +826,7 @@ timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting th->sched.waiting_reason.data.timeout = abs; th->sched.waiting_reason.data.fd = fd; th->sched.waiting_reason.data.result = 0; + th->sched.waiting_reason.data.event_serial = event_serial; } if (abs == 0) { // no timeout @@ -835,8 +859,8 @@ timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting verify_waiting_list(); - // update timeout seconds - timer_thread_wakeup(); + // update timeout seconds; force wake so timer thread notices short deadlines + timer_thread_wakeup_force(); } } else { @@ -851,6 +875,10 @@ timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting static void timer_thread_unregister_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags) { + if (!(th->sched.waiting_reason.flags & (thread_sched_waiting_io_read | thread_sched_waiting_io_write))) { + return; + } + RUBY_DEBUG_LOG("th:%u fd:%d", rb_th_serial(th), fd); #if HAVE_SYS_EVENT_H kqueue_unregister_waiting(fd, flags); @@ -881,7 +909,7 @@ timer_thread_setup_mn(void) #endif RUBY_DEBUG_LOG("comm_fds:%d/%d", timer_th.comm_fds[0], timer_th.comm_fds[1]); - timer_thread_register_waiting(NULL, timer_th.comm_fds[0], thread_sched_waiting_io_read | thread_sched_waiting_io_force, NULL); + timer_thread_register_waiting(NULL, timer_th.comm_fds[0], thread_sched_waiting_io_read | thread_sched_waiting_io_force, NULL, 0); } static int @@ -970,6 +998,8 @@ timer_thread_polling(rb_vm_t *vm) (filter == EVFILT_READ) ? "read/" : "", (filter == EVFILT_WRITE) ? "write/" : ""); + struct rb_thread_sched *sched = TH_SCHED(th); + thread_sched_lock(sched, th); rb_native_mutex_lock(&timer_th.waiting_lock); { if (th->sched.waiting_reason.flags) { @@ -980,14 +1010,16 @@ timer_thread_polling(rb_vm_t *vm) th->sched.waiting_reason.flags = thread_sched_waiting_none; th->sched.waiting_reason.data.fd = -1; th->sched.waiting_reason.data.result = filter; + uint32_t event_serial = th->sched.waiting_reason.data.event_serial; - timer_thread_wakeup_thread(th); + timer_thread_wakeup_thread_locked(sched, th, event_serial); } else { // already released } } rb_native_mutex_unlock(&timer_th.waiting_lock); + thread_sched_unlock(sched, th); } } #else @@ -1012,6 +1044,8 @@ timer_thread_polling(rb_vm_t *vm) (events & EPOLLERR) ? "err/" : "", (events & EPOLLHUP) ? "hup/" : ""); + struct rb_thread_sched *sched = TH_SCHED(th); + thread_sched_lock(sched, th); rb_native_mutex_lock(&timer_th.waiting_lock); { if (th->sched.waiting_reason.flags) { @@ -1022,14 +1056,16 @@ timer_thread_polling(rb_vm_t *vm) th->sched.waiting_reason.flags = thread_sched_waiting_none; th->sched.waiting_reason.data.fd = -1; th->sched.waiting_reason.data.result = (int)events; + uint32_t event_serial = th->sched.waiting_reason.data.event_serial; - timer_thread_wakeup_thread(th); + timer_thread_wakeup_thread_locked(sched, th, event_serial); } else { // already released } } rb_native_mutex_unlock(&timer_th.waiting_lock); + thread_sched_unlock(sched, th); } } #endif @@ -1058,12 +1094,12 @@ timer_thread_polling(rb_vm_t *vm) switch (r) { case 0: // timeout - rb_native_mutex_lock(&vm->ractor.sched.lock); + ractor_sched_lock(vm, NULL); { // (1-1) timeslice timer_thread_check_timeslice(vm); } - rb_native_mutex_unlock(&vm->ractor.sched.lock); + ractor_sched_unlock(vm, NULL); break; case -1: // error |
