From 708bfd21156828526fe72de2cedecfaca6647dc1 Mon Sep 17 00:00:00 2001 From: normal Date: Sun, 29 Jul 2018 20:47:33 +0000 Subject: thread_pthread: remove timer-thread by restructuring GVL To reduce resource use and reduce CI failure; remove timer-thread. Single-threaded Ruby processes (including forked children) will never see extra thread overhead. This prevents glibc and jemalloc from going into multi-threaded mode and initializing locks or causing fragmentation via arena explosion. The GVL implements its own wait-queue as a ccan/list to permit controlling wakeup order. Timeslice under contention is handled by a designated timer thread (similar to choosing a "patrol_thread" for current deadlock checking). There is only one self-pipe, now, as wakeups for timeslice are done independently using condition variables. This reduces FD pressure slightly. Signal handling is handled directly by a Ruby Thread (instead of timer-thread) by exposing signal self-pipe to callers of rb_thread_fd_select, native_sleep, rb_wait_for_single_fd, etc... Acquiring, using, and releasing the self-pipe is exposed via 4 new internal functions: 1) rb_sigwait_fd_get - exclusively acquire timer_thread_pipe.normal[0] 2) rb_sigwait_fd_sleep - sleep and wait for signal (and no other FDs) 3) rb_sigwait_fd_put - release acquired result from rb_sigwait_fd_get 4) rb_sigwait_fd_migrate - migrate signal handling to another thread after calling rb_sigwait_fd_put. rb_sigwait_fd_migrate is necessary for waitpid callers because only one thread can wait on self-pipe at a time, otherwise a deadlock will occur if threads fight over the self-pipe. TRAP_INTERRUPT_MASK is now set for the main thread directly in signal handler via rb_thread_wakeup_timer_thread. Originally, I wanted to use POSIX timers (timer_create/timer_settime) for this. Unfortunately, this proved unfeasible as Mutex#sleep resumes on spurious wakeups and test/thread/test_cv.rb::test_condvar_timed_wait failed. 
Using pthread_sigmask to mask out SIGVTALRM fixed that test, but test/fiddle/test_function.rb::test_nogvl_poll proved there'd be some unavoidable (and frequent) incompatibilities from that approach. Finally, this allows us to drop thread_destruct_lock and interrupt current ec directly. We don't need to rely on vm->thread_destruct_lock or a coherent vm->running_thread on any platform. Separate timer-thread for time slice and signal handling is relegated to thread_win32.c, now. [ruby-core:88088] [Misc #14937] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64107 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- process.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 8 deletions(-) (limited to 'process.c') diff --git a/process.c b/process.c index dd70c7104c..57ad802631 100644 --- a/process.c +++ b/process.c @@ -936,13 +936,51 @@ void rb_native_cond_signal(rb_nativethread_cond_t *); void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *); rb_nativethread_cond_t *rb_sleep_cond_get(const rb_execution_context_t *); void rb_sleep_cond_put(rb_nativethread_cond_t *); +int rb_sigwait_fd_get(const rb_thread_t *); +void rb_sigwait_sleep(const rb_thread_t *, int fd, const struct timespec *); +void rb_sigwait_fd_put(const rb_thread_t *, int fd); + +/* + * When a thread is done using sigwait_fd and there are other threads + * sleeping on waitpid, we must kick one of the threads out of + * rb_native_cond_wait so it can switch to rb_sigwait_sleep + */ +static void +sigwait_fd_migrate_sleeper(rb_vm_t *vm) +{ + struct waitpid_state *w = 0; + + list_for_each(&vm->waiting_pids, w, wnode) { + if (!w->cond) continue; /* somebody else already got sigwait_fd */ + rb_native_cond_signal(w->cond); + return; + } + list_for_each(&vm->waiting_grps, w, wnode) { + if (!w->cond) continue; /* somebody else already got sigwait_fd */ + rb_native_cond_signal(w->cond); + return; + } +} + +void +rb_sigwait_fd_migrate(rb_vm_t *vm) +{ + 
rb_native_mutex_lock(&vm->waitpid_lock); + sigwait_fd_migrate_sleeper(vm); + rb_native_mutex_unlock(&vm->waitpid_lock); +} static void waitpid_notify(struct waitpid_state *w, rb_pid_t ret) { w->ret = ret; list_del_init(&w->wnode); - rb_native_cond_signal(w->cond); + if (w->cond) { + rb_native_cond_signal(w->cond); + } + else { + /* w is owned by this thread */ + } } #ifdef _WIN32 /* for spawnvp result from mjit.c */ @@ -954,7 +992,7 @@ waitpid_notify(struct waitpid_state *w, rb_pid_t ret) #endif extern volatile unsigned int ruby_nocldwait; /* signal.c */ -/* called by timer thread */ +/* called by timer thread or thread which acquired sigwait_fd */ static void waitpid_each(struct list_head *head) { @@ -1008,6 +1046,17 @@ waitpid_state_init(struct waitpid_state *w, rb_pid_t pid, int options) w->options = options; } +static const struct timespec * +sigwait_sleep_time(void) +{ + if (SIGCHLD_LOSSY) { + static const struct timespec busy_wait = { 0, 100000000 }; + + return &busy_wait; + } + return 0; +} + /* * must be called with vm->waitpid_lock held, this is not interruptible */ @@ -1026,13 +1075,30 @@ ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options, if (w.ret == -1) w.errnum = errno; } else { - w.cond = cond; + int sigwait_fd; + w.ec = 0; list_add(w.pid > 0 ? 
&vm->waiting_pids : &vm->waiting_grps, &w.wnode); do { - rb_native_cond_wait(w.cond, &vm->waitpid_lock); + sigwait_fd = rb_sigwait_fd_get(0); + + if (sigwait_fd >= 0) { + w.cond = 0; + rb_native_mutex_unlock(&vm->waitpid_lock); + rb_sigwait_sleep(0, sigwait_fd, sigwait_sleep_time()); + rb_native_mutex_lock(&vm->waitpid_lock); + rb_sigwait_fd_put(0, sigwait_fd); + } + else { + w.cond = cond; + rb_native_cond_wait(w.cond, &vm->waitpid_lock); + } } while (!w.ret); list_del(&w.wnode); + + /* we're done, maybe other waitpid callers are not: */ + if (sigwait_fd >= 0) + sigwait_fd_migrate_sleeper(vm); } if (status) { *status = w.status; @@ -1047,7 +1113,10 @@ waitpid_wake(void *x) struct waitpid_state *w = x; /* th->interrupt_lock is already held by rb_threadptr_interrupt_common */ - rb_native_cond_signal(w->cond); + if (w->cond) + rb_native_cond_signal(w->cond); + else + rb_thread_wakeup_timer_thread(0); /* kick sigwait_fd */ } static void * @@ -1055,6 +1124,7 @@ waitpid_nogvl(void *x) { struct waitpid_state *w = x; rb_thread_t *th = rb_ec_thread_ptr(w->ec); + int sigwait_fd = -1; rb_native_mutex_lock(&th->interrupt_lock); /* @@ -1062,13 +1132,30 @@ waitpid_nogvl(void *x) * by the time we enter this. And we may also be interrupted. 
*/ if (!w->ret && !RUBY_VM_INTERRUPTED_ANY(w->ec)) { - if (SIGCHLD_LOSSY) { - rb_thread_wakeup_timer_thread(); + sigwait_fd = rb_sigwait_fd_get(th); + if (sigwait_fd >= 0) { + rb_nativethread_cond_t *cond = w->cond; + + w->cond = 0; + rb_native_mutex_unlock(&th->interrupt_lock); + rb_sigwait_sleep(th, sigwait_fd, sigwait_sleep_time()); + rb_native_mutex_lock(&th->interrupt_lock); + w->cond = cond; + rb_sigwait_fd_put(th, sigwait_fd); + } + else { + /* another thread calling rb_sigwait_sleep will process + * signals for us */ + if (SIGCHLD_LOSSY) { + rb_thread_wakeup_timer_thread(0); + } + rb_native_cond_wait(w->cond, &th->interrupt_lock); } - rb_native_cond_wait(w->cond, &th->interrupt_lock); } rb_native_mutex_unlock(&th->interrupt_lock); + if (sigwait_fd >= 0) + rb_sigwait_fd_migrate(th->vm); return 0; } -- cgit v1.2.3