| author | Luke Gruber <luke.gruber@shopify.com> | 2025-08-18 10:47:40 -0400 |
|---|---|---|
| committer | John Hawthorn <john@hawthorn.email> | 2025-09-25 15:29:47 -0700 |
| commit | 62430c19c9f1ab49429cebe65f30588472648c95 (patch) | |
| tree | f419d7e74565ea50dd8e92c6cbce890511363f5d | |
| parent | 50393d1ada756fa03febb6566b6820bb1a37036c (diff) | |
Properly unlock locked mutexes on thread cleanup.
Mutexes were being improperly unlocked on thread cleanup. This bug was
introduced in 050a8954395.
We must keep a reference from the mutex to the thread, because if the fiber
is collected before the mutex is, we cannot unlink the mutex from the thread in
`mutex_free`. A mutex that is freed without being unlinked from the thread
causes bugs in `rb_threadptr_unlock_all_locking_mutexes`.
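For concreteness, here is a minimal standalone Ruby sketch of that failure mode, distilled from the regression tests added in this patch (the loop count and `GC.start` placement come straight from those tests; the comments describing the pre-fix behavior paraphrase this commit message):

```ruby
# Sketch distilled from the regression tests added below: each fiber
# terminates while still holding the lock, so the mutex can only be
# cleaned up later, by GC or by thread cleanup.
5.times do
  m = Mutex.new
  Fiber.new do
    GC.start # add GC pressure so dead fibers are collected promptly
    m.lock   # never unlocked; the fiber dies holding the mutex
  end.resume
  # Before this fix: if the fiber was swept before `m`, mutex_free could
  # no longer unlink `m` from the owning thread's keeping_mutexes list,
  # corrupting the state that rb_threadptr_unlock_all_locking_mutexes walks.
end
GC.start
```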
We now mark the owning fiber when a mutex is locked, and we mark the thread
as well. However, the fiber can still be freed in the same GC cycle as the
mutex, so the reference to the thread is still needed.
The reason we need to mark the fiber at all is that `mutex_owned_p()` has an
ABA problem: if the owning fiber is collected while the mutex is still locked,
a new fiber could be allocated at the same memory address, producing false
positives.
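A standalone sketch of that ABA scenario, mirroring the `owned?` regression test added below (the fiber count is arbitrary and the raise message is illustrative):

```ruby
# ABA sketch: lock mutexes from short-lived fibers, collect those owner
# fibers, then allocate fresh fibers that may reuse the same addresses.
mutexes = 1000.times.map { Mutex.new }
mutexes.each { |m| Fiber.new { m.lock }.resume } # each owner dies locked
GC.start # sweep the dead owner fibers; the mutexes remain locked

1000.times do
  Fiber.new do
    # Without marking the owner fiber, a new fiber allocated at a dead
    # owner's address could make mutex_owned_p() return a false positive
    # here, and `owned?` would wrongly report this fiber as the owner.
    raise "ABA false positive" if mutexes.any?(&:owned?)
  end.resume
end
```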
Fixes [Bug #21342]
Co-authored-by: John Hawthorn <john@hawthorn.email>
| -rw-r--r-- | test/ruby/test_thread.rb | 61 |
| -rw-r--r-- | thread.c | 2 |
| -rw-r--r-- | thread_sync.c | 73 |
3 files changed, 120 insertions, 16 deletions
```diff
diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb
index 12ba1165ed..01a1e51025 100644
--- a/test/ruby/test_thread.rb
+++ b/test/ruby/test_thread.rb
@@ -1589,4 +1589,65 @@ q.pop
       frame_for_deadlock_test_2 { t.join }
     INPUT
   end
+
+  # [Bug #21342]
+  def test_unlock_locked_mutex_with_collected_fiber
+    bug21127 = '[ruby-core:120930] [Bug #21127]'
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      5.times do
+        m = Mutex.new
+        Thread.new do
+          m.synchronize do
+          end
+        end.join
+        Fiber.new do
+          GC.start
+          m.lock
+        end.resume
+      end
+    end;
+  end
+
+  def test_unlock_locked_mutex_with_collected_fiber2
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      MUTEXES = []
+      5.times do
+        m = Mutex.new
+        Fiber.new do
+          GC.start
+          m.lock
+        end.resume
+        MUTEXES << m
+      end
+      10.times do
+        MUTEXES.clear
+        GC.start
+      end
+    end;
+  end
+
+  def test_mutexes_locked_in_fiber_dont_have_aba_issue_with_new_fibers
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      mutexes = 1000.times.map do
+        Mutex.new
+      end
+
+      mutexes.map do |m|
+        Fiber.new do
+          m.lock
+        end.resume
+      end
+
+      GC.start
+
+      1000.times.map do
+        Fiber.new do
+          raise "FAILED!" if mutexes.any?(&:owned?)
+        end.resume
+      end
+    end;
+  end
 end
diff --git a/thread.c b/thread.c
--- a/thread.c
+++ b/thread.c
@@ -442,7 +442,7 @@ rb_threadptr_unlock_all_locking_mutexes(rb_thread_t *th)
         th->keeping_mutexes = mutex->next_mutex;
 
         // rb_warn("mutex #<%p> was not unlocked by thread #<%p>", (void *)mutex, (void*)th);
-
+        VM_ASSERT(mutex->fiber);
         const char *error_message = rb_mutex_unlock_th(mutex, th, mutex->fiber);
         if (error_message) rb_bug("invalid keeping_mutexes: %s", error_message);
     }
diff --git a/thread_sync.c b/thread_sync.c
index dd54e82671..0fc70224ff 100644
--- a/thread_sync.c
+++ b/thread_sync.c
@@ -8,6 +8,7 @@ static VALUE rb_eClosedQueueError;
 /* Mutex */
 typedef struct rb_mutex_struct {
     rb_fiber_t *fiber;
+    VALUE thread; // even if the fiber is collected, we might need access to the thread in mutex_free
     struct rb_mutex_struct *next_mutex;
     struct ccan_list_head waitq; /* protected by GVL */
 } rb_mutex_t;
@@ -106,8 +107,6 @@ static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_fib
  *
  */
 
-#define mutex_mark ((void(*)(void*))0)
-
 static size_t
 rb_mutex_num_waiting(rb_mutex_t *mutex)
 {
@@ -123,13 +122,39 @@ rb_mutex_num_waiting(rb_mutex_t *mutex)
 
 rb_thread_t* rb_fiber_threadptr(const rb_fiber_t *fiber);
 
+static bool
+locked_p(rb_mutex_t *mutex)
+{
+    return mutex->fiber != 0;
+}
+
+static void
+mutex_mark(void *ptr)
+{
+    rb_mutex_t *mutex = ptr;
+    VALUE fiber;
+    if (locked_p(mutex)) {
+        fiber = rb_fiberptr_self(mutex->fiber); // rb_fiber_t* doesn't move along with fiber object
+        if (fiber) rb_gc_mark_movable(fiber);
+        rb_gc_mark_movable(mutex->thread);
+    }
+}
+
+static void
+mutex_compact(void *ptr)
+{
+    rb_mutex_t *mutex = ptr;
+    if (locked_p(mutex)) {
+        mutex->thread = rb_gc_location(mutex->thread);
+    }
+}
+
 static void
 mutex_free(void *ptr)
 {
     rb_mutex_t *mutex = ptr;
-    if (mutex->fiber) {
-        /* rb_warn("free locked mutex"); */
-        const char *err = rb_mutex_unlock_th(mutex, rb_fiber_threadptr(mutex->fiber), mutex->fiber);
+    if (locked_p(mutex)) {
+        const char *err = rb_mutex_unlock_th(mutex, rb_thread_ptr(mutex->thread), mutex->fiber);
         if (err) rb_bug("%s", err);
     }
     ruby_xfree(ptr);
@@ -143,7 +168,7 @@ mutex_memsize(const void *ptr)
 
 static const rb_data_type_t mutex_data_type = {
     "mutex",
-    {mutex_mark, mutex_free, mutex_memsize,},
+    {mutex_mark, mutex_free, mutex_memsize, mutex_compact,},
     0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
 };
 
@@ -204,12 +229,13 @@ rb_mutex_locked_p(VALUE self)
 {
     rb_mutex_t *mutex = mutex_ptr(self);
 
-    return RBOOL(mutex->fiber);
+    return RBOOL(locked_p(mutex));
 }
 
 static void
 thread_mutex_insert(rb_thread_t *thread, rb_mutex_t *mutex)
 {
+    RUBY_ASSERT(!mutex->next_mutex);
     if (thread->keeping_mutexes) {
         mutex->next_mutex = thread->keeping_mutexes;
     }
@@ -234,10 +260,24 @@ thread_mutex_remove(rb_thread_t *thread, rb_mutex_t *mutex)
 }
 
 static void
-mutex_locked(rb_thread_t *th, VALUE self)
+mutex_set_owner(VALUE self, rb_thread_t *th, rb_fiber_t *fiber)
+{
+    rb_mutex_t *mutex = mutex_ptr(self);
+
+    mutex->thread = th->self;
+    mutex->fiber = fiber;
+    RB_OBJ_WRITTEN(self, Qundef, th->self);
+    if (fiber) {
+        RB_OBJ_WRITTEN(self, Qundef, rb_fiberptr_self(fiber));
+    }
+}
+
+static void
+mutex_locked(rb_thread_t *th, rb_fiber_t *fiber, VALUE self)
 {
     rb_mutex_t *mutex = mutex_ptr(self);
 
+    mutex_set_owner(self, th, fiber);
     thread_mutex_insert(th, mutex);
 }
 
@@ -258,9 +298,8 @@ rb_mutex_trylock(VALUE self)
 
         rb_fiber_t *fiber = GET_EC()->fiber_ptr;
         rb_thread_t *th = GET_THREAD();
-        mutex->fiber = fiber;
 
-        mutex_locked(th, self);
+        mutex_locked(th, fiber, self);
         return Qtrue;
     }
     else {
@@ -328,7 +367,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
                 rb_ensure(call_rb_fiber_scheduler_block, self, delete_from_waitq, (VALUE)&sync_waiter);
 
                 if (!mutex->fiber) {
-                    mutex->fiber = fiber;
+                    mutex_set_owner(self, th, fiber);
                 }
             }
             else {
@@ -358,6 +397,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
                 rb_ractor_sleeper_threads_inc(th->ractor);
                 rb_check_deadlock(th->ractor);
 
+                RUBY_ASSERT(!th->locking_mutex);
                 th->locking_mutex = self;
 
                 ccan_list_add_tail(&mutex->waitq, &sync_waiter.node);
@@ -368,7 +408,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
 
                 // unlocked by another thread while sleeping
                 if (!mutex->fiber) {
-                    mutex->fiber = fiber;
+                    mutex_set_owner(self, th, fiber);
                 }
 
                 rb_ractor_sleeper_threads_dec(th->ractor);
@@ -381,10 +421,13 @@ do_mutex_lock(VALUE self, int interruptible_p)
             if (interruptible_p) {
                 /* release mutex before checking for interrupts...as interrupt checking
                  * code might call rb_raise() */
-                if (mutex->fiber == fiber) mutex->fiber = 0;
+                if (mutex->fiber == fiber) {
+                    mutex->thread = Qfalse;
+                    mutex->fiber = NULL;
+                }
                 RUBY_VM_CHECK_INTS_BLOCKING(th->ec); /* may release mutex */
                 if (!mutex->fiber) {
-                    mutex->fiber = fiber;
+                    mutex_set_owner(self, th, fiber);
                 }
             }
             else {
@@ -403,7 +446,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
         }
 
         if (saved_ints) th->ec->interrupt_flag = saved_ints;
-        if (mutex->fiber == fiber) mutex_locked(th, self);
+        if (mutex->fiber == fiber) mutex_locked(th, fiber, self);
     }
 
     RUBY_DEBUG_LOG("%p locked", mutex);
```
