diff options
| author | Peter Zhu <peter@peterzhu.ca> | 2025-11-24 22:32:07 -0500 |
|---|---|---|
| committer | Peter Zhu <peter@peterzhu.ca> | 2025-11-25 14:19:30 -0800 |
| commit | 8bf333a199b5c099c2e3d3efeb2ba06f265db324 (patch) | |
| tree | 78f817f2e1f1f79f997d55aa764601dfcda164ce | |
| parent | 4263f1d718df65bdf465552029a71b1ea9747067 (diff) | |
Fix live object count for multi-Ractor forking
Since we do not run a Ractor barrier before forking, it's possible that
another other Ractor is halfway through allocating an object during forking.
This may lead to allocated_objects_count being off by one.
For example, the following script reproduces the bug:
100.times do |i|
Ractor.new(i) do |j|
10000.times do |i|
"#{j}-#{i}"
end
Ractor.receive
end
pid = fork { GC.verify_internal_consistency }
_, status = Process.waitpid2 pid
raise unless status.success?
end
We need to run with `taskset -c 1` to force it to use a single CPU core
to more consistenly reproduce the bug:
heap_pages_final_slots: 1, total_freed_objects: 16628
test.rb:8: [BUG] inconsistent live slot number: expect 19589, but 19588.
ruby 4.0.0dev (2025-11-25T03:06:55Z master 55892f5994) +PRISM [x86_64-linux]
-- Control frame information -----------------------------------------------
c:0007 p:---- s:0029 e:000028 l:y b:---- CFUNC :verify_internal_consistency
c:0006 p:0004 s:0025 e:000024 l:n b:---- BLOCK test.rb:8 [FINISH]
c:0005 p:---- s:0022 e:000021 l:y b:---- CFUNC :fork
c:0004 p:0012 s:0018 E:0014c0 l:n b:---- BLOCK test.rb:8
c:0003 p:0024 s:0011 e:000010 l:y b:0001 METHOD <internal:numeric>:257
c:0002 p:0005 s:0006 E:001730 l:n b:---- EVAL test.rb:1 [FINISH]
c:0001 p:0000 s:0003 E:001d20 l:y b:---- DUMMY [FINISH]
-- Ruby level backtrace information ----------------------------------------
test.rb:1:in '<main>'
<internal:numeric>:257:in 'times'
test.rb:8:in 'block in <main>'
test.rb:8:in 'fork'
test.rb:8:in 'block (2 levels) in <main>'
test.rb:8:in 'verify_internal_consistency'
-- Threading information ---------------------------------------------------
Total ractor count: 1
Ruby thread count for this ractor: 1
-- C level backtrace information -------------------------------------------
ruby(rb_print_backtrace+0x14) [0x61b67ac48b60] vm_dump.c:1105
ruby(rb_vm_bugreport) vm_dump.c:1450
ruby(rb_bug_without_die_internal+0x5f) [0x61b67a818a28] error.c:1098
ruby(rb_bug) error.c:1116
ruby(gc_verify_internal_consistency_+0xbdd) [0x61b67a83d8ed] gc/default/default.c:5186
ruby(gc_verify_internal_consistency+0x2d) [0x61b67a83d960] gc/default/default.c:5241
ruby(rb_gc_verify_internal_consistency) gc/default/default.c:8950
ruby(gc_verify_internal_consistency_m) gc/default/default.c:8966
ruby(vm_call_cfunc_with_frame_+0x10d) [0x61b67a9e50fd] vm_insnhelper.c:3902
ruby(vm_sendish+0x111) [0x61b67a9eeaf1] vm_insnhelper.c:6124
ruby(vm_exec_core+0x84) [0x61b67aa07434] insns.def:903
ruby(vm_exec_loop+0xa) [0x61b67a9f8155] vm.c:2811
ruby(rb_vm_exec) vm.c:2787
ruby(vm_yield_with_cref+0x90) [0x61b67a9fd2ea] vm.c:1865
ruby(vm_yield) vm.c:1873
ruby(rb_yield) vm_eval.c:1362
ruby(rb_protect+0xef) [0x61b67a81fe6f] eval.c:1154
ruby(rb_f_fork+0x16) [0x61b67a8e98ab] process.c:4293
ruby(rb_f_fork) process.c:4284
| -rw-r--r-- | bootstraptest/test_ractor.rb | 21 | ||||
| -rw-r--r-- | gc/default/default.c | 12 |
2 files changed, 32 insertions, 1 deletions
diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index ed80dd0862..6c3a45148c 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -2370,3 +2370,24 @@ assert_equal 'ok', <<~'RUBY' :ok end RUBY + +assert_equal 'ok', <<~'RUBY' + begin + 100.times do |i| + Ractor.new(i) do |j| + 1000.times do |i| + "#{j}-#{i}" + end + end + pid = fork do + GC.verify_internal_consistency + end + _, status = Process.waitpid2 pid + raise unless status.success? + end + + :ok + rescue NotImplementedError + :ok + end +RUBY diff --git a/gc/default/default.c b/gc/default/default.c index b6a9dfa1f5..9a2efab562 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -622,6 +622,8 @@ typedef struct rb_objspace { rb_postponed_job_handle_t finalize_deferred_pjob; unsigned long live_ractor_cache_count; + + int fork_vm_lock_lev; } rb_objspace_t; #ifndef HEAP_PAGE_ALIGN_LOG @@ -9324,12 +9326,20 @@ gc_malloc_allocations(VALUE self) void rb_gc_impl_before_fork(void *objspace_ptr) { - /* no-op */ + rb_objspace_t *objspace = objspace_ptr; + + objspace->fork_vm_lock_lev = RB_GC_VM_LOCK(); + rb_gc_vm_barrier(); } void rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid) { + rb_objspace_t *objspace = objspace_ptr; + + RB_GC_VM_UNLOCK(objspace->fork_vm_lock_lev); + objspace->fork_vm_lock_lev = 0; + if (pid == 0) { /* child process */ rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); } |
