objspace_dump: Use FILE* to avoid crashing in mark functions

We observed crashes from rb_io_bufwrite() thread switching (through rb_thread_check_ints()) in the middle of rb_execution_context_mark(). By the time rb_execution_context_mark() gets a timeslice again, it read garbage from a frame that was already popped in another thread, crashing the process in SEGV. Other mark functions probably have their own ways of breaking, but clearly, the usual IO code do too much for this perilous pseudo GC context. Use `FILE*` like before 5001cc47169614ea07d87651c95c2ee185e374e0 ("Optimize ObjectSpace.dump_all"). Also, add type checking for the private _dump methods. Co-authored-by: Peter Zhu <peter@peterzhu.ca>
author: Alan Wu <XrXr@users.noreply.github.com> 2024-12-06 22:49:53 -0500
committer: Alan Wu <XrXr@users.noreply.github.com> 2024-12-09 16:08:35 -0500
commit: 476d655053b0e3ea447dc6549b821d18636c6603 (patch)
tree: e70a7b68d55f8fa6041c1a574ad850fa8939398a /ext/objspace
parent: de7feb0538266b2d66b444f4142773c2f510cdcc (diff)
1 files changed, 29 insertions, 10 deletions
diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c
index b1f7ab9804..1bcb4033cb 100644
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@@ -36,9 +36,10 @@ RUBY_EXTERN const char ruby_hexdigits[];
 #define BUFFER_CAPACITY 4096
 
 struct dump_config {
-    VALUE type;
-    VALUE stream;
+    VALUE given_output;
+    VALUE output_io;
     VALUE string;
+    FILE *stream;
     const char *root_category;
     VALUE cur_obj;
     VALUE cur_obj_klass;
@@ -58,7 +59,7 @@ dump_flush(struct dump_config *dc)
 {
     if (dc->buffer_len) {
         if (dc->stream) {
-            size_t written = rb_io_bufwrite(dc->stream, dc->buffer, dc->buffer_len);
+            size_t written = fwrite(dc->buffer, sizeof(dc->buffer[0]), dc->buffer_len, dc->stream);
             if (written < dc->buffer_len) {
                 MEMMOVE(dc->buffer, dc->buffer + written, char, dc->buffer_len - written);
                 dc->buffer_len -= written;
@@ -696,16 +697,34 @@ root_obj_i(const char *category, VALUE obj, void *data)
 static void
 dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes)
 {
-
+    dc->given_output = output;
     dc->full_heap = 0;
     dc->buffer_len = 0;
 
     if (TYPE(output) == T_STRING) {
-        dc->stream = Qfalse;
+        dc->stream = NULL;
         dc->string = output;
     }
     else {
-        dc->stream = output;
+        rb_io_t *fptr;
+        // Output should be an IO, typecheck and get a FILE* for writing.
+        // We cannot write with the usual IO code here because writes
+        // interleave with calls to rb_gc_mark(). The usual IO code can
+        // cause a thread switch, raise exceptions, and even run arbitrary
+        // ruby code through the fiber scheduler.
+        //
+        // Mark functions generally can't handle these possibilities so
+        // the usual IO code is unsafe in this context. (For example,
+        // there are many ways to crash when ruby code runs and mutates
+        // the execution context while rb_execution_context_mark() is in
+        // progress.)
+        //
+        // Using FILE* isn't perfect, but it avoids the most acute problems.
+        output = rb_io_get_io(output);
+        dc->output_io = rb_io_get_write_io(output);
+        rb_io_flush(dc->output_io);
+        GetOpenFile(dc->output_io, fptr);
+        dc->stream = rb_io_stdio_file(fptr);
         dc->string = Qfalse;
     }
 
@@ -729,13 +748,13 @@ dump_result(struct dump_config *dc)
 {
     dump_flush(dc);
 
+    if (dc->stream) {
+        fflush(dc->stream);
+    }
     if (dc->string) {
         return dc->string;
     }
-    else {
-        rb_io_flush(dc->stream);
-        return dc->stream;
-    }
+    return dc->given_output;
 }
 
 /* :nodoc: */
author	Alan Wu <XrXr@users.noreply.github.com>	2024-12-06 22:49:53 -0500
committer	Alan Wu <XrXr@users.noreply.github.com>	2024-12-09 16:08:35 -0500
commit	476d655053b0e3ea447dc6549b821d18636c6603 (patch)
tree	e70a7b68d55f8fa6041c1a574ad850fa8939398a /ext/objspace
parent	de7feb0538266b2d66b444f4142773c2f510cdcc (diff)