diff options
author | Jean Boussier <byroot@ruby-lang.org> | 2022-12-06 12:56:51 +0100 |
---|---|---|
committer | Jean Boussier <jean.boussier@gmail.com> | 2022-12-08 18:46:16 +0100 |
commit | 73771e4b192f3db62efb854affdfc95babba1d35 (patch) | |
tree | 6f15d7fea885b5e639c82b51d5c3fd50b9200735 /ext | |
parent | b19490f75dd790f2f886df2c05ed8fba947326a9 (diff) |
ObjectSpace.dump_all: dump shapes as well
I see several arguments for doing so.
First they use a non trivial amount of memory, so for various memory
profiling/mapping tools it is relevant to have visibility of the space
occupied by shapes.
Then, some pathological code can create tons of shapes, so it is
valuable to have a way to observe shapes without having
to compile Ruby with `SHAPE_DEBUG=1`.
And additionally it's likely much faster to dump them this way than
to use `RubyVM::Shape`.
There are however a few open questions:
- Shapes can't respect the `since:` argument. Not sure what to do when
it is provided. Would probably make sense to not dump them.
- Maybe it would make more sense to have a separate `ObjectSpace.dump_shapes`?
- Maybe instead `dump_all` should take a `shapes: false` argument?
Additionally, `ObjectSpace.dump_shapes` is added for the use case of
debugging the evolution of the shape tree.
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6868
Diffstat (limited to 'ext')
-rw-r--r-- | ext/objspace/depend | 1 | ||||
-rw-r--r-- | ext/objspace/lib/objspace.rb | 65 | ||||
-rw-r--r-- | ext/objspace/objspace_dump.c | 118 |
3 files changed, 171 insertions, 13 deletions
diff --git a/ext/objspace/depend b/ext/objspace/depend index d830239b18..f83607236a 100644 --- a/ext/objspace/depend +++ b/ext/objspace/depend @@ -556,6 +556,7 @@ objspace_dump.o: $(top_srcdir)/node.h objspace_dump.o: $(top_srcdir)/ruby_assert.h objspace_dump.o: $(top_srcdir)/ruby_atomic.h objspace_dump.o: $(top_srcdir)/shape.h +objspace_dump.o: $(top_srcdir)/symbol.h objspace_dump.o: $(top_srcdir)/thread_pthread.h objspace_dump.o: $(top_srcdir)/vm_core.h objspace_dump.o: $(top_srcdir)/vm_opts.h diff --git a/ext/objspace/lib/objspace.rb b/ext/objspace/lib/objspace.rb index 0298b0646c..f8a66d8d32 100644 --- a/ext/objspace/lib/objspace.rb +++ b/ext/objspace/lib/objspace.rb @@ -6,6 +6,7 @@ module ObjectSpace class << self private :_dump private :_dump_all + private :_dump_shapes end module_function @@ -53,23 +54,38 @@ module ObjectSpace # # Dump the contents of the ruby heap as JSON. # + #. _full__ must be a boolean. If true all heap slots are dumped including the empty ones (T_NONE). + # # _since_ must be a non-negative integer or +nil+. # # If _since_ is a positive integer, only objects of that generation and # newer generations are dumped. The current generation can be accessed using - # GC::count. - # - # Objects that were allocated without object allocation tracing enabled + # GC::count. Objects that were allocated without object allocation tracing enabled # are ignored. See ::trace_object_allocations for more information and # examples. # # If _since_ is omitted or is +nil+, all objects are dumped. # + # _shapes_ must be a boolean or a non-negative integer. + # + # If _shapes_ is a positive integer, only shapes newer than the provided + # shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+. + # + # If _shapes_ is +false+, no shapes are dumped. 
+ # + # To only dump objects allocated past a certain point you can combine _since_ and _shapes_: + # ObjectSpace.trace_object_allocations + # GC.start + # gc_generation = GC.count + # shape_generation = RubyVM.stat(:next_shape_id) + #. call_method_to_instrument + # ObjectSpace.dump_all(since: gc_generation, shapes: shape_generation) + # # This method is only expected to work with C Ruby. # This is an experimental method and is subject to change. # In particular, the function signature and output format are # not guaranteed to be compatible in future versions of ruby. - def dump_all(output: :file, full: false, since: nil) + def dump_all(output: :file, full: false, since: nil, shapes: true) out = case output when :file, nil require 'tempfile' @@ -84,7 +100,46 @@ module ObjectSpace raise ArgumentError, "wrong output option: #{output.inspect}" end - ret = _dump_all(out, full, since) + shapes = 0 if shapes == true + ret = _dump_all(out, full, since, shapes) + return nil if output == :stdout + ret + end + + # call-seq: + # ObjectSpace.dump_shapes([output: :file]) # => #<File:/tmp/rubyshapes20131125-88469-laoj3v.json> + # ObjectSpace.dump_shapes(output: :stdout) # => nil + # ObjectSpace.dump_shapes(output: :string) # => "{...}\n{...}\n..." + # ObjectSpace.dump_shapes(output: + # File.open('shapes.json','w')) # => #<File:shapes.json> + # ObjectSpace.dump_all(output: :string, + # since: 42) # => "{...}\n{...}\n..." + # + # Dump the contents of the ruby shape tree as JSON. + # + # If _shapes_ is a positive integer, only shapes newer than the provided + # shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+. + # + # This method is only expected to work with C Ruby. + # This is an experimental method and is subject to change. + # In particular, the function signature and output format are + # not guaranteed to be compatible in future versions of ruby. 
+ def dump_shapes(output: :file, since: 0) + out = case output + when :file, nil + require 'tempfile' + Tempfile.create(%w(rubyshapes .json)) + when :stdout + STDOUT + when :string + +'' + when IO + output + else + raise ArgumentError, "wrong output option: #{output.inspect}" + end + + ret = _dump_shapes(out, since) return nil if output == :stdout ret end diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index 41da368267..bcc258eb25 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -18,6 +18,8 @@ #include "internal/hash.h" #include "internal/string.h" #include "internal/sanitizers.h" +#include "symbol.h" +#include "shape.h" #include "node.h" #include "objspace.h" #include "ruby/debug.h" @@ -42,6 +44,7 @@ struct dump_config { unsigned int full_heap: 1; unsigned int partial_dump; size_t since; + size_t shapes_since; unsigned long buffer_len; char buffer[BUFFER_CAPACITY]; }; @@ -350,6 +353,20 @@ dump_append_string_content(struct dump_config *dc, VALUE obj) } } +static inline void +dump_append_id(struct dump_config *dc, ID id) +{ + if (is_instance_id(id)) { + dump_append_string_value(dc, rb_sym2str(ID2SYM(id))); + } + else { + dump_append(dc, "\"ID_INTERNAL("); + dump_append_sizet(dc, rb_id_to_serial(id)); + dump_append(dc, ")\""); + } +} + + static void dump_object(VALUE obj, struct dump_config *dc) { @@ -378,13 +395,16 @@ dump_object(VALUE obj, struct dump_config *dc) dump_append(dc, "{\"address\":"); dump_append_ref(dc, obj); - dump_append(dc, ", \"shape_id\":"); - dump_append_sizet(dc, rb_shape_get_shape_id(obj)); - dump_append(dc, ", \"type\":\""); dump_append(dc, obj_type(obj)); dump_append(dc, "\""); + size_t shape_id = rb_shape_get_shape_id(obj); + if (shape_id) { + dump_append(dc, ", \"shape_id\":"); + dump_append_sizet(dc, shape_id); + } + dump_append(dc, ", \"slot_size\":"); dump_append_sizet(dc, dc->cur_page_slot_size); @@ -622,7 +642,7 @@ root_obj_i(const char *category, VALUE obj, void *data) } static 
void -dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since) +dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes) { dc->full_heap = 0; @@ -648,6 +668,8 @@ dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since) else { dc->partial_dump = 0; } + + dc->shapes_since = RTEST(shapes) ? NUM2SIZET(shapes) : 0; } static VALUE @@ -672,18 +694,81 @@ objspace_dump(VALUE os, VALUE obj, VALUE output) dc.cur_page_slot_size = rb_gc_obj_slot_size(obj); } - dump_output(&dc, output, Qnil, Qnil); + dump_output(&dc, output, Qnil, Qnil, Qnil); dump_object(obj, &dc); return dump_result(&dc); } +static void +shape_i(rb_shape_t *shape, void *data) +{ + struct dump_config *dc = (struct dump_config *)data; + + size_t shape_id = rb_shape_id(shape); + if (shape_id < dc->shapes_since) { + return; + } + + dump_append(dc, "{\"address\":"); + dump_append_ref(dc, (VALUE)shape); + + dump_append(dc, ", \"type\":\"SHAPE\", \"id\":"); + dump_append_sizet(dc, shape_id); + + if (shape->type != SHAPE_ROOT) { + dump_append(dc, ", \"parent_id\":"); + dump_append_lu(dc, shape->parent_id); + } + + dump_append(dc, ", \"depth\":"); + dump_append_sizet(dc, rb_shape_depth(shape)); + + dump_append(dc, ", \"shape_type\":"); + switch(shape->type) { + case SHAPE_ROOT: + dump_append(dc, "\"ROOT\""); + break; + case SHAPE_IVAR: + dump_append(dc, "\"IVAR\""); + + dump_append(dc, ",\"edge_name\":"); + dump_append_id(dc, shape->edge_name); + + break; + case SHAPE_FROZEN: + dump_append(dc, "\"FROZEN\""); + break; + case SHAPE_CAPACITY_CHANGE: + dump_append(dc, "\"CAPACITY_CHANGE\""); + dump_append(dc, ", \"capacity\":"); + dump_append_sizet(dc, shape->capacity); + break; + case SHAPE_INITIAL_CAPACITY: + dump_append(dc, "\"INITIAL_CAPACITY\""); + break; + case SHAPE_T_OBJECT: + dump_append(dc, "\"T_OBJECT\""); + break; + default: + rb_bug("[objspace] unexpected shape type"); + } + + dump_append(dc, ", \"edges\":"); + dump_append_sizet(dc, 
rb_shape_edges_count(shape)); + + dump_append(dc, ", \"memsize\":"); + dump_append_sizet(dc, rb_shape_memsize(shape)); + + dump_append(dc, "}\n"); +} + static VALUE -objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since) +objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since, VALUE shapes) { struct dump_config dc = {0,}; - dump_output(&dc, output, full, since); + dump_output(&dc, output, full, since, shapes); if (!dc.partial_dump || dc.since == 0) { /* dump roots */ @@ -691,12 +776,28 @@ objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since) if (dc.roots) dump_append(&dc, "]}\n"); } + if (RTEST(shapes)) { + rb_shape_each_shape(shape_i, &dc); + } + /* dump all objects */ rb_objspace_each_objects(heap_i, &dc); return dump_result(&dc); } +static VALUE +objspace_dump_shapes(VALUE os, VALUE output, VALUE shapes) +{ + struct dump_config dc = {0,}; + dump_output(&dc, output, Qfalse, Qnil, shapes); + + if (RTEST(shapes)) { + rb_shape_each_shape(shape_i, &dc); + } + return dump_result(&dc); +} + void Init_objspace_dump(VALUE rb_mObjSpace) { @@ -706,7 +807,8 @@ Init_objspace_dump(VALUE rb_mObjSpace) #endif rb_define_module_function(rb_mObjSpace, "_dump", objspace_dump, 2); - rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 3); + rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 4); + rb_define_module_function(rb_mObjSpace, "_dump_shapes", objspace_dump_shapes, 2); /* force create static IDs */ rb_obj_gc_flags(rb_mObjSpace, 0, 0); |