diff options
Diffstat (limited to 'misc')
-rw-r--r-- | misc/.vscode/settings.json | 3 | ||||
-rw-r--r-- | misc/call_fuzzer.rb | 372 | ||||
-rwxr-xr-x | misc/call_fuzzer.sh | 13 | ||||
-rw-r--r-- | misc/gdb.py | 115 | ||||
-rwxr-xr-x | misc/lldb_cruby.py | 57 | ||||
-rw-r--r-- | misc/lldb_rb/commands/heap_page_command.py | 4 | ||||
-rw-r--r-- | misc/lldb_rb/commands/rp_command.py | 1 | ||||
-rw-r--r-- | misc/lldb_rb/lldb_interface.py | 1 | ||||
-rw-r--r-- | misc/lldb_rb/rb_base_command.py | 2 | ||||
-rw-r--r-- | misc/lldb_rb/rb_heap_structs.py | 19 | ||||
-rw-r--r-- | misc/lldb_rb/utils.py | 319 | ||||
-rw-r--r-- | misc/lldb_yjit.py | 47 | ||||
-rw-r--r-- | misc/ruby-style.el | 11 | ||||
-rwxr-xr-x | misc/yjit_perf.py | 116 |
14 files changed, 915 insertions, 165 deletions
diff --git a/misc/.vscode/settings.json b/misc/.vscode/settings.json index b0f3576025..7b1a38c536 100644 --- a/misc/.vscode/settings.json +++ b/misc/.vscode/settings.json @@ -2,4 +2,7 @@ "rust-analyzer.cargo.features": [ "disasm", ], + "rust-analyzer.cargo.unsetTest": [ + "yjit", + ], } diff --git a/misc/call_fuzzer.rb b/misc/call_fuzzer.rb new file mode 100644 index 0000000000..c3f9f90490 --- /dev/null +++ b/misc/call_fuzzer.rb @@ -0,0 +1,372 @@ +require 'optparse' +require 'set' + +# Number of iterations to test +num_iters = 10_000 + +# Parse the command-line options +OptionParser.new do |opts| + opts.on("--num-iters=N") do |n| + num_iters = n.to_i + end +end.parse! + +# Format large numbers with comma separators for readability +def format_number(pad, number) + s = number.to_s + i = s.index('.') || s.size + s.insert(i -= 3, ',') while i > 3 + s.rjust(pad, ' ') +end + +# Wrap an integer to pass as argument +# We use this so we can have some object arguments +class IntWrapper + def initialize(v) + # Force the object to have a random shape (rand() is in [0, 1), so each ivar is set with probability 0.5) + if rand() < 0.5 + @v0 = 1 + end + if rand() < 0.5 + @v1 = 1 + end + if rand() < 0.5 + @v2 = 1 + end + if rand() < 0.5 + @v3 = 1 + end + if rand() < 0.5 + @v4 = 1 + end + if rand() < 0.5 + @v5 = 1 + end + if rand() < 0.5 + @v6 = 1 + end + + @value = v + end + + attr_reader :value +end + +# Generate a random argument value, integer or string or object +def sample_arg() + c = ['int', 'string', 'object'].sample() + + if c == 'int' + return rand(0...100) + end + + if c == 'string' + return 'f' * rand(0...100) + end + + if c == 'object' + return IntWrapper.new(rand(0...100)) + end + + raise "should not get here" +end + +# Evaluate the value of an argument with respect to the checksum +def arg_val(arg) + if arg.kind_of? Integer + return arg + end + + if arg.kind_of? String + return arg.length + end + + if arg.kind_of?
Object + return arg.value + end + + raise "unknown arg type" +end + +# List of parameters/arguments for a method +class ParamList + def initialize() + self.sample_params() + self.sample_args() + end + + # Sample/generate a random set of parameters for a method + def sample_params() + # Choose how many positional arguments to use, and how many are optional + num_pargs = rand(10) + @opt_parg_idx = rand(num_pargs) + @num_opt_pargs = rand(num_pargs + 1 - @opt_parg_idx) + @num_pargs_req = num_pargs - @num_opt_pargs + @pargs = (0...num_pargs).map do |i| + { + :name => "p#{i}", + :optional => (i >= @opt_parg_idx && i < @opt_parg_idx + @num_opt_pargs) + } + end + + # Choose how many kwargs to use, and how many are optional + num_kwargs = rand(10) + @kwargs = (0...num_kwargs).map do |i| + { + :name => "k#{i}", + :optional => rand() < 0.5 + } + end + + # Choose whether to have rest parameters or not + @has_rest = @num_opt_pargs == 0 && rand() < 0.5 + @has_kwrest = rand() < 0.25 + + # Choose whether to have a named block parameter or not + @has_block_param = rand() < 0.25 + end + + # Sample/generate a random set of arguments corresponding to the parameters + def sample_args() + # Choose how many positional args to pass + num_pargs_passed = rand(@num_pargs_req..@pargs.size) + + # How many optional arguments will be filled + opt_pargs_filled = num_pargs_passed - @num_pargs_req + + @pargs.each_with_index do |parg, i| + if parg[:optional] + parg[:default] = rand(100) + end + + if !parg[:optional] || i < @opt_parg_idx + opt_pargs_filled + parg[:argval] = rand(100) + end + end + + @kwargs.each_with_index do |kwarg, i| + if kwarg[:optional] + kwarg[:default] = rand(100) + end + + if !kwarg[:optional] || rand() < 0.5 + kwarg[:argval] = rand(100) + end + end + + # Randomly pass a block or not + @block_arg = nil + if rand() < 0.5 + @block_arg = rand(100) + end + end + + # Compute the expected checksum of arguments ahead of time + def compute_checksum() + checksum = 0 + + 
@pargs.each_with_index do |arg, i| + value = (arg.key? :argval)? arg[:argval]:arg[:default] + checksum += (i+1) * arg_val(value) + end + + @kwargs.each_with_index do |arg, i| + value = (arg.key? :argval)? arg[:argval]:arg[:default] + checksum += (i+1) * arg_val(value) + end + + if @block_arg + if @has_block_param + checksum += arg_val(@block_arg) + end + + checksum += arg_val(@block_arg) + end + + checksum + end + + # Generate code for the method signature and method body + def gen_method_str() + m_str = "def m(" + + @pargs.each do |arg| + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += arg[:name] + + # If this has a default value + if arg[:optional] + m_str += " = #{arg[:default]}" + end + end + + if @has_rest + if !m_str.end_with?("(") + m_str += ", " + end + m_str += "*rest" + end + + @kwargs.each do |arg| + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += "#{arg[:name]}:" + + # If this has a default value + if arg[:optional] + m_str += " #{arg[:default]}" + end + end + + if @has_kwrest + if !m_str.end_with?("(") + m_str += ", " + end + m_str += "**kwrest" + end + + if @has_block_param + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += "&block" + end + + m_str += ")\n" + + # Add some useless locals + rand(0...16).times do |i| + m_str += "local#{i} = #{i}\n" + end + + # Add some useless if statements (one per positional parameter, each with probability 0.5) + @pargs.each_with_index do |arg, i| + if rand() < 0.5 + m_str += "if #{arg[:name]} > 4; end\n" + end + end + + m_str += "checksum = 0\n" + + @pargs.each_with_index do |arg, i| + m_str += "checksum += #{i+1} * arg_val(#{arg[:name]})\n" + end + + @kwargs.each_with_index do |arg, i| + m_str += "checksum += #{i+1} * arg_val(#{arg[:name]})\n" + end + + if @has_block_param + m_str += "if block; r = block.call; checksum += arg_val(r); end\n" + end + + m_str += "if block_given?; r = yield; checksum += arg_val(r); end\n" + + if @has_rest + m_str += "raise 'rest is not array' unless rest.kind_of?(Array)\n" + m_str += "raise 'rest size not
integer' unless rest.size.kind_of?(Integer)\n" + end + + if @has_kwrest + m_str += "raise 'kwrest is not a hash' unless kwrest.kind_of?(Hash)\n" + m_str += "raise 'kwrest size not integer' unless kwrest.size.kind_of?(Integer)\n" + end + + m_str += "checksum\n" + m_str += "end" + + m_str + end + + # Generate code to call into the method and pass the arguments + def gen_call_str() + c_str = "m(" + + @pargs.each_with_index do |arg, i| + if !arg.key? :argval + next + end + + if !c_str.end_with?("(") + c_str += ", " + end + + c_str += "#{arg[:argval]}" + end + + @kwargs.each_with_index do |arg, i| + if !arg.key? :argval + next + end + + if !c_str.end_with?("(") + c_str += ", " + end + + c_str += "#{arg[:name]}: #{arg[:argval]}" + end + + c_str += ")" + + # Randomly pass a block or not + if @block_arg + c_str += " { #{@block_arg} }" + end + + c_str + end +end + +iseqs_compiled_start = RubyVM::YJIT.runtime_stats[:compiled_iseq_entry] +start_time = Time.now.to_f + +num_iters.times do |i| + puts "Iteration #{i}" + + lst = ParamList.new() + m_str = lst.gen_method_str() + c_str = lst.gen_call_str() + checksum = lst.compute_checksum() + + f = Object.new + + # Define the method on f + puts "Defining" + p m_str + f.instance_eval(m_str) + #puts RubyVM::InstructionSequence.disasm(f.method(:m)) + #exit 0 + + puts "Calling" + c_str = "f.#{c_str}" + p c_str + r = eval(c_str) + puts "checksum=#{r}" + + if r != checksum + raise "return value #{r} doesn't match checksum #{checksum}" + end + + puts "" +end + +# Make sure that YJIT actually compiled the tests we ran +# Should be run with --yjit-call-threshold=1 +iseqs_compiled_end = RubyVM::YJIT.runtime_stats[:compiled_iseq_entry] +if iseqs_compiled_end - iseqs_compiled_start < num_iters + raise "YJIT did not compile enough ISEQs" +end + +puts "Code region size: #{ format_number(0, RubyVM::YJIT.runtime_stats[:code_region_size]) }" + +end_time = Time.now.to_f +itrs_per_sec = num_iters / (end_time - start_time) +itrs_per_hour = 3600 * 
itrs_per_sec +puts "#{'%.1f' % itrs_per_sec} iterations/s" +puts "#{format_number(0, itrs_per_hour.round)} iterations/hour" diff --git a/misc/call_fuzzer.sh b/misc/call_fuzzer.sh new file mode 100755 index 0000000000..cf4ec76fe8 --- /dev/null +++ b/misc/call_fuzzer.sh @@ -0,0 +1,13 @@ +# Stop at first error +set -e + +# TODO +# TODO: boost --num-iters to 1M+ for actual test +# TODO +export NUM_ITERS=25000 + +# Enable code GC so we don't stop compiling when we hit the code size limit +ruby --yjit-call-threshold=1 --yjit-code-gc misc/call_fuzzer.rb --num-iters=$NUM_ITERS + +# Do another pass with --verify-ctx +ruby --yjit-call-threshold=1 --yjit-code-gc --yjit-verify-ctx misc/call_fuzzer.rb --num-iters=$NUM_ITERS diff --git a/misc/gdb.py b/misc/gdb.py index 85507fb540..6034a389bb 100644 --- a/misc/gdb.py +++ b/misc/gdb.py @@ -1,8 +1,18 @@ +import argparse import textwrap -# Usage: -# cfp: Dump the current cfp -# cfp 1: Dump the caller cfp +# usage: [-h] [-a | --all | --no-all] [-s STACK_SIZE] [uplevel] +# +# Dump a control frame +# +# positional arguments: +# uplevel CFP offset from the stack top +# +# options: +# -h, --help show this help message and exit +# -a, --all, --no-all dump all frames +# -s STACK_SIZE, --stack-size STACK_SIZE +# override stack_size (useful for JIT frames) class CFP(gdb.Command): FRAME_MAGICS = [ # frame types @@ -35,44 +45,77 @@ class CFP(gdb.Command): def __init__(self): super(CFP, self).__init__('cfp', gdb.COMMAND_USER) - def invoke(self, offset, from_tty): - if not offset: - offset = '0' - cfp = f'(ruby_current_ec->cfp + ({offset}))' - + self.parser = argparse.ArgumentParser(description='Dump a control frame') + self.parser.add_argument('uplevel', type=int, nargs='?', default=0, help='CFP offset from the stack top') + self.parser.add_argument('-a', '--all', action=argparse.BooleanOptionalAction, help='dump all frames') + self.parser.add_argument('-s', '--stack-size', type=int, help='override stack_size (useful for JIT frames)') + + def 
invoke(self, args, from_tty): + try: + args = self.parser.parse_args(args.split()) + except SystemExit: + return + cfp = f'(ruby_current_ec->cfp + ({args.uplevel}))' end_cfp = self.get_int('ruby_current_ec->vm_stack + ruby_current_ec->vm_stack_size') - cfp_count = int((end_cfp - self.get_int('ruby_current_ec->cfp')) / self.get_int('sizeof(rb_control_frame_t)')) - print('CFP (count={}, addr=0x{:x}):'.format(cfp_count, self.get_int(cfp))) + cfp_index = int((end_cfp - self.get_int(cfp) - 1) / self.get_int('sizeof(rb_control_frame_t)')) + + if args.all: + cfp_count = int((end_cfp - self.get_int('ruby_current_ec->cfp')) / self.get_int('sizeof(rb_control_frame_t)')) - 1 # exclude dummy CFP + for i in range(cfp_count): + print('-' * 80) + self.invoke(str(cfp_count - i - 1), from_tty) + return + + print('CFP (addr=0x{:x}, index={}):'.format(self.get_int(cfp), cfp_index)) gdb.execute(f'p *({cfp})') print() if self.get_int(f'{cfp}->iseq'): local_size = self.get_int(f'{cfp}->iseq->body->local_table_size - {cfp}->iseq->body->param.size') param_size = self.get_int(f'{cfp}->iseq->body->param.size') - print(f'Params (size={param_size}):') - for i in range(-3 - local_size - param_size, -3 - local_size): - self.print_stack(cfp, i, self.rp(cfp, i)) - print() - print(f'Locals (size={local_size}):') - for i in range(-3 - local_size, -3): - self.print_stack(cfp, i, self.rp(cfp, i)) - print() + if param_size: # skip the Params section when the iseq has no parameters + print(f'Params (size={param_size}):') + for i in range(-3 - local_size - param_size, -3 - local_size): + self.print_stack(cfp, i, self.rp(cfp, i)) + print() + + if local_size: # skip the Locals section when there are no non-parameter locals + print(f'Locals (size={local_size}):') + for i in range(-3 - local_size, -3): + self.print_stack(cfp, i, self.rp(cfp, i)) + print() print('Env:') - self.print_stack(cfp, -3, self.rp(cfp, -3)) - self.print_stack(cfp, -2, self.specval(cfp, -2)) - self.print_stack(cfp, -1, self.frame_types(cfp, -1)) + self.print_env(cfp, -3, self.rp_env(cfp, -3)) + self.print_env(cfp, -2, self.specval(cfp, -2)) +
self.print_env(cfp, -1, self.frame_types(cfp, -1)) print() - stack_size = int((self.get_int(f'{cfp}->sp') - self.get_int(f'{cfp}->__bp__')) / 8) - print(f'Stack (size={stack_size}):') - for i in range(0, stack_size): - self.print_stack(cfp, i, self.rp(cfp, i)) - print(self.regs(cfp, stack_size)) + # We can't calculate BP for the first frame. + # vm_base_ptr doesn't work for C frames either. + if cfp_index > 0 and self.get_int(f'{cfp}->iseq'): + if args.stack_size is not None: + stack_size = args.stack_size + else: + stack_size = int((self.get_int(f'{cfp}->sp') - self.get_int(f'vm_base_ptr({cfp})')) / 8) + print(f'Stack (size={stack_size}):') + for i in range(0, stack_size): + self.print_stack(cfp, i, self.rp(cfp, i)) + print(self.regs(cfp, stack_size)) + + def print_env(self, cfp, bp_index, content): + ep_index = bp_index + 1 + address = self.get_int(f'((rb_control_frame_t *){cfp})->ep + {ep_index}') + value = self.get_env(cfp, bp_index) + regs = self.regs(cfp, bp_index) + if content: + content = textwrap.indent(content, ' ' * 3).lstrip() # Leave the regs column empty + content = f'{content} ' + print('{:2} 0x{:x} [{}] {}(0x{:x})'.format(regs, address, bp_index, content, value)) def print_stack(self, cfp, bp_index, content): - address = self.get_int(f'{cfp}->__bp__ + {bp_index}') + address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}') value = self.get_value(cfp, bp_index) regs = self.regs(cfp, bp_index) if content: @@ -81,9 +124,9 @@ class CFP(gdb.Command): print('{:2} 0x{:x} [{}] {}(0x{:x})'.format(regs, address, bp_index, content, value)) def regs(self, cfp, bp_index): - address = self.get_int(f'{cfp}->__bp__ + {bp_index}') + address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}') regs = [] - for reg, field in { 'EP': 'ep', 'BP': '__bp__', 'SP': 'sp' }.items(): + for reg, field in { 'EP': 'ep', 'SP': 'sp' }.items(): if address == self.get_int(f'{cfp}->{field}'): regs.append(reg) return ' '.join(regs) @@ -92,9 +135,13 @@ class CFP(gdb.Command): value = 
self.get_value(cfp, bp_index) return self.get_string(f'rp {value}').rstrip() + def rp_env(self, cfp, bp_index): + value = self.get_env(cfp, bp_index) + return self.get_string(f'rp {value}').rstrip() + # specval: block_handler or previous EP def specval(self, cfp, bp_index): - value = self.get_value(cfp, bp_index) + value = self.get_env(cfp, bp_index) if value == 0: return 'VM_BLOCK_HANDLER_NONE' if value == self.get_int('rb_block_param_proxy'): @@ -103,7 +150,7 @@ class CFP(gdb.Command): def frame_types(self, cfp, bp_index): types = [] - value = self.get_value(cfp, bp_index) + value = self.get_env(cfp, bp_index) magic_mask = self.get_int('VM_FRAME_MAGIC_MASK') for magic in self.FRAME_MAGICS: @@ -118,8 +165,12 @@ class CFP(gdb.Command): return ' | '.join(types) + def get_env(self, cfp, bp_index): + ep_index = bp_index + 1 + return self.get_int(f'((rb_control_frame_t *){cfp})->ep[{ep_index}]') + def get_value(self, cfp, bp_index): - return self.get_int(f'{cfp}->__bp__[{bp_index}]') + return self.get_int(f'vm_base_ptr({cfp})[{bp_index}]') def get_int(self, expr): return int(self.get_string(f'printf "%ld", ({expr})')) diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index 95e03c6209..400ccb45b9 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -197,18 +197,16 @@ def string2cstr(rstring): flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned if flags & RUBY_T_MASK != RUBY_T_STRING: raise TypeError("not a string") + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) if flags & RUBY_FL_USER1: cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) - clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) - clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0) return cptr, clen def output_string(debugger, result, rstring): cptr, clen = string2cstr(rstring) - expr = "print *(const char (*)[%d])%0#x" % (clen, 
cptr) - append_command_output(debugger, expr, result) + append_expression(debugger, "*(const char (*)[%d])%0#x" % (clen, cptr), result) def fixnum_p(x): return x & RUBY_FIXNUM_FLAG != 0 @@ -227,6 +225,9 @@ def append_command_output(debugger, command, result): result.write(output1) result.write(output2) +def append_expression(debugger, expression, result): + append_command_output(debugger, "expression " + expression, result) + def lldb_rp(debugger, command, result, internal_dict): if not ('RUBY_Qfalse' in globals()): lldb_init(debugger) @@ -258,13 +259,13 @@ def lldb_inspect(debugger, target, result, val): elif fixnum_p(num): print(num >> 1, file=result) elif flonum_p(num): - append_command_output(debugger, "print rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) elif static_sym_p(num): if num < 128: print("T_SYMBOL: %c" % num, file=result) else: print("T_SYMBOL: (%x)" % num, file=result) - append_command_output(debugger, "p rb_id2name(%0#x)" % (num >> 8), result) + append_expression(debugger, "rb_id2name(%0#x)" % (num >> 8), result) elif num & RUBY_IMMEDIATE_MASK: print('immediate(%x)' % num, file=result) else: @@ -292,13 +293,13 @@ def lldb_inspect(debugger, target, result, val): print('T_NIL: %s%s' % (flaginfo, val.Dereference()), file=result) elif flType == RUBY_T_OBJECT: result.write('T_OBJECT: %s' % flaginfo) - append_command_output(debugger, "print *(struct RObject*)%0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RObject*)%0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_CLASS or flType == RUBY_T_MODULE or flType == RUBY_T_ICLASS: result.write('T_%s: %s' % ('CLASS' if flType == RUBY_T_CLASS else 'MODULE' if flType == RUBY_T_MODULE else 'ICLASS', flaginfo)) - append_command_output(debugger, "print *(struct RClass*)%0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RClass*)%0#x" % 
val.GetValueAsUnsigned(), result) tRClass = target.FindFirstType("struct RClass") if not val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): - append_command_output(debugger, "print *(struct rb_classext_struct*)%0#x" % (val.GetValueAsUnsigned() + tRClass.GetByteSize()), result) + append_expression(debugger, "*(struct rb_classext_struct*)%0#x" % (val.GetValueAsUnsigned() + tRClass.GetByteSize()), result) elif flType == RUBY_T_STRING: result.write('T_STRING: %s' % flaginfo) encidx = ((flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) @@ -312,12 +313,12 @@ def lldb_inspect(debugger, target, result, val): if len == 0: result.write("(empty)\n") else: - append_command_output(debugger, "print *(const char (*)[%d])%0#x" % (len, ptr), result) + append_expression(debugger, "*(const char (*)[%d])%0#x" % (len, ptr), result) elif flType == RUBY_T_SYMBOL: result.write('T_SYMBOL: %s' % flaginfo) tRSymbol = target.FindFirstType("struct RSymbol").GetPointerType() val = val.Cast(tRSymbol) - append_command_output(debugger, 'print (ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned(), result) + append_expression(debugger, '(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned(), result) tRString = target.FindFirstType("struct RString").GetPointerType() output_string(debugger, result, val.GetValueForExpressionPath("->fstr").Cast(tRString)) elif flType == RUBY_T_ARRAY: @@ -343,12 +344,12 @@ def lldb_inspect(debugger, target, result, val): else: result.write("\n") if ptr.GetValueAsSigned() == 0: - append_command_output(debugger, "expression -fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned(), result) else: - append_command_output(debugger, "expression -Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), result) + append_expression(debugger, "-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), 
result) elif flType == RUBY_T_HASH: result.write("T_HASH: %s" % flaginfo) - append_command_output(debugger, "p *(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_BIGNUM: tRBignum = target.FindFirstType("struct RBignum").GetPointerType() val = val.Cast(tRBignum) @@ -356,15 +357,15 @@ def lldb_inspect(debugger, target, result, val): if flags & RUBY_FL_USER2: len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5)) >> (RUBY_FL_USHIFT+3)) print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=result) - append_command_output(debugger, "print ((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() print("T_BIGNUM: sign=%s len=%d" % (sign, len), file=result) print(val.Dereference(), file=result) - append_command_output(debugger, "expression -Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) - # append_command_output(debugger, "x ((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) + append_expression(debugger, "-Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) + # append_expression(debugger, "((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) elif flType == RUBY_T_FLOAT: - append_command_output(debugger, "print ((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_RATIONAL: tRRational = target.FindFirstType("struct RRational").GetPointerType() val = val.Cast(tRRational) @@ -397,39 +398,39 @@ def lldb_inspect(debugger, 
target, result, val): flag = val.GetValueForExpressionPath("->typed_flag") if flag.GetValueAsUnsigned() == 1: print("T_DATA: %s" % val.GetValueForExpressionPath("->type->wrap_struct_name"), file=result) - append_command_output(debugger, "p *(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) else: print("T_DATA:", file=result) - append_command_output(debugger, "p *(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_NODE: tRTypedData = target.FindFirstType("struct RNode").GetPointerType() nd_type = (flags & RUBY_NODE_TYPEMASK) >> RUBY_NODE_TYPESHIFT - append_command_output(debugger, "p (node_type) %d" % nd_type, result) + append_expression(debugger, "(node_type) %d" % nd_type, result) val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RNode *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RNode *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_MOVED: tRTypedData = target.FindFirstType("struct RMoved").GetPointerType() val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RMoved *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RMoved *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_MATCH: tRTypedData = target.FindFirstType("struct RMatch").GetPointerType() val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RMatch *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RMatch *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_IMEMO: # I'm not sure how to get IMEMO_MASK out of lldb. 
It's not in globals() imemo_type = (flags >> RUBY_FL_USHIFT) & 0x0F # IMEMO_MASK print("T_IMEMO: ", file=result) - append_command_output(debugger, "p (enum imemo_type) %d" % imemo_type, result) - append_command_output(debugger, "p *(struct MEMO *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "(enum imemo_type) %d" % imemo_type, result) + append_expression(debugger, "*(struct MEMO *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_STRUCT: tRTypedData = target.FindFirstType("struct RStruct").GetPointerType() val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RStruct *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RStruct *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_ZOMBIE: tRZombie = target.FindFirstType("struct RZombie").GetPointerType() val = val.Cast(tRZombie) - append_command_output(debugger, "p *(struct RZombie *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RZombie *) %0#x" % val.GetValueAsUnsigned(), result) else: print("Not-handled type %0#x" % flType, file=result) print(val, file=result) diff --git a/misc/lldb_rb/commands/heap_page_command.py b/misc/lldb_rb/commands/heap_page_command.py index edb74a415b..b56a3eae4e 100644 --- a/misc/lldb_rb/commands/heap_page_command.py +++ b/misc/lldb_rb/commands/heap_page_command.py @@ -14,8 +14,8 @@ class HeapPageCommand(RbBaseCommand): page = self._get_page(self.frame.EvaluateExpression(command)) page.Cast(self.t_heap_page_ptr) - self._append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - self._append_command_output(debugger, "p *(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + self._append_expression(debugger, "(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + self._append_expression(debugger, "*(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) def _get_page(self, 
val): addr = val.GetValueAsUnsigned() diff --git a/misc/lldb_rb/commands/rp_command.py b/misc/lldb_rb/commands/rp_command.py index 9da9688b96..06b2516d50 100644 --- a/misc/lldb_rb/commands/rp_command.py +++ b/misc/lldb_rb/commands/rp_command.py @@ -13,4 +13,3 @@ class RbID2StrCommand(RbBaseCommand): val = self.frame.EvaluateExpression(command) inspector = RbInspector(debugger, result, self.ruby_globals) inspector.inspect(val) - diff --git a/misc/lldb_rb/lldb_interface.py b/misc/lldb_rb/lldb_interface.py index 893064db90..785a54b3e3 100644 --- a/misc/lldb_rb/lldb_interface.py +++ b/misc/lldb_rb/lldb_interface.py @@ -5,4 +5,3 @@ class LLDBInterface: self.process = self.target.GetProcess() self.thread = self.process.GetSelectedThread() self.frame = self.thread.GetSelectedFrame() - diff --git a/misc/lldb_rb/rb_base_command.py b/misc/lldb_rb/rb_base_command.py index d786a010b6..70a5addd6d 100644 --- a/misc/lldb_rb/rb_base_command.py +++ b/misc/lldb_rb/rb_base_command.py @@ -43,6 +43,7 @@ class RbBaseCommand(LLDBInterface): self.internal_dict = _internal_dict def __call__(self, debugger, command, exe_ctx, result): + self.ruby_globals = RbBaseCommand.lldb_init(debugger) self.build_environment(debugger) self.call(debugger, command, exe_ctx, result) @@ -54,4 +55,3 @@ class RbBaseCommand(LLDBInterface): def get_long_help(self): return self.__class__.help_string - diff --git a/misc/lldb_rb/rb_heap_structs.py b/misc/lldb_rb/rb_heap_structs.py index 0428b7fc3f..86b38dbbbd 100644 --- a/misc/lldb_rb/rb_heap_structs.py +++ b/misc/lldb_rb/rb_heap_structs.py @@ -108,11 +108,15 @@ class RbObject(LLDBInterface): else: return False + def as_type(self, type_name): + return self.val.Cast(self.tRValue.GetPointerType()).GetValueForExpressionPath("->as."+type_name) + def ary_ptr(self): + rval = self.as_type("array") if self.flags & self.ruby_globals["RUBY_FL_USER1"]: - ptr = self.val.GetValueForExpressionPath("->as.ary") + ptr = rval.GetValueForExpressionPath("->as.ary") else: - ptr = 
self.val.GetValueForExpressionPath("->as.heap.ptr") + ptr = rval.GetValueForExpressionPath("->as.heap.ptr") return ptr def ary_len(self): @@ -122,19 +126,18 @@ class RbObject(LLDBInterface): self.flUser7 | self.flUser8 | self.flUser9) ) >> (self.flUshift + 3)) else: - len = self.val.GetValueForExpressionPath("->as.heap.len") + rval = self.as_type("array") + len = rval.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() return len def bignum_len(self): - if self.flags & flUser2: + if self.flags & self.flUser2: len = ((self.flags & (self.flUser3 | self.flUser4 | self.flUser5) ) >> (self.flUshift + 3)) else: - len = self.val.GetValueForExpressionPath("->as.heap.len") + len = (self.as_type("bignum").GetValueForExpressionPath("->as.heap.len"). + GetValueAsUnsigned()) return len - - - diff --git a/misc/lldb_rb/utils.py b/misc/lldb_rb/utils.py index b6cfe2fe77..a6bbd385cd 100644 --- a/misc/lldb_rb/utils.py +++ b/misc/lldb_rb/utils.py @@ -16,25 +16,26 @@ class RbInspector(LLDBInterface): self.result.write(output1) self.result.write(output2) + def _append_expression(self, expression): + self._append_command_output("expression " + expression) + def string2cstr(self, rstring): """Returns the pointer to the C-string in the given String object""" if rstring.TypeIsPointerType(): rstring = rstring.Dereference() flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) if flags & self.ruby_globals["RUBY_FL_USER1"]: cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) - clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) - clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0) return cptr, clen def output_string(self, rstring): cptr, clen = self.string2cstr(rstring) - expr = "print *(const char (*)[%d])%0#x" % (clen, cptr) - self._append_command_output(expr) + 
self._append_expression("*(const char (*)[%d])%0#x" % (clen, cptr)) def fixnum_p(self, x): return x & self.ruby_globals["RUBY_FIXNUM_FLAG"] != 0 @@ -50,7 +51,7 @@ class RbInspector(LLDBInterface): def generic_inspect(self, val, rtype): tRType = self.target.FindFirstType("struct %s" % rtype).GetPointerType() val = val.Cast(tRType) - self._append_command_output("p *(struct %s *) %0#x" % (rtype, val.GetValueAsUnsigned())) + self._append_expression("*(struct %s *) %0#x" % (rtype, val.GetValueAsUnsigned())) def inspect(self, val): rbTrue = self.ruby_globals["RUBY_Qtrue"] @@ -59,6 +60,9 @@ class RbInspector(LLDBInterface): rbUndef = self.ruby_globals["RUBY_Qundef"] rbImmediateMask = self.ruby_globals["RUBY_IMMEDIATE_MASK"] + if self.inspect_node(val): + return + num = val.GetValueAsSigned() if num == rbFalse: print('false', file=self.result) @@ -71,13 +75,13 @@ class RbInspector(LLDBInterface): elif self.fixnum_p(num): print(num >> 1, file=self.result) elif self.flonum_p(num): - self._append_command_output("print rb_float_value(%0#x)" % val.GetValueAsUnsigned()) + self._append_expression("rb_float_value(%0#x)" % val.GetValueAsUnsigned()) elif self.static_sym_p(num): if num < 128: print("T_SYMBOL: %c" % num, file=self.result) else: print("T_SYMBOL: (%x)" % num, file=self.result) - self._append_command_output("p rb_id2name(%0#x)" % (num >> 8)) + self._append_expression("rb_id2name(%0#x)" % (num >> 8)) elif num & rbImmediateMask: print('immediate(%x)' % num, file=self.result) @@ -99,7 +103,7 @@ class RbInspector(LLDBInterface): elif rval.is_type("RUBY_T_OBJECT"): self.result.write('T_OBJECT: %s' % flaginfo) - self._append_command_output("print *(struct RObject*)%0#x" % val.GetValueAsUnsigned()) + self._append_expression("*(struct RObject*)%0#x" % val.GetValueAsUnsigned()) elif (rval.is_type("RUBY_T_CLASS") or rval.is_type("RUBY_T_MODULE") or @@ -107,10 +111,10 @@ class RbInspector(LLDBInterface): self.result.write('T_%s: %s' % (rval.type_name.split('_')[-1], flaginfo)) 
tRClass = self.target.FindFirstType("struct RClass") - self._append_command_output("print *(struct RClass*)%0#x" % val.GetValueAsUnsigned()) + self._append_expression("*(struct RClass*)%0#x" % val.GetValueAsUnsigned()) if not val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): - self._append_command_output( - "print *(struct rb_classext_struct*)%0#x" % + self._append_expression( + "*(struct rb_classext_struct*)%0#x" % (val.GetValueAsUnsigned() + tRClass.GetByteSize()) ) @@ -118,6 +122,10 @@ class RbInspector(LLDBInterface): self.result.write('T_STRING: %s' % flaginfo) tRString = self.target.FindFirstType("struct RString").GetPointerType() + chilled = self.ruby_globals["RUBY_FL_USER3"] + if (rval.flags & chilled) != 0: + self.result.write("[CHILLED] ") + rb_enc_mask = self.ruby_globals["RUBY_ENCODING_MASK"] rb_enc_shift = self.ruby_globals["RUBY_ENCODING_SHIFT"] encidx = ((rval.flags & rb_enc_mask) >> rb_enc_shift) @@ -130,11 +138,21 @@ class RbInspector(LLDBInterface): else: self.result.write('[enc=%d] ' % encidx) + coderange = rval.flags & self.ruby_globals["RUBY_ENC_CODERANGE_MASK"] + if coderange == self.ruby_globals["RUBY_ENC_CODERANGE_7BIT"]: + self.result.write('[7BIT] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_VALID"]: + self.result.write('[VALID] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_BROKEN"]: + self.result.write('[BROKEN] ') + else: + self.result.write('[UNKNOWN] ') + ptr, len = self.string2cstr(val.Cast(tRString)) if len == 0: self.result.write("(empty)\n") else: - self._append_command_output("print *(const char (*)[%d])%0#x" % (len, ptr)) + self._append_expression("*(const char (*)[%d])%0#x" % (len, ptr)) elif rval.is_type("RUBY_T_SYMBOL"): self.result.write('T_SYMBOL: %s' % flaginfo) @@ -142,12 +160,10 @@ class RbInspector(LLDBInterface): tRString = self.target.FindFirstType("struct RString").GetPointerType() val = val.Cast(tRSymbol) - self._append_command_output( - 'print (ID)%0#x ' % 
val.GetValueForExpressionPath("->id").GetValueAsUnsigned()) + self._append_expression('(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned()) self.output_string(val.GetValueForExpressionPath("->fstr").Cast(tRString)) elif rval.is_type("RUBY_T_ARRAY"): - tRArray = self.target.FindFirstType("struct RArray").GetPointerType() len = rval.ary_len() ptr = rval.ary_ptr() @@ -166,19 +182,20 @@ class RbInspector(LLDBInterface): else: self.result.write("\n") if ptr.GetValueAsSigned() == 0: - self._append_command_output( - "expression -fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned()) + self._append_expression("-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned()) else: - self._append_command_output( - "expression -Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned())) + self._append_expression("-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned())) elif rval.is_type("RUBY_T_HASH"): self.result.write("T_HASH: %s" % flaginfo) - self._append_command_output("p *(struct RHash *) %0#x" % val.GetValueAsUnsigned()) + ptr = val.GetValueAsUnsigned() + self._append_expression("*(struct RHash *) %0#x" % ptr) + if rval.flags & self.ruby_globals["RUBY_FL_USER3"]: + self._append_expression("*(struct st_table *) (%0#x + sizeof(struct RHash))" % ptr) + else: + self._append_expression("*(struct ar_table *) (%0#x + sizeof(struct RHash))" % ptr) elif rval.is_type("RUBY_T_BIGNUM"): - tRBignum = self.target.FindFirstType("struct RBignum").GetPointerType() - sign = '-' if (rval.flags & self.ruby_globals["RUBY_FL_USER1"]) != 0: sign = '+' @@ -186,17 +203,16 @@ class RbInspector(LLDBInterface): if rval.flags & self.ruby_globals["RUBY_FL_USER2"]: print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=self.result) - self._append_command_output("print ((struct RBignum *) %0#x)->as.ary" + self._append_expression("((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned()) else: print("T_BIGNUM: sign=%s len=%d" % (sign, len), 
file=self.result) - print(val.Dereference(), file=self.result) - self._append_command_output( - "expression -Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % - (len, val.GetValueAsUnsigned())) + print(rval.as_type("bignum"), file=self.result) + self._append_expression("-Z %d -fx -- ((struct RBignum*)%d)->as.heap.digits" % + (len, val.GetValueAsUnsigned())) elif rval.is_type("RUBY_T_FLOAT"): - self._append_command_output("print ((struct RFloat *)%d)->float_value" + self._append_expression("((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned()) elif rval.is_type("RUBY_T_RATIONAL"): @@ -237,31 +253,18 @@ class RbInspector(LLDBInterface): print("T_DATA: %s" % val.GetValueForExpressionPath("->type->wrap_struct_name"), file=self.result) - self._append_command_output( - "p *(struct RTypedData *) %0#x" % val.GetValueAsUnsigned()) + self._append_expression("*(struct RTypedData *) %0#x" % val.GetValueAsUnsigned()) else: print("T_DATA:", file=self.result) - self._append_command_output( - "p *(struct RData *) %0#x" % val.GetValueAsUnsigned()) - - elif rval.is_type("RUBY_T_NODE"): - tRNode = self.target.FindFirstType("struct RNode").GetPointerType() - rbNodeTypeMask = self.ruby_globals["RUBY_NODE_TYPEMASK"] - rbNodeTypeShift = self.ruby_globals["RUBY_NODE_TYPESHIFT"] - - nd_type = (rval.flags & rbNodeTypeMask) >> rbNodeTypeShift - val = val.Cast(tRNode) - - self._append_command_output("p (node_type) %d" % nd_type) - self._append_command_output("p *(struct RNode *) %0#x" % val.GetValueAsUnsigned()) + self._append_expression("*(struct RData *) %0#x" % val.GetValueAsUnsigned()) elif rval.is_type("RUBY_T_IMEMO"): imemo_type = ((rval.flags >> self.ruby_globals["RUBY_FL_USHIFT"]) & IMEMO_MASK) print("T_IMEMO: ", file=self.result) - self._append_command_output("p (enum imemo_type) %d" % imemo_type) - self._append_command_output("p *(struct MEMO *) %0#x" % val.GetValueAsUnsigned()) + self._append_expression("(enum imemo_type) %d" % imemo_type) + 
self._append_expression("*(struct MEMO *) %0#x" % val.GetValueAsUnsigned()) elif rval.is_type("RUBY_T_FILE"): self.generic_inspect(val, "RFile") @@ -282,3 +285,229 @@ class RbInspector(LLDBInterface): print("Not-handled type %0#x" % rval.type, file=self.result) print(val, file=self.result) + def inspect_node(self, val): + tRNode = self.target.FindFirstType("struct RNode").GetPointerType() + + # if val.GetType() != tRNode: does not work for unknown reason + + if val.GetType().GetPointeeType().name != "NODE": + return False + + rbNodeTypeMask = self.ruby_globals["RUBY_NODE_TYPEMASK"] + rbNodeTypeShift = self.ruby_globals["RUBY_NODE_TYPESHIFT"] + flags = val.Cast(tRNode).GetChildMemberWithName("flags").GetValueAsUnsigned() + nd_type = (flags & rbNodeTypeMask) >> rbNodeTypeShift + + self._append_expression("(node_type) %d" % nd_type) + + if nd_type == self.ruby_globals["NODE_SCOPE"]: + self._append_expression("*(rb_node_scope_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BLOCK"]: + self._append_expression("*(rb_node_block_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IF"]: + self._append_expression("*(rb_node_if_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNLESS"]: + self._append_expression("*(rb_node_unless_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE"]: + self._append_expression("*(rb_node_case_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE2"]: + self._append_expression("*(rb_node_case2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE3"]: + self._append_expression("*(rb_node_case3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHEN"]: + self._append_expression("*(rb_node_when_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IN"]: + self._append_expression("*(rb_node_in_t *) %0#x" % 
val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHILE"]: + self._append_expression("*(rb_node_while_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNTIL"]: + self._append_expression("*(rb_node_until_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ITER"]: + self._append_expression("*(rb_node_iter_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR"]: + self._append_expression("*(rb_node_for_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR_MASGN"]: + self._append_expression("*(rb_node_for_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BREAK"]: + self._append_expression("*(rb_node_break_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NEXT"]: + self._append_expression("*(rb_node_next_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REDO"]: + self._append_expression("*(rb_node_redo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETRY"]: + self._append_expression("*(rb_node_retry_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BEGIN"]: + self._append_expression("*(rb_node_begin_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RESCUE"]: + self._append_expression("*(rb_node_rescue_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RESBODY"]: + self._append_expression("*(rb_node_resbody_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ENSURE"]: + self._append_expression("*(rb_node_ensure_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_AND"]: + self._append_expression("*(rb_node_and_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OR"]: + self._append_expression("*(rb_node_or_t *) %0#x" % 
val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MASGN"]: + self._append_expression("*(rb_node_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LASGN"]: + self._append_expression("*(rb_node_lasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DASGN"]: + self._append_expression("*(rb_node_dasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GASGN"]: + self._append_expression("*(rb_node_gasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IASGN"]: + self._append_expression("*(rb_node_iasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CDECL"]: + self._append_expression("*(rb_node_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVASGN"]: + self._append_expression("*(rb_node_cvasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN1"]: + self._append_expression("*(rb_node_op_asgn1_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN2"]: + self._append_expression("*(rb_node_op_asgn2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_AND"]: + self._append_expression("*(rb_node_op_asgn_and_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_OR"]: + self._append_expression("*(rb_node_op_asgn_or_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_CDECL"]: + self._append_expression("*(rb_node_op_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CALL"]: + self._append_expression("*(rb_node_call_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPCALL"]: + self._append_expression("*(rb_node_opcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FCALL"]: + 
self._append_expression("*(rb_node_fcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VCALL"]: + self._append_expression("*(rb_node_vcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_QCALL"]: + self._append_expression("*(rb_node_qcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SUPER"]: + self._append_expression("*(rb_node_super_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZSUPER"]: + self._append_expression("*(rb_node_zsuper_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LIST"]: + self._append_expression("*(rb_node_list_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZLIST"]: + self._append_expression("*(rb_node_zlist_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HASH"]: + self._append_expression("*(rb_node_hash_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETURN"]: + self._append_expression("*(rb_node_return_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_YIELD"]: + self._append_expression("*(rb_node_yield_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LVAR"]: + self._append_expression("*(rb_node_lvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DVAR"]: + self._append_expression("*(rb_node_dvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GVAR"]: + self._append_expression("*(rb_node_gvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CONST"]: + self._append_expression("*(rb_node_const_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVAR"]: + self._append_expression("*(rb_node_cvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NTH_REF"]: + 
self._append_expression("*(rb_node_nth_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BACK_REF"]: + self._append_expression("*(rb_node_back_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH"]: + self._append_expression("*(rb_node_match_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH2"]: + self._append_expression("*(rb_node_match2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH3"]: + self._append_expression("*(rb_node_match3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_STR"]: + self._append_expression("*(rb_node_str_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSTR"]: + self._append_expression("*(rb_node_dstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_XSTR"]: + self._append_expression("*(rb_node_xstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DXSTR"]: + self._append_expression("*(rb_node_dxstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_EVSTR"]: + self._append_expression("*(rb_node_evstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REGX"]: + self._append_expression("*(rb_node_regx_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DREGX"]: + self._append_expression("*(rb_node_dregx_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ONCE"]: + self._append_expression("*(rb_node_once_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS"]: + self._append_expression("*(rb_node_args_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS_AUX"]: + self._append_expression("*(rb_node_args_aux_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPT_ARG"]: 
+ self._append_expression("*(rb_node_opt_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_KW_ARG"]: + self._append_expression("*(rb_node_kw_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTARG"]: + self._append_expression("*(rb_node_postarg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSCAT"]: + self._append_expression("*(rb_node_argscat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSPUSH"]: + self._append_expression("*(rb_node_argspush_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SPLAT"]: + self._append_expression("*(rb_node_splat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFN"]: + self._append_expression("*(rb_node_defn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFS"]: + self._append_expression("*(rb_node_defs_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ALIAS"]: + self._append_expression("*(rb_node_alias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VALIAS"]: + self._append_expression("*(rb_node_valias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNDEF"]: + self._append_expression("*(rb_node_undef_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CLASS"]: + self._append_expression("*(rb_node_class_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MODULE"]: + self._append_expression("*(rb_node_module_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SCLASS"]: + self._append_expression("*(rb_node_sclass_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_COLON2"]: + self._append_expression("*(rb_node_colon2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == 
self.ruby_globals["NODE_COLON3"]: + self._append_expression("*(rb_node_colon3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT2"]: + self._append_expression("*(rb_node_dot2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT3"]: + self._append_expression("*(rb_node_dot3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP2"]: + self._append_expression("*(rb_node_flip2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP3"]: + self._append_expression("*(rb_node_flip3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SELF"]: + self._append_expression("*(rb_node_self_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NIL"]: + self._append_expression("*(rb_node_nil_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_TRUE"]: + self._append_expression("*(rb_node_true_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FALSE"]: + self._append_expression("*(rb_node_false_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERRINFO"]: + self._append_expression("*(rb_node_errinfo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFINED"]: + self._append_expression("*(rb_node_defined_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTEXE"]: + self._append_expression("*(rb_node_postexe_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSYM"]: + self._append_expression("*(rb_node_dsym_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ATTRASGN"]: + self._append_expression("*(rb_node_attrasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LAMBDA"]: + self._append_expression("*(rb_node_lambda_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type 
== self.ruby_globals["NODE_ARYPTN"]: + self._append_expression("*(rb_node_aryptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HSHPTN"]: + self._append_expression("*(rb_node_hshptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FNDPTN"]: + self._append_expression("*(rb_node_fndptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERROR"]: + self._append_expression("*(rb_node_error_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LINE"]: + self._append_expression("*(rb_node_line_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FILE"]: + self._append_expression("*(rb_node_file_t *) %0#x" % val.GetValueAsUnsigned()) + else: + self._append_expression("*(NODE *) %0#x" % val.GetValueAsUnsigned()) + return True diff --git a/misc/lldb_yjit.py b/misc/lldb_yjit.py deleted file mode 100644 index cc37b990ea..0000000000 --- a/misc/lldb_yjit.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python -#coding: utf-8 -# -# Usage: run `command script import -r misc/lldb_yjit.py` on LLDB -# - -from __future__ import print_function -import lldb -import os -import shlex - -def list_comments(debugger, command, result, internal_dict): - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - # Get the different types we need - rb_darray_meta_t = target.FindFirstType("rb_darray_meta_t") - codeblock_t = target.FindFirstType("codeblock_t") - yjit_comment = target.FindFirstType("yjit_comment") - - # Get the global variables we need - comments = target.FindFirstGlobalVariable("yjit_code_comments") - cb = target.FindFirstGlobalVariable("cb").Cast(codeblock_t.GetPointerType()) - - # Get the address of the memory block we're using - mem_addr = cb.GetChildMemberWithName("mem_block").GetValueAsUnsigned() - - # Find the size of the darray comment 
list - meta = comments.Cast(rb_darray_meta_t.GetPointerType()) - size = meta.GetChildMemberWithName("size").GetValueAsUnsigned() - - # Get the address of the block following the metadata header - t_offset = comments.GetValueAsUnsigned() + rb_darray_meta_t.GetByteSize() - - # Loop through each comment and print - for t in range(0, size): - addr = lldb.SBAddress(t_offset + (t * yjit_comment.GetByteSize()), target) - comment = target.CreateValueFromAddress("yjit_comment", addr, yjit_comment) - string = comment.GetChildMemberWithName("comment") - comment_offset = mem_addr + comment.GetChildMemberWithName("offset").GetValueAsUnsigned() - print("%0#x %s" % (comment_offset, string.GetSummary()), file = result) - - -def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand("command script add -f lldb_yjit.list_comments lc") diff --git a/misc/ruby-style.el b/misc/ruby-style.el index 13aad77b3d..45fdccc6f2 100644 --- a/misc/ruby-style.el +++ b/misc/ruby-style.el @@ -66,6 +66,17 @@ (access-label /) ))) +(c-add-style + "prism" + '("bsd" + (c-basic-offset . 4) + (tab-width . 8) + (indent-tabs-mode . nil) + (setq show-trailing-whitespace t) + (c-offsets-alist + (case-label . +) + ))) + ;;;###autoload (defun ruby-style-c-mode () (interactive) diff --git a/misc/yjit_perf.py b/misc/yjit_perf.py new file mode 100755 index 0000000000..61434e5eb4 --- /dev/null +++ b/misc/yjit_perf.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +import os +import sys +from collections import Counter, defaultdict +import os.path + +# Aggregating cycles per symbol and dso +total_cycles = 0 +category_cycles = Counter() +detailed_category_cycles = defaultdict(Counter) +categories = set() + +def truncate_symbol(symbol, max_length=50): + """ Truncate the symbol name to a maximum length """ + return symbol if len(symbol) <= max_length else symbol[:max_length-3] + '...' 
+ +def categorize_symbol(dso, symbol): + """ Categorize the symbol based on the defined criteria """ + if dso == 'sqlite3_native.so': + return '[sqlite3]' + elif 'SHA256' in symbol: + return '[sha256]' + elif symbol.startswith('[JIT] gen_send'): + return '[JIT send]' + elif symbol.startswith('[JIT]'): + return '[JIT code]' + elif '::' in symbol or symbol.startswith('yjit::') or symbol.startswith('_ZN4yjit'): + return '[YJIT compile]' + elif symbol.startswith('rb_vm_') or symbol.startswith('vm_') or symbol in { + "rb_call0", "callable_method_entry_or_negative", "invoke_block_from_c_bh", + "rb_funcallv_scope", "setup_parameters_complex", "rb_yield"}: + return '[interpreter]' + elif symbol.startswith('rb_hash_') or symbol.startswith('hash_'): + return '[rb_hash_*]' + elif symbol.startswith('rb_ary_') or symbol.startswith('ary_'): + return '[rb_ary_*]' + elif symbol.startswith('rb_str_') or symbol.startswith('str_'): + return '[rb_str_*]' + elif symbol.startswith('rb_sym') or symbol.startswith('sym_'): + return '[rb_sym_*]' + elif symbol.startswith('rb_st_') or symbol.startswith('st_'): + return '[rb_st_*]' + elif symbol.startswith('rb_ivar_') or 'shape' in symbol: + return '[ivars]' + elif 'match' in symbol or symbol.startswith('rb_reg') or symbol.startswith('onig'): + return '[regexp]' + elif 'alloc' in symbol or 'free' in symbol or 'gc' in symbol: + return '[GC]' + elif 'pthread' in symbol and 'lock' in symbol: + return '[pthread lock]' + else: + return symbol # Return the symbol itself for uncategorized symbols + +def process_event(event): + global total_cycles, category_cycles, detailed_category_cycles, categories + + full_dso = event.get("dso", "Unknown_dso") + dso = os.path.basename(full_dso) + symbol = event.get("symbol", "[unknown]") + cycles = event["sample"]["period"] + total_cycles += cycles + + category = categorize_symbol(dso, symbol) + category_cycles[category] += cycles + detailed_category_cycles[category][(dso, symbol)] += cycles + + if 
category.startswith('[') and category.endswith(']'): + categories.add(category) + +def trace_end(): + if total_cycles == 0: + return + + print("Aggregated Event Data:") + print("{:<20} {:<50} {:>20} {:>15}".format("[dso]", "[symbol or category]", "[top-most cycle ratio]", "[num cycles]")) + + for category, cycles in category_cycles.most_common(): + ratio = (cycles / total_cycles) * 100 + dsos = {dso for dso, _ in detailed_category_cycles[category]} + dso_display = next(iter(dsos)) if len(dsos) == 1 else "Multiple DSOs" + print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso_display, truncate_symbol(category), ratio, cycles)) + + # Category breakdown + for category in categories: + symbols = detailed_category_cycles[category] + category_total = sum(symbols.values()) + category_ratio = (category_total / total_cycles) * 100 + print(f"\nCategory: {category} ({category_ratio:.2f}%)") + print("{:<20} {:<50} {:>20} {:>15}".format("[dso]", "[symbol]", "[top-most cycle ratio]", "[num cycles]")) + for (dso, symbol), cycles in symbols.most_common(): + symbol_ratio = (cycles / category_total) * 100 + print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles)) + +# There are two ways to use this script: +# 1) perf script -s misc/yjit_perf.py -- native interface +# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support +# +# In both cases, __name__ is "__main__". The following code implements (2) when sys.argv is 2. 
+if __name__ == "__main__" and len(sys.argv) == 2: + if len(sys.argv) != 2: + print("Usage: yjit_perf.py <filename>") + sys.exit(1) + + with open(sys.argv[1], "r") as file: + for line in file: + # [Example] + # ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map) + row = line.split(maxsplit=6) + + period = row[3] # "1212775" + symbol, dso = row[6].split(" (") # "[JIT] getlocal_WC_0+0x0", "/tmp/perf-78207.map)\n" + symbol = symbol.split("+")[0] # "[JIT] getlocal_WC_0" + dso = dso.split(")")[0] # "/tmp/perf-78207.map" + + process_event({"dso": dso, "symbol": symbol, "sample": {"period": int(period)}}) + trace_end() |