diff options
Diffstat (limited to 'misc')
26 files changed, 2745 insertions, 78 deletions
diff --git a/misc/.vscode/launch.json b/misc/.vscode/launch.json new file mode 100644 index 0000000000..51bfef09d7 --- /dev/null +++ b/misc/.vscode/launch.json @@ -0,0 +1,13 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "name": "Run ruby", + "request": "launch", + "program": "${workspaceFolder}/ruby", + "args": ["test.rb"], + "preLaunchTask": "${defaultBuildTask}" + } + ] +} diff --git a/misc/.vscode/settings.json b/misc/.vscode/settings.json new file mode 100644 index 0000000000..a2e4e1ec69 --- /dev/null +++ b/misc/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "rust-analyzer.cargo.features": [ + "disasm", + ], + "rust-analyzer.cfg.setTest": false, + // rust-analyzer bundled in the VSCode extension may only support Rust newer than 1.85.0. + // To avoid warnings, install rust-analyzer with `rustup component add rust-analyzer` and + // use `~/.cargo/bin/rust-analyzer` with the following config. + "rust-analyzer.server.path": "rust-analyzer", +} diff --git a/misc/.vscode/tasks.json b/misc/.vscode/tasks.json new file mode 100644 index 0000000000..045fe7e5c0 --- /dev/null +++ b/misc/.vscode/tasks.json @@ -0,0 +1,14 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "type": "shell", + "command": "make -j", + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} diff --git a/misc/README b/misc/README index 1728b42700..86b680e724 100644 --- a/misc/README +++ b/misc/README @@ -4,3 +4,4 @@ rb_optparse.zsh zsh completion script ruby-style.el Ruby's C/C++ mode style for emacs lldb_cruby.py LLDB port of debug utility test_lldb_cruby.rb test file for LLDB port +.vscode example VSCode config to debug Ruby diff --git a/misc/call_fuzzer.rb b/misc/call_fuzzer.rb new file mode 100644 index 0000000000..c3f9f90490 --- /dev/null +++ b/misc/call_fuzzer.rb @@ -0,0 +1,372 @@ +require 'optparse' +require 'set' + +# Number of iterations to test +num_iters = 10_000 + +# Parse the command-line options +OptionParser.new do |opts| + 
opts.on("--num-iters=N") do |n| + num_iters = n.to_i + end +end.parse! + +# Format large numbers with comma separators for readability +def format_number(pad, number) + s = number.to_s + i = s.index('.') || s.size + s.insert(i -= 3, ',') while i > 3 + s.rjust(pad, ' ') +end + +# Wrap an integer to pass as argument +# We use this so we can have some object arguments +class IntWrapper + def initialize(v) + # Force the object to have a random shape + if rand() < 50 + @v0 = 1 + end + if rand() < 50 + @v1 = 1 + end + if rand() < 50 + @v2 = 1 + end + if rand() < 50 + @v3 = 1 + end + if rand() < 50 + @v4 = 1 + end + if rand() < 50 + @v5 = 1 + end + if rand() < 50 + @v6 = 1 + end + + @value = v + end + + attr_reader :value +end + +# Generate a random argument value, integer or string or object +def sample_arg() + c = ['int', 'string', 'object'].sample() + + if c == 'int' + return rand(0...100) + end + + if c == 'string' + return 'f' * rand(0...100) + end + + if c == 'object' + return IntWrapper.new(rand(0...100)) + end + + raise "should not get here" +end + +# Evaluate the value of an argument with respect to the checksum +def arg_val(arg) + if arg.kind_of? Integer + return arg + end + + if arg.kind_of? String + return arg.length + end + + if arg.kind_of? 
Object + return arg.value + end + + raise "unknown arg type" +end + +# List of parameters/arguments for a method +class ParamList + def initialize() + self.sample_params() + self.sample_args() + end + + # Sample/generate a random set of parameters for a method + def sample_params() + # Choose how many positional arguments to use, and how many are optional + num_pargs = rand(10) + @opt_parg_idx = rand(num_pargs) + @num_opt_pargs = rand(num_pargs + 1 - @opt_parg_idx) + @num_pargs_req = num_pargs - @num_opt_pargs + @pargs = (0...num_pargs).map do |i| + { + :name => "p#{i}", + :optional => (i >= @opt_parg_idx && i < @opt_parg_idx + @num_opt_pargs) + } + end + + # Choose how many kwargs to use, and how many are optional + num_kwargs = rand(10) + @kwargs = (0...num_kwargs).map do |i| + { + :name => "k#{i}", + :optional => rand() < 0.5 + } + end + + # Choose whether to have rest parameters or not + @has_rest = @num_opt_pargs == 0 && rand() < 0.5 + @has_kwrest = rand() < 0.25 + + # Choose whether to have a named block parameter or not + @has_block_param = rand() < 0.25 + end + + # Sample/generate a random set of arguments corresponding to the parameters + def sample_args() + # Choose how many positional args to pass + num_pargs_passed = rand(@num_pargs_req..@pargs.size) + + # How many optional arguments will be filled + opt_pargs_filled = num_pargs_passed - @num_pargs_req + + @pargs.each_with_index do |parg, i| + if parg[:optional] + parg[:default] = rand(100) + end + + if !parg[:optional] || i < @opt_parg_idx + opt_pargs_filled + parg[:argval] = rand(100) + end + end + + @kwargs.each_with_index do |kwarg, i| + if kwarg[:optional] + kwarg[:default] = rand(100) + end + + if !kwarg[:optional] || rand() < 0.5 + kwarg[:argval] = rand(100) + end + end + + # Randomly pass a block or not + @block_arg = nil + if rand() < 0.5 + @block_arg = rand(100) + end + end + + # Compute the expected checksum of arguments ahead of time + def compute_checksum() + checksum = 0 + + 
@pargs.each_with_index do |arg, i| + value = (arg.key? :argval)? arg[:argval]:arg[:default] + checksum += (i+1) * arg_val(value) + end + + @kwargs.each_with_index do |arg, i| + value = (arg.key? :argval)? arg[:argval]:arg[:default] + checksum += (i+1) * arg_val(value) + end + + if @block_arg + if @has_block_param + checksum += arg_val(@block_arg) + end + + checksum += arg_val(@block_arg) + end + + checksum + end + + # Generate code for the method signature and method body + def gen_method_str() + m_str = "def m(" + + @pargs.each do |arg| + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += arg[:name] + + # If this has a default value + if arg[:optional] + m_str += " = #{arg[:default]}" + end + end + + if @has_rest + if !m_str.end_with?("(") + m_str += ", " + end + m_str += "*rest" + end + + @kwargs.each do |arg| + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += "#{arg[:name]}:" + + # If this has a default value + if arg[:optional] + m_str += " #{arg[:default]}" + end + end + + if @has_kwrest + if !m_str.end_with?("(") + m_str += ", " + end + m_str += "**kwrest" + end + + if @has_block_param + if !m_str.end_with?("(") + m_str += ", " + end + + m_str += "&block" + end + + m_str += ")\n" + + # Add some useless locals + rand(0...16).times do |i| + m_str += "local#{i} = #{i}\n" + end + + # Add some useless if statements + @pargs.each_with_index do |arg, i| + if rand() < 50 + m_str += "if #{arg[:name]} > 4; end\n" + end + end + + m_str += "checksum = 0\n" + + @pargs.each_with_index do |arg, i| + m_str += "checksum += #{i+1} * arg_val(#{arg[:name]})\n" + end + + @kwargs.each_with_index do |arg, i| + m_str += "checksum += #{i+1} * arg_val(#{arg[:name]})\n" + end + + if @has_block_param + m_str += "if block; r = block.call; checksum += arg_val(r); end\n" + end + + m_str += "if block_given?; r = yield; checksum += arg_val(r); end\n" + + if @has_rest + m_str += "raise 'rest is not array' unless rest.kind_of?(Array)\n" + m_str += "raise 'rest size not 
integer' unless rest.size.kind_of?(Integer)\n" + end + + if @has_kwrest + m_str += "raise 'kwrest is not a hash' unless kwrest.kind_of?(Hash)\n" + m_str += "raise 'kwrest size not integer' unless kwrest.size.kind_of?(Integer)\n" + end + + m_str += "checksum\n" + m_str += "end" + + m_str + end + + # Generate code to call into the method and pass the arguments + def gen_call_str() + c_str = "m(" + + @pargs.each_with_index do |arg, i| + if !arg.key? :argval + next + end + + if !c_str.end_with?("(") + c_str += ", " + end + + c_str += "#{arg[:argval]}" + end + + @kwargs.each_with_index do |arg, i| + if !arg.key? :argval + next + end + + if !c_str.end_with?("(") + c_str += ", " + end + + c_str += "#{arg[:name]}: #{arg[:argval]}" + end + + c_str += ")" + + # Randomly pass a block or not + if @block_arg + c_str += " { #{@block_arg} }" + end + + c_str + end +end + +iseqs_compiled_start = RubyVM::YJIT.runtime_stats[:compiled_iseq_entry] +start_time = Time.now.to_f + +num_iters.times do |i| + puts "Iteration #{i}" + + lst = ParamList.new() + m_str = lst.gen_method_str() + c_str = lst.gen_call_str() + checksum = lst.compute_checksum() + + f = Object.new + + # Define the method on f + puts "Defining" + p m_str + f.instance_eval(m_str) + #puts RubyVM::InstructionSequence.disasm(f.method(:m)) + #exit 0 + + puts "Calling" + c_str = "f.#{c_str}" + p c_str + r = eval(c_str) + puts "checksum=#{r}" + + if r != checksum + raise "return value #{r} doesn't match checksum #{checksum}" + end + + puts "" +end + +# Make sure that YJIT actually compiled the tests we ran +# Should be run with --yjit-call-threshold=1 +iseqs_compiled_end = RubyVM::YJIT.runtime_stats[:compiled_iseq_entry] +if iseqs_compiled_end - iseqs_compiled_start < num_iters + raise "YJIT did not compile enough ISEQs" +end + +puts "Code region size: #{ format_number(0, RubyVM::YJIT.runtime_stats[:code_region_size]) }" + +end_time = Time.now.to_f +itrs_per_sec = num_iters / (end_time - start_time) +itrs_per_hour = 3600 * 
itrs_per_sec +puts "#{'%.1f' % itrs_per_sec} iterations/s" +puts "#{format_number(0, itrs_per_hour.round)} iterations/hour" diff --git a/misc/call_fuzzer.sh b/misc/call_fuzzer.sh new file mode 100755 index 0000000000..cf4ec76fe8 --- /dev/null +++ b/misc/call_fuzzer.sh @@ -0,0 +1,13 @@ +# Stop at first error +set -e + +# TODO +# TODO: boost --num-iters to 1M+ for actual test +# TODO +export NUM_ITERS=25000 + +# Enable code GC so we don't stop compiling when we hit the code size limit +ruby --yjit-call-threshold=1 --yjit-code-gc misc/call_fuzzer.rb --num-iters=$NUM_ITERS + +# Do another pass with --verify-ctx +ruby --yjit-call-threshold=1 --yjit-code-gc --yjit-verify-ctx misc/call_fuzzer.rb --num-iters=$NUM_ITERS diff --git a/misc/expand_tabs.rb b/misc/expand_tabs.rb new file mode 100755 index 0000000000..d26568eefc --- /dev/null +++ b/misc/expand_tabs.rb @@ -0,0 +1,178 @@ +#!/usr/bin/env ruby --disable-gems +# Add the following line to your `.git/hooks/pre-commit`: +# +# $ ruby --disable-gems misc/expand_tabs.rb +# + +require 'shellwords' +require 'tmpdir' +ENV['LC_ALL'] = 'C' + +class Git + def initialize(oldrev, newrev) + @oldrev = oldrev + @newrev = newrev + end + + # ["foo/bar.c", "baz.h", ...] + def updated_paths + with_clean_env do + IO.popen(['git', 'diff', '--cached', '--name-only', @newrev], &:readlines).each(&:chomp!) + end + end + + # [0, 1, 4, ...] 
+ def updated_lines(file) + lines = [] + revs_pattern = ("0"*40) + " " + with_clean_env { IO.popen(['git', 'blame', '-l', '--', file], &:readlines) }.each_with_index do |line, index| + if line.b.start_with?(revs_pattern) + lines << index + end + end + lines + end + + def add(file) + git('add', file) + end + + def toplevel + IO.popen(['git', 'rev-parse', '--show-toplevel'], &:read).chomp + end + + private + + def git(*args) + cmd = ['git', *args].shelljoin + unless with_clean_env { system(cmd) } + abort "Failed to run: #{cmd}" + end + end + + def with_clean_env + git_dir = ENV.delete('GIT_DIR') # this overcomes '-C' or pwd + yield + ensure + ENV['GIT_DIR'] = git_dir if git_dir + end +end + +DEFAULT_GEM_LIBS = %w[ + bundler + delegate + did_you_mean + english + erb + error_highlight + fileutils + find + forwardable + ipaddr + net-http + net-protocol + open3 + open-uri + optparse + ostruct + pp + prettyprint + prism + resolv + rubygems + securerandom + shellwords + singleton + tempfile + time + timeout + tmpdir + un + tsort + uri + weakref + yaml +] + +DEFAULT_GEM_EXTS = %w[ + date + digest + etc + fcntl + io-console + io-nonblock + io-wait + json + openssl + pathname + psych + stringio + strscan + zlib +] + +EXPANDTAB_IGNORED_FILES = [ + # default gems whose master is GitHub + %r{\Abin/(?!erb)\w+\z}, + *DEFAULT_GEM_LIBS.flat_map { |lib| + [ + %r{\Alib/#{lib}/}, + %r{\Alib/#{lib}\.gemspec\z}, + %r{\Alib/#{lib}\.rb\z}, + %r{\Atest/#{lib}/}, + ] + }, + *DEFAULT_GEM_EXTS.flat_map { |ext| + [ + %r{\Aext/#{ext}/}, + %r{\Atest/#{ext}/}, + ] + }, + + # vendoring (ccan) + %r{\Accan/}, + + # vendoring (onigmo) + %r{\Aenc/}, + %r{\Ainclude/ruby/onigmo\.h\z}, + %r{\Areg.+\.(c|h)\z}, + + # explicit or implicit `c-file-style: "linux"` + %r{\Aaddr2line\.c\z}, + %r{\Amissing/}, + %r{\Astrftime\.c\z}, + %r{\Avsnprintf\.c\z}, +] + +git = Git.new('HEAD^', 'HEAD') + +Dir.chdir(git.toplevel) do + paths = git.updated_paths + paths.select! 
{|f| + (f.end_with?('.c') || f.end_with?('.h') || f == 'insns.def') && EXPANDTAB_IGNORED_FILES.all? { |re| !f.match(re) } + } + files = paths.select {|n| File.file?(n)} + exit if files.empty? + + files.each do |f| + src = File.binread(f) rescue next + + expanded = false + updated_lines = git.updated_lines(f) + unless updated_lines.empty? + src.gsub!(/^.*$/).with_index do |line, lineno| + if updated_lines.include?(lineno) && line.start_with?("\t") # last-committed line with hard tabs + expanded = true + line.sub(/\A\t+/) { |tabs| ' ' * (8 * tabs.length) } + else + line + end + end + end + + if expanded + File.binwrite(f, src) + git.add(f) + end + end +end diff --git a/misc/gdb.py b/misc/gdb.py new file mode 100644 index 0000000000..6034a389bb --- /dev/null +++ b/misc/gdb.py @@ -0,0 +1,181 @@ +import argparse +import textwrap + +# usage: [-h] [-a | --all | --no-all] [-s STACK_SIZE] [uplevel] +# +# Dump a control frame +# +# positional arguments: +# uplevel CFP offset from the stack top +# +# options: +# -h, --help show this help message and exit +# -a, --all, --no-all dump all frames +# -s STACK_SIZE, --stack-size STACK_SIZE +# override stack_size (useful for JIT frames) +class CFP(gdb.Command): + FRAME_MAGICS = [ + # frame types + 'VM_FRAME_MAGIC_METHOD', + 'VM_FRAME_MAGIC_BLOCK', + 'VM_FRAME_MAGIC_CLASS', + 'VM_FRAME_MAGIC_TOP', + 'VM_FRAME_MAGIC_CFUNC', + 'VM_FRAME_MAGIC_IFUNC', + 'VM_FRAME_MAGIC_EVAL', + 'VM_FRAME_MAGIC_RESCUE', + 'VM_FRAME_MAGIC_DUMMY', + ] + FRAME_FLAGS = [ + # frame flag + 'VM_FRAME_FLAG_FINISH', + 'VM_FRAME_FLAG_BMETHOD', + 'VM_FRAME_FLAG_CFRAME', + 'VM_FRAME_FLAG_LAMBDA', + 'VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM', + 'VM_FRAME_FLAG_CFRAME_KW', + 'VM_FRAME_FLAG_PASSED', + # env flag + 'VM_ENV_FLAG_LOCAL', + 'VM_ENV_FLAG_ESCAPED', + 'VM_ENV_FLAG_WB_REQUIRED', + 'VM_ENV_FLAG_ISOLATED', + ] + + def __init__(self): + super(CFP, self).__init__('cfp', gdb.COMMAND_USER) + + self.parser = argparse.ArgumentParser(description='Dump a control frame') + 
self.parser.add_argument('uplevel', type=int, nargs='?', default=0, help='CFP offset from the stack top') + self.parser.add_argument('-a', '--all', action=argparse.BooleanOptionalAction, help='dump all frames') + self.parser.add_argument('-s', '--stack-size', type=int, help='override stack_size (useful for JIT frames)') + + def invoke(self, args, from_tty): + try: + args = self.parser.parse_args(args.split()) + except SystemExit: + return + cfp = f'(ruby_current_ec->cfp + ({args.uplevel}))' + end_cfp = self.get_int('ruby_current_ec->vm_stack + ruby_current_ec->vm_stack_size') + cfp_index = int((end_cfp - self.get_int(cfp) - 1) / self.get_int('sizeof(rb_control_frame_t)')) + + if args.all: + cfp_count = int((end_cfp - self.get_int('ruby_current_ec->cfp')) / self.get_int('sizeof(rb_control_frame_t)')) - 1 # exclude dummy CFP + for i in range(cfp_count): + print('-' * 80) + self.invoke(str(cfp_count - i - 1), from_tty) + return + + print('CFP (addr=0x{:x}, index={}):'.format(self.get_int(cfp), cfp_index)) + gdb.execute(f'p *({cfp})') + print() + + if self.get_int(f'{cfp}->iseq'): + local_size = self.get_int(f'{cfp}->iseq->body->local_table_size - {cfp}->iseq->body->param.size') + param_size = self.get_int(f'{cfp}->iseq->body->param.size') + + if local_size: + print(f'Params (size={param_size}):') + for i in range(-3 - local_size - param_size, -3 - local_size): + self.print_stack(cfp, i, self.rp(cfp, i)) + print() + + if param_size: + print(f'Locals (size={local_size}):') + for i in range(-3 - local_size, -3): + self.print_stack(cfp, i, self.rp(cfp, i)) + print() + + print('Env:') + self.print_env(cfp, -3, self.rp_env(cfp, -3)) + self.print_env(cfp, -2, self.specval(cfp, -2)) + self.print_env(cfp, -1, self.frame_types(cfp, -1)) + print() + + # We can't calculate BP for the first frame. + # vm_base_ptr doesn't work for C frames either. 
+ if cfp_index > 0 and self.get_int(f'{cfp}->iseq'): + if args.stack_size is not None: + stack_size = args.stack_size + else: + stack_size = int((self.get_int(f'{cfp}->sp') - self.get_int(f'vm_base_ptr({cfp})')) / 8) + print(f'Stack (size={stack_size}):') + for i in range(0, stack_size): + self.print_stack(cfp, i, self.rp(cfp, i)) + print(self.regs(cfp, stack_size)) + + def print_env(self, cfp, bp_index, content): + ep_index = bp_index + 1 + address = self.get_int(f'((rb_control_frame_t *){cfp})->ep + {ep_index}') + value = self.get_env(cfp, bp_index) + regs = self.regs(cfp, bp_index) + if content: + content = textwrap.indent(content, ' ' * 3).lstrip() # Leave the regs column empty + content = f'{content} ' + print('{:2} 0x{:x} [{}] {}(0x{:x})'.format(regs, address, bp_index, content, value)) + + def print_stack(self, cfp, bp_index, content): + address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}') + value = self.get_value(cfp, bp_index) + regs = self.regs(cfp, bp_index) + if content: + content = textwrap.indent(content, ' ' * 3).lstrip() # Leave the regs column empty + content = f'{content} ' + print('{:2} 0x{:x} [{}] {}(0x{:x})'.format(regs, address, bp_index, content, value)) + + def regs(self, cfp, bp_index): + address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}') + regs = [] + for reg, field in { 'EP': 'ep', 'SP': 'sp' }.items(): + if address == self.get_int(f'{cfp}->{field}'): + regs.append(reg) + return ' '.join(regs) + + def rp(self, cfp, bp_index): + value = self.get_value(cfp, bp_index) + return self.get_string(f'rp {value}').rstrip() + + def rp_env(self, cfp, bp_index): + value = self.get_env(cfp, bp_index) + return self.get_string(f'rp {value}').rstrip() + + # specval: block_handler or previous EP + def specval(self, cfp, bp_index): + value = self.get_env(cfp, bp_index) + if value == 0: + return 'VM_BLOCK_HANDLER_NONE' + if value == self.get_int('rb_block_param_proxy'): + return 'rb_block_param_proxy' + return '' + + def frame_types(self, cfp, 
bp_index): + types = [] + value = self.get_env(cfp, bp_index) + + magic_mask = self.get_int('VM_FRAME_MAGIC_MASK') + for magic in self.FRAME_MAGICS: + magic_value = self.get_int(magic) + if value & magic_mask == magic_value: + types.append(magic) + + for flag in self.FRAME_FLAGS: + flag_value = self.get_int(flag) + if value & flag_value: + types.append(flag) + + return ' | '.join(types) + + def get_env(self, cfp, bp_index): + ep_index = bp_index + 1 + return self.get_int(f'((rb_control_frame_t *){cfp})->ep[{ep_index}]') + + def get_value(self, cfp, bp_index): + return self.get_int(f'vm_base_ptr({cfp})[{bp_index}]') + + def get_int(self, expr): + return int(self.get_string(f'printf "%ld", ({expr})')) + + def get_string(self, expr): + return gdb.execute(expr, to_string=True) + +CFP() diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index 71bb98679f..b3d4fb509a 100755..100644 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python #coding: utf-8 # # Usage: run `command script import -r misc/lldb_cruby.py` on LLDB @@ -6,10 +5,162 @@ # Test: misc/test_lldb_cruby.rb # +from __future__ import print_function import lldb -import commands import os +import inspect +import sys import shlex +import platform +import glob +import math + +from lldb_rb.constants import * + +# BEGIN FUNCTION STYLE DECLS +# This will be refactored to use class style decls in the misc/commands dir +class BackTrace: + VM_FRAME_MAGIC_METHOD = 0x11110001 + VM_FRAME_MAGIC_BLOCK = 0x22220001 + VM_FRAME_MAGIC_CLASS = 0x33330001 + VM_FRAME_MAGIC_TOP = 0x44440001 + VM_FRAME_MAGIC_CFUNC = 0x55550001 + VM_FRAME_MAGIC_IFUNC = 0x66660001 + VM_FRAME_MAGIC_EVAL = 0x77770001 + VM_FRAME_MAGIC_RESCUE = 0x78880001 + VM_FRAME_MAGIC_DUMMY = 0x79990001 + + VM_FRAME_MAGIC_MASK = 0x7fff0001 + + VM_FRAME_MAGIC_NAME = { + VM_FRAME_MAGIC_TOP: "TOP", + VM_FRAME_MAGIC_METHOD: "METHOD", + VM_FRAME_MAGIC_CLASS: "CLASS", + VM_FRAME_MAGIC_BLOCK: "BLOCK", + VM_FRAME_MAGIC_CFUNC: "CFUNC", + 
VM_FRAME_MAGIC_IFUNC: "IFUNC", + VM_FRAME_MAGIC_EVAL: "EVAL", + VM_FRAME_MAGIC_RESCUE: "RESCUE", + 0: "-----" + } + + def __init__(self, debugger, command, result, internal_dict): + self.debugger = debugger + self.command = command + self.result = result + + self.target = debugger.GetSelectedTarget() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + self.tRString = self.target.FindFirstType("struct RString").GetPointerType() + self.tRArray = self.target.FindFirstType("struct RArray").GetPointerType() + + rb_cft_len = len("rb_control_frame_t") + method_type_length = sorted(map(len, self.VM_FRAME_MAGIC_NAME.values()), reverse=True)[0] + # cfp address, method type, function name + self.fmt = "%%-%ds %%-%ds %%s" % (rb_cft_len, method_type_length) + + def vm_frame_magic(self, cfp): + ep = cfp.GetValueForExpressionPath("->ep") + frame_type = ep.GetChildAtIndex(0).GetValueAsUnsigned() & self.VM_FRAME_MAGIC_MASK + return self.VM_FRAME_MAGIC_NAME.get(frame_type, "(none)") + + def rb_iseq_path_str(self, iseq): + tRBasic = self.target.FindFirstType("::RBasic").GetPointerType() + + pathobj = iseq.GetValueForExpressionPath("->body->location.pathobj") + pathobj = pathobj.Cast(tRBasic) + flags = pathobj.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + flType = flags & RUBY_T_MASK + + if flType == RUBY_T_ARRAY: + pathobj = pathobj.Cast(self.tRArray) + + if flags & RUBY_FL_USER1: + len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5|RUBY_FL_USER6|RUBY_FL_USER7|RUBY_FL_USER8|RUBY_FL_USER9)) >> (RUBY_FL_USHIFT+3)) + ptr = pathobj.GetValueForExpressionPath("->as.ary") + else: + len = pathobj.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() + ptr = pathobj.GetValueForExpressionPath("->as.heap.ptr") + + pathobj = ptr.GetChildAtIndex(0) + + pathobj = pathobj.Cast(self.tRString) + ptr, len = string2cstr(pathobj) + err = lldb.SBError() + path = 
self.target.process.ReadMemory(ptr, len, err) + if err.Success(): + return path.decode("utf-8") + else: + return "unknown" + + def dump_iseq_frame(self, cfp, iseq): + m = self.vm_frame_magic(cfp) + + if iseq.GetValueAsUnsigned(): + iseq_label = iseq.GetValueForExpressionPath("->body->location.label") + path = self.rb_iseq_path_str(iseq) + ptr, len = string2cstr(iseq_label.Cast(self.tRString)) + + err = lldb.SBError() + iseq_name = self.target.process.ReadMemory(ptr, len, err) + if err.Success(): + iseq_name = iseq_name.decode("utf-8") + else: + iseq_name = "error!!" + + else: + print("No iseq", file=self.result) + + print(self.fmt % (("%0#12x" % cfp.GetAddress().GetLoadAddress(self.target)), m, "%s %s" % (path, iseq_name)), file=self.result) + + def dump_cfunc_frame(self, cfp): + print(self.fmt % ("%0#12x" % (cfp.GetAddress().GetLoadAddress(self.target)), "CFUNC", ""), file=self.result) + + def print_bt(self, ec): + tRbExecutionContext_t = self.target.FindFirstType("rb_execution_context_t") + ec = ec.Cast(tRbExecutionContext_t.GetPointerType()) + vm_stack = ec.GetValueForExpressionPath("->vm_stack") + vm_stack_size = ec.GetValueForExpressionPath("->vm_stack_size") + + last_cfp_frame = ec.GetValueForExpressionPath("->cfp") + cfp_type_p = last_cfp_frame.GetType() + + stack_top = vm_stack.GetValueAsUnsigned() + ( + vm_stack_size.GetValueAsUnsigned() * vm_stack.GetType().GetByteSize()) + + cfp_frame_size = cfp_type_p.GetPointeeType().GetByteSize() + + start_cfp = stack_top + # Skip dummy frames + start_cfp -= cfp_frame_size + start_cfp -= cfp_frame_size + + last_cfp = last_cfp_frame.GetValueAsUnsigned() + + size = ((start_cfp - last_cfp) / cfp_frame_size) + 1 + + print(self.fmt % ("rb_control_frame_t", "TYPE", ""), file=self.result) + + curr_addr = start_cfp + + while curr_addr >= last_cfp: + cfp = self.target.CreateValueFromAddress("cfp", lldb.SBAddress(curr_addr, self.target), cfp_type_p.GetPointeeType()) + ep = cfp.GetValueForExpressionPath("->ep") + iseq = 
cfp.GetValueForExpressionPath("->iseq") + + frame_type = ep.GetChildAtIndex(0).GetValueAsUnsigned() & self.VM_FRAME_MAGIC_MASK + + if iseq.GetValueAsUnsigned(): + pc = cfp.GetValueForExpressionPath("->pc") + if pc.GetValueAsUnsigned(): + self.dump_iseq_frame(cfp, iseq) + else: + if frame_type == self.VM_FRAME_MAGIC_CFUNC: + self.dump_cfunc_frame(cfp) + + curr_addr -= cfp_frame_size def lldb_init(debugger): target = debugger.GetSelectedTarget() @@ -18,10 +169,18 @@ def lldb_init(debugger): value_types = [] g = globals() + + imemo_types = target.FindFirstType('enum imemo_type') + enum_members = imemo_types.GetEnumMembers() + + for i in range(enum_members.GetSize()): + member = enum_members.GetTypeEnumMemberAtIndex(i) + g[member.GetName()] = member.GetValueAsUnsigned() + for enum in target.FindFirstGlobalVariable('ruby_dummy_gdb_enums'): enum = enum.GetType() members = enum.GetEnumMembers() - for i in xrange(0, members.GetSize()): + for i in range(0, members.GetSize()): member = members.GetTypeEnumMemberAtIndex(i) name = member.GetName() value = member.GetValueAsUnsigned() @@ -33,21 +192,21 @@ def lldb_init(debugger): def string2cstr(rstring): """Returns the pointer to the C-string in the given String object""" + if rstring.TypeIsPointerType(): + rstring = rstring.Dereference() flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned if flags & RUBY_T_MASK != RUBY_T_STRING: raise TypeError("not a string") + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) if flags & RUBY_FL_USER1: cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) - clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: - cptr = int(rstring.GetValueForExpressionPath(".as.ary").value, 0) - clen = (flags & RSTRING_EMBED_LEN_MASK) >> RSTRING_EMBED_LEN_SHIFT + cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) return cptr, clen -def output_string(ctx, rstring): +def output_string(debugger, result, rstring): cptr, 
clen = string2cstr(rstring) - expr = 'printf("%%.*s", (size_t)%d, (const char*)%d)' % (clen, cptr) - ctx.frame.EvaluateExpression(expr) + append_expression(debugger, "*(const char (*)[%d])%0#x" % (clen, cptr), result) def fixnum_p(x): return x & RUBY_FIXNUM_FLAG != 0 @@ -66,6 +225,9 @@ def append_command_output(debugger, command, result): result.write(output1) result.write(output2) +def append_expression(debugger, expression, result): + append_command_output(debugger, "expression " + expression, result) + def lldb_rp(debugger, command, result, internal_dict): if not ('RUBY_Qfalse' in globals()): lldb_init(debugger) @@ -80,105 +242,129 @@ def lldb_rp(debugger, command, result, internal_dict): val = target.EvaluateExpression(command) error = val.GetError() if error.Fail(): - print >> result, error + print(error, file=result) return lldb_inspect(debugger, target, result, val) def lldb_inspect(debugger, target, result, val): num = val.GetValueAsSigned() if num == RUBY_Qfalse: - print >> result, 'false' + print('false', file=result) elif num == RUBY_Qtrue: - print >> result, 'true' + print('true', file=result) elif num == RUBY_Qnil: - print >> result, 'nil' + print('nil', file=result) elif num == RUBY_Qundef: - print >> result, 'undef' + print('undef', file=result) elif fixnum_p(num): - print >> result, num >> 1 + print(num >> 1, file=result) elif flonum_p(num): - append_command_output(debugger, "print rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) elif static_sym_p(num): if num < 128: - print >> result, "T_SYMBOL: %c" % num + print("T_SYMBOL: %c" % num, file=result) else: - print >> result, "T_SYMBOL: (%x)" % num + print("T_SYMBOL: (%x)" % num, file=result) + append_expression(debugger, "rb_id2name(%0#x)" % (num >> 8), result) elif num & RUBY_IMMEDIATE_MASK: - print >> result, 'immediate(%x)' % num + print('immediate(%x)' % num, file=result) else: - tRBasic = 
target.FindFirstType("struct RBasic").GetPointerType() + tRBasic = target.FindFirstType("::RBasic").GetPointerType() + val = val.Cast(tRBasic) flags = val.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + flaginfo = "" + + page = get_page(lldb, target, val) + page_type = target.FindFirstType("struct heap_page").GetPointerType() + page.Cast(page_type) + + dump_bits(target, result, page, val.GetValueAsUnsigned()) + if (flags & RUBY_FL_PROMOTED) == RUBY_FL_PROMOTED: - print >> result, "[PROMOTED] " + flaginfo += "[PROMOTED] " if (flags & RUBY_FL_FREEZE) == RUBY_FL_FREEZE: - print >> result, "[FROZEN] " + flaginfo += "[FROZEN] " flType = flags & RUBY_T_MASK if flType == RUBY_T_NONE: - print >> result, 'T_NONE: %s' % val.Dereference() + print('T_NONE: %s%s' % (flaginfo, val.Dereference()), file=result) elif flType == RUBY_T_NIL: - print >> result, 'T_NIL: %s' % val.Dereference() + print('T_NIL: %s%s' % (flaginfo, val.Dereference()), file=result) elif flType == RUBY_T_OBJECT: - tRObject = target.FindFirstType("struct RObject").GetPointerType() - val = val.Cast(tRObject) - print >> result, 'T_OBJECT: %s' % val.Dereference() + result.write('T_OBJECT: %s' % flaginfo) + append_expression(debugger, "*(struct RObject*)%0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_CLASS or flType == RUBY_T_MODULE or flType == RUBY_T_ICLASS: - tRClass = target.FindFirstType("struct RClass").GetPointerType() - val = val.Cast(tRClass) - print >> result, 'T_%s: %s' % ('CLASS' if flType == RUBY_T_CLASS else 'MODULE' if flType == RUBY_T_MODULE else 'ICLASS', val.Dereference()) + result.write('T_%s: %s' % ('CLASS' if flType == RUBY_T_CLASS else 'MODULE' if flType == RUBY_T_MODULE else 'ICLASS', flaginfo)) + append_expression(debugger, "*(struct RClass*)%0#x" % val.GetValueAsUnsigned(), result) + tRClass = target.FindFirstType("struct RClass") + if not val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): + append_expression(debugger, "*(struct rb_classext_struct*)%0#x" 
% (val.GetValueAsUnsigned() + tRClass.GetByteSize()), result) elif flType == RUBY_T_STRING: + result.write('T_STRING: %s' % flaginfo) + encidx = ((flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) + encname = target.FindFirstType("enum ruby_preserved_encindex").GetEnumMembers().GetTypeEnumMemberAtIndex(encidx).GetName() + if encname is not None: + result.write('[%s] ' % encname[14:]) + else: + result.write('[enc=%d] ' % encidx) tRString = target.FindFirstType("struct RString").GetPointerType() - val = val.Cast(tRString) - if flags & RSTRING_NOEMBED: - print >> result, val.GetValueForExpressionPath("->as.heap") + ptr, len = string2cstr(val.Cast(tRString)) + if len == 0: + result.write("(empty)\n") else: - print >> result, val.GetValueForExpressionPath("->as.ary") + append_expression(debugger, "*(const char (*)[%d])%0#x" % (len, ptr), result) elif flType == RUBY_T_SYMBOL: + result.write('T_SYMBOL: %s' % flaginfo) tRSymbol = target.FindFirstType("struct RSymbol").GetPointerType() - print >> result, val.Cast(tRSymbol).Dereference() + val = val.Cast(tRSymbol) + append_expression(debugger, '(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned(), result) + tRString = target.FindFirstType("struct RString").GetPointerType() + output_string(debugger, result, val.GetValueForExpressionPath("->fstr").Cast(tRString)) elif flType == RUBY_T_ARRAY: tRArray = target.FindFirstType("struct RArray").GetPointerType() val = val.Cast(tRArray) if flags & RUBY_FL_USER1: - len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4)) >> (RUBY_FL_USHIFT+3)) + len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5|RUBY_FL_USER6|RUBY_FL_USER7|RUBY_FL_USER8|RUBY_FL_USER9)) >> (RUBY_FL_USHIFT+3)) ptr = val.GetValueForExpressionPath("->as.ary") else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() ptr = val.GetValueForExpressionPath("->as.heap.ptr") - #print >> result, val.GetValueForExpressionPath("->as.heap") - result.write("T_ARRAY: len=%d" % len) + 
result.write("T_ARRAY: %slen=%d" % (flaginfo, len)) if flags & RUBY_FL_USER1: result.write(" (embed)") elif flags & RUBY_FL_USER2: shared = val.GetValueForExpressionPath("->as.heap.aux.shared").GetValueAsUnsigned() - result.write(" (shared) shared=%016x") + result.write(" (shared) shared=%016x" % shared) else: capa = val.GetValueForExpressionPath("->as.heap.aux.capa").GetValueAsSigned() result.write(" (ownership) capa=%d" % capa) if len == 0: - result.write(" {(empty)}") + result.write(" {(empty)}\n") else: result.write("\n") - append_command_output(debugger, "expression -Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), result) + if ptr.GetValueAsSigned() == 0: + append_expression(debugger, "-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned(), result) + else: + append_expression(debugger, "-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), result) elif flType == RUBY_T_HASH: - append_command_output(debugger, "p *(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) + result.write("T_HASH: %s" % flaginfo) + append_expression(debugger, "*(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_BIGNUM: tRBignum = target.FindFirstType("struct RBignum").GetPointerType() val = val.Cast(tRBignum) + sign = '+' if (flags & RUBY_FL_USER1) != 0 else '-' if flags & RUBY_FL_USER2: len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5)) >> (RUBY_FL_USHIFT+3)) - print >> result, "T_BIGNUM: len=%d (embed)" % len - append_command_output(debugger, "print ((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) + print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=result) + append_expression(debugger, "((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() - print >> result, "T_BIGNUM: len=%d" % len - print >> result, val.Dereference() - append_command_output(debugger, "expression -Z %x 
-fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) - # append_command_output(debugger, "x ((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) + print("T_BIGNUM: sign=%s len=%d" % (sign, len), file=result) + print(val.Dereference(), file=result) + append_expression(debugger, "-Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) + # append_expression(debugger, "((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) elif flType == RUBY_T_FLOAT: - tRFloat = target.FindFirstType("struct RFloat").GetPointerType() - val = val.Cast(tRFloat) - append_command_output(debugger, "p *(double *)%0#x" % val.GetValueForExpressionPath("->float_value").GetAddress(), result) + append_expression(debugger, "((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_RATIONAL: tRRational = target.FindFirstType("struct RRational").GetPointerType() val = val.Cast(tRRational) @@ -198,27 +384,55 @@ def lldb_inspect(debugger, target, result, val): result.Clear() if not imag.startswith("-"): imag = "+" + imag - print >> result, "(Complex) " + real + imag + "i" + print("(Complex) " + real + imag + "i", file=result) elif flType == RUBY_T_REGEXP: tRRegex = target.FindFirstType("struct RRegexp").GetPointerType() val = val.Cast(tRRegex) - print >> result, "(Regex)" - print >> result, "->src {", + print("(Regex) ->src {", file=result) lldb_inspect(debugger, target, result, val.GetValueForExpressionPath("->src")) - print >> result, "}" + print("}", file=result) elif flType == RUBY_T_DATA: tRTypedData = target.FindFirstType("struct RTypedData").GetPointerType() val = val.Cast(tRTypedData) flag = val.GetValueForExpressionPath("->typed_flag") if flag.GetValueAsUnsigned() == 1: - print >> result, "T_DATA: %s" % val.GetValueForExpressionPath("->type->wrap_struct_name") - 
append_command_output(debugger, "p *(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) + print("T_DATA: %s" % val.GetValueForExpressionPath("->type->wrap_struct_name"), file=result) + append_expression(debugger, "*(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) else: - print >> result, "T_DATA:" - append_command_output(debugger, "p *(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) + print("T_DATA:", file=result) + append_expression(debugger, "*(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_NODE: + tRTypedData = target.FindFirstType("struct RNode").GetPointerType() + nd_type = (flags & RUBY_NODE_TYPEMASK) >> RUBY_NODE_TYPESHIFT + append_expression(debugger, "(node_type) %d" % nd_type, result) + val = val.Cast(tRTypedData) + append_expression(debugger, "*(struct RNode *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_MOVED: + tRTypedData = target.FindFirstType("struct RMoved").GetPointerType() + val = val.Cast(tRTypedData) + append_expression(debugger, "*(struct RMoved *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_MATCH: + tRTypedData = target.FindFirstType("struct RMatch").GetPointerType() + val = val.Cast(tRTypedData) + append_expression(debugger, "*(struct RMatch *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_IMEMO: + # I'm not sure how to get IMEMO_MASK out of lldb. 
It's not in globals() + imemo_type = (flags >> RUBY_FL_USHIFT) & 0x0F # IMEMO_MASK + + print("T_IMEMO: ", file=result) + append_expression(debugger, "(enum imemo_type) %d" % imemo_type, result) + append_expression(debugger, "*(struct MEMO *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_STRUCT: + tRTypedData = target.FindFirstType("struct RStruct").GetPointerType() + val = val.Cast(tRTypedData) + append_expression(debugger, "*(struct RStruct *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_ZOMBIE: + tRZombie = target.FindFirstType("struct RZombie").GetPointerType() + val = val.Cast(tRZombie) + append_expression(debugger, "*(struct RZombie *) %0#x" % val.GetValueAsUnsigned(), result) else: - print >> result, "Not-handled type %0#x" % flType - print >> result, val + print("Not-handled type %0#x" % flType, file=result) + print(val, file=result) def count_objects(debugger, command, ctx, result, internal_dict): objspace = ctx.frame.EvaluateExpression("ruby_current_vm->objspace") @@ -230,7 +444,7 @@ def count_objects(debugger, command, ctx, result, internal_dict): counts[t] = 0 for i in range(0, num_pages): - print "\rcounting... %d/%d" % (i, num_pages), + print("\rcounting... 
%d/%d" % (i, num_pages), end="") page = objspace.GetValueForExpressionPath('.heap_pages.sorted[%d]' % i) p = page.GetChildMemberWithName('start') num_slots = page.GetChildMemberWithName('total_slots').unsigned @@ -241,13 +455,43 @@ def count_objects(debugger, command, ctx, result, internal_dict): counts[obj_type] += 1 total += num_slots - print "\rTOTAL: %d, FREE: %d" % (total, counts[0x00]) + print("\rTOTAL: %d, FREE: %d" % (total, counts[0x00])) for sym in value_types: - print "%s: %d" % (sym, counts[globals()[sym]]) + print("%s: %d" % (sym, counts[globals()[sym]])) def stack_dump_raw(debugger, command, ctx, result, internal_dict): ctx.frame.EvaluateExpression("rb_vmdebug_stack_dump_raw_current()") +def check_bits(page, bitmap_name, bitmap_index, bitmap_bit, v): + bits = page.GetChildMemberWithName(bitmap_name) + plane = bits.GetChildAtIndex(bitmap_index).GetValueAsUnsigned() + if (plane & bitmap_bit) != 0: + return v + else: + return ' ' + +def heap_page_body(debugger, command, ctx, result, internal_dict): + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + val = frame.EvaluateExpression(command) + page = get_page_body(lldb, target, val) + print("Page body address: ", page.GetAddress(), file=result) + print(page, file=result) + +def get_page_body(lldb, target, val): + tHeapPageBody = target.FindFirstType("struct heap_page_body") + addr = val.GetValueAsUnsigned() + page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, target) + return target.CreateValueFromAddress("page", address, tHeapPageBody) + +def get_page(lldb, target, val): + body = get_page_body(lldb, target, val) + return body.GetValueForExpressionPath("->header.page") + def dump_node(debugger, command, ctx, result, internal_dict): args = shlex.split(command) if not args: @@ -255,12 +499,249 @@ def dump_node(debugger, command, ctx, result, internal_dict): node = args[0] dump = 
ctx.frame.EvaluateExpression("(struct RString*)rb_parser_dump_tree((NODE*)(%s), 0)" % node) - output_string(ctx, dump) + output_string(ctx, result, dump) + +def rb_backtrace(debugger, command, result, internal_dict): + if not ('RUBY_Qfalse' in globals()): + lldb_init(debugger) + bt = BackTrace(debugger, command, result, internal_dict) + frame = bt.frame + + if command: + if frame.IsValid(): + val = frame.EvaluateExpression(command) + else: + val = target.EvaluateExpression(command) + + error = val.GetError() + if error.Fail(): + print >> result, error + return + else: + print("Need an EC for now") + + bt.print_bt(val) + +def dump_bits(target, result, page, object_address, end = "\n"): + slot_size = page.GetChildMemberWithName("heap").GetChildMemberWithName("slot_size").unsigned + byte_size = 40 ** math.floor(math.log(slot_size, 40)) + tUintPtr = target.FindFirstType("uintptr_t") # bits_t + + num_in_page = (object_address & HEAP_PAGE_ALIGN_MASK) // byte_size; + bits_bitlength = tUintPtr.GetByteSize() * 8 + bitmap_index = num_in_page // bits_bitlength + bitmap_offset = num_in_page & (bits_bitlength - 1) + bitmap_bit = 1 << bitmap_offset + + print("bits: [%s%s%s%s%s]" % ( + check_bits(page, "uncollectible_bits", bitmap_index, bitmap_bit, "L"), + check_bits(page, "mark_bits", bitmap_index, bitmap_bit, "M"), + check_bits(page, "pinned_bits", bitmap_index, bitmap_bit, "P"), + check_bits(page, "marking_bits", bitmap_index, bitmap_bit, "R"), + check_bits(page, "wb_unprotected_bits", bitmap_index, bitmap_bit, "U"), + ), end=end, file=result) + +class HeapPageIter: + def __init__(self, page, target): + self.page = page + self.target = target + self.start = page.GetChildMemberWithName('start').GetValueAsUnsigned(); + self.num_slots = page.GetChildMemberWithName('total_slots').unsigned + self.slot_size = page.GetChildMemberWithName('heap').GetChildMemberWithName('slot_size').unsigned + self.counter = 0 + self.tRBasic = target.FindFirstType("::RBasic") + + def is_valid(self): + 
heap_page_header_size = self.target.FindFirstType("struct heap_page_header").GetByteSize() + rvalue_size = self.slot_size + heap_page_obj_limit = int((HEAP_PAGE_SIZE - heap_page_header_size) / self.slot_size) + + return (heap_page_obj_limit - 1) <= self.num_slots <= heap_page_obj_limit + + def __iter__(self): + return self + + def __next__(self): + if self.counter < self.num_slots: + obj_addr_i = self.start + (self.counter * self.slot_size) + obj_addr = lldb.SBAddress(obj_addr_i, self.target) + slot_info = (self.counter, obj_addr_i, self.target.CreateValueFromAddress("object", obj_addr, self.tRBasic)) + self.counter += 1 + + return slot_info + else: + raise StopIteration + + +def dump_page_internal(page, target, process, thread, frame, result, debugger, highlight=None): + if not ('RUBY_Qfalse' in globals()): + lldb_init(debugger) + + ruby_type_map = ruby_types(debugger) + + freelist = [] + fl_start = page.GetChildMemberWithName('freelist').GetValueAsUnsigned() + free_slot = target.FindFirstType("struct free_slot") + + while fl_start > 0: + freelist.append(fl_start) + obj_addr = lldb.SBAddress(fl_start, target) + obj = target.CreateValueFromAddress("object", obj_addr, free_slot) + fl_start = obj.GetChildMemberWithName("next").GetValueAsUnsigned() + + page_iter = HeapPageIter(page, target) + if page_iter.is_valid(): + for (page_index, obj_addr, obj) in page_iter: + dump_bits(target, result, page, obj_addr, end= " ") + flags = obj.GetChildMemberWithName('flags').GetValueAsUnsigned() + flType = flags & RUBY_T_MASK + + flidx = ' ' + if flType == RUBY_T_NONE: + try: + flidx = "%3d" % freelist.index(obj_addr) + except ValueError: + flidx = ' -1' + + if flType == RUBY_T_NONE: + klass = obj.GetChildMemberWithName('klass').GetValueAsUnsigned() + result_str = "%s idx: [%3d] freelist_idx: {%s} Addr: %0#x (flags: %0#x, next: %0#x)" % (rb_type(flags, ruby_type_map), page_index, flidx, obj_addr, flags, klass) + else: + result_str = "%s idx: [%3d] freelist_idx: {%s} Addr: %0#x 
(flags: %0#x)" % (rb_type(flags, ruby_type_map), page_index, flidx, obj_addr, flags) + + if highlight == obj_addr: + result_str = ' '.join([result_str, "<<<<<"]) + + print(result_str, file=result) + else: + print("%s is not a valid heap page" % page, file=result) + + + +def dump_page(debugger, command, result, internal_dict): + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + tHeapPageP = target.FindFirstType("struct heap_page").GetPointerType() + page = frame.EvaluateExpression(command) + page = page.Cast(tHeapPageP) + + dump_page_internal(page, target, process, thread, frame, result, debugger) + + +def dump_page_rvalue(debugger, command, result, internal_dict): + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + val = frame.EvaluateExpression(command) + page = get_page(lldb, target, val) + page_type = target.FindFirstType("struct heap_page").GetPointerType() + page.Cast(page_type) + + dump_page_internal(page, target, process, thread, frame, result, debugger, highlight=val.GetValueAsUnsigned()) + + + +def rb_type(flags, ruby_types): + flType = flags & RUBY_T_MASK + return "%-10s" % (ruby_types.get(flType, ("%0#x" % flType))) + +def ruby_types(debugger): + target = debugger.GetSelectedTarget() + + types = {} + for enum in target.FindFirstGlobalVariable('ruby_dummy_gdb_enums'): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + + if name.startswith('RUBY_T_'): + types[value] = name.replace('RUBY_', '') + + return types + +def rb_ary_entry(target, ary, idx, result): + tRArray = target.FindFirstType("struct RArray").GetPointerType() + ary = ary.Cast(tRArray) + flags = 
ary.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + if flags & RUBY_FL_USER1: + ptr = ary.GetValueForExpressionPath("->as.ary") + else: + ptr = ary.GetValueForExpressionPath("->as.heap.ptr") + + ptr_addr = ptr.GetValueAsUnsigned() + (idx * ptr.GetType().GetByteSize()) + return target.CreateValueFromAddress("ary_entry[%d]" % idx, lldb.SBAddress(ptr_addr, target), ptr.GetType().GetPointeeType()) + +def rb_id_to_serial(id_val): + if id_val > tLAST_OP_ID: + return id_val >> RUBY_ID_SCOPE_SHIFT + else: + return id_val + +def rb_id2str(debugger, command, result, internal_dict): + if not ('RUBY_Qfalse' in globals()): + lldb_init(debugger) + + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + global_symbols = target.FindFirstGlobalVariable("ruby_global_symbols") + + id_val = frame.EvaluateExpression(command).GetValueAsUnsigned() + num = rb_id_to_serial(id_val) + + last_id = global_symbols.GetChildMemberWithName("last_id").GetValueAsUnsigned() + ID_ENTRY_SIZE = 2 + ID_ENTRY_UNIT = int(target.FindFirstGlobalVariable("ID_ENTRY_UNIT").GetValue()) + + ids = global_symbols.GetChildMemberWithName("ids") + + if (num <= last_id): + idx = num // ID_ENTRY_UNIT + ary = rb_ary_entry(target, ids, idx, result) + pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE + id_str = rb_ary_entry(target, ary, pos, result) + lldb_inspect(debugger, target, result, id_str) +# END FUNCTION STYLE DECLS + + +load_dir, _ = os.path.split(os.path.realpath(__file__)) + +for fname in glob.glob(f"{load_dir}/lldb_rb/commands/*_command.py"): + _, basename = os.path.split(fname) + mname, _ = os.path.splitext(basename) + + exec(f"import lldb_rb.commands.{mname}") def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp rp") + # Register all classes that subclass RbBaseCommand + + for memname, mem in 
inspect.getmembers(sys.modules["lldb_rb.rb_base_command"]): + if memname == "RbBaseCommand": + for sclass in mem.__subclasses__(): + sclass.register_lldb_command(debugger, f"{__name__}.{sclass.__module__}") + + + ## FUNCTION INITS - These should be removed when converted to class commands + debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp old_rp") debugger.HandleCommand("command script add -f lldb_cruby.count_objects rb_count_objects") debugger.HandleCommand("command script add -f lldb_cruby.stack_dump_raw SDR") debugger.HandleCommand("command script add -f lldb_cruby.dump_node dump_node") - lldb_init(debugger) - print "lldb scripts for ruby has been installed." + debugger.HandleCommand("command script add -f lldb_cruby.heap_page_body heap_page_body") + debugger.HandleCommand("command script add -f lldb_cruby.rb_backtrace rbbt") + debugger.HandleCommand("command script add -f lldb_cruby.dump_page dump_page") + debugger.HandleCommand("command script add -f lldb_cruby.dump_page_rvalue dump_page_rvalue") + debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str old_rb_id2str") + + lldb_rb.rb_base_command.RbBaseCommand.lldb_init(debugger) + + print("lldb scripts for ruby has been installed.") diff --git a/misc/lldb_disasm.py b/misc/lldb_disasm.py new file mode 100644 index 0000000000..ab759f009a --- /dev/null +++ b/misc/lldb_disasm.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python +#coding: utf-8 +# +# Usage: run `command script import -r misc/lldb_disasm.py` on LLDB +# +# +# (lldb) p iseq +# (rb_iseq_t *) $147 = 0x0000000101068400 +# (lldb) rbdisasm iseq +# 0000 putspecialobject( 3 ) +# 0002 putnil +# 0003 defineclass( ID: 0x560b, (rb_iseq_t *)0x1010681d0, 2 ) +# 0007 pop +# 0008 putspecialobject( 3 ) +# 0010 putnil +# 0011 defineclass( ID: 0x56eb, (rb_iseq_t *)0x101063b58, 2 ) +# 0015 leave + + +import lldb +import os +import shlex + +class IseqDisassembler: + TS_VARIABLE = b'.'[0] + TS_CALLDATA = b'C'[0] + TS_CDHASH = b'H'[0] + TS_IC = b'K'[0] + 
TS_IVC = b'A'[0] + TS_ICVARC = b'J'[0] + TS_ID = b'I'[0] + TS_ISE = b'T'[0] + TS_ISEQ = b'S'[0] + TS_OFFSET = b'O'[0] + TS_VALUE = b'V'[0] + TS_LINDEX = b'L'[0] + TS_FUNCPTR = b'F'[0] + TS_NUM = b'N'[0] + TS_BUILTIN = b'R'[0] + + ISEQ_OPT_DISPATCH = { + TS_BUILTIN: "(rb_builtin_function *)%0#x", + TS_NUM: "%d", + TS_FUNCPTR: "(rb_insn_func_t) %0#x", + TS_LINDEX: "%d", + TS_VALUE: "(VALUE)%0#x", + TS_OFFSET: "%d", + TS_ISEQ: "(rb_iseq_t *)%0#x", + TS_ISE: "(iseq_inline_storage_entry *)%0#x", + TS_ID: "ID: %0#x", + TS_IVC: "(struct iseq_inline_iv_cache_entry *)%0#x", + TS_ICVARC: "(struct iseq_inline_cvar_cache_entry *)%0#x", + TS_IC: "(struct iseq_inline_cache_entry *)%0#x", + TS_CDHASH: "CDHASH (VALUE)%0#x", + TS_CALLDATA: "(struct rb_call_data *)%0#x", + TS_VARIABLE: "VARIABLE %0#x", + } + + def __init__(self, debugger, command, result, internal_dict): + self.debugger = debugger + self.command = command + self.result = result + self.internal_dict = internal_dict + + self.target = debugger.GetSelectedTarget() + self.insns_address_table = self.__get_insns_address_table() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + self.addr2insn = self.build_addr2insn(self.target) + self.tChar = self.target.FindFirstType("char") + + def disasm(self, val): + tRbISeq = self.target.FindFirstType("struct rb_iseq_struct").GetPointerType() + val = val.Cast(tRbISeq) + iseq_size = val.GetValueForExpressionPath("->body->iseq_size").GetValueAsUnsigned() + iseqs = val.GetValueForExpressionPath("->body->iseq_encoded") + idx = 0 + print("PC IDX insn_name(operands) ", file=self.result) + while idx < iseq_size: + m = self.iseq_extract_values(self.debugger, self.target, self.process, self.result, iseqs, idx) + if m < 1: + print("Error decoding", file=self.result) + return + else: + idx += m + + def build_addr2insn(self, target): + tIntPtr = target.FindFirstType("intptr_t") + size = 
target.EvaluateExpression('ruby_vminsn_type::VM_INSTRUCTION_SIZE').unsigned + sizeOfIntPtr = tIntPtr.GetByteSize() + addr_of_table = self.insns_address_table.GetStartAddress().GetLoadAddress(target) + + my_dict = {} + + for insn in range(size): + addr_in_table = addr_of_table + (insn * sizeOfIntPtr) + addr = lldb.SBAddress(addr_in_table, target) + machine_insn = target.CreateValueFromAddress("insn", addr, tIntPtr).GetValueAsUnsigned() + my_dict[machine_insn] = insn + + return my_dict + + def rb_vm_insn_addr2insn2(self, target, result, wanted_addr): + return self.addr2insn.get(wanted_addr) + + def iseq_extract_values(self, debugger, target, process, result, iseqs, n): + tValueP = target.FindFirstType("VALUE") + sizeofValueP = tValueP.GetByteSize() + pc = iseqs.unsigned + (n * sizeofValueP) + insn = target.CreateValueFromAddress("i", lldb.SBAddress(pc, target), tValueP) + addr = insn.GetValueAsUnsigned() + orig_insn = self.rb_vm_insn_addr2insn2(target, result, addr) + + name = self.insn_name(target, process, result, orig_insn) + length = self.insn_len(target, orig_insn) + op_str = self.insn_op_types(target, process, result, orig_insn) + op_types = bytes(op_str, 'utf-8') + + if length != (len(op_types) + 1): + print("error decoding iseqs", file=result) + return -1 + + print("%0#14x %04d %s" % (pc, n, name), file=result, end="") + + if length == 1: + print("", file=result) + return length + + print("(", end="", file=result) + for idx, op_type in enumerate(op_types): + if idx == 0: + print(" ", end="", file=result) + else: + print(", ", end="", file=result) + + opAddr = lldb.SBAddress(iseqs.unsigned + ((n + idx + 1) * sizeofValueP), target) + opValue = target.CreateValueFromAddress("op", opAddr, tValueP) + op = opValue.GetValueAsUnsigned() + print(self.ISEQ_OPT_DISPATCH.get(op_type) % op, end="", file=result) + + print(" )", file=result) + return length + + def insn_len(self, target, offset): + size_of_char = self.tChar.GetByteSize() + + symbol = 
target.FindSymbols("rb_vm_insn_len_info")[0].GetSymbol() + section = symbol.GetStartAddress().GetSection() + addr_of_table = symbol.GetStartAddress().GetOffset() + + error = lldb.SBError() + length = section.GetSectionData().GetUnsignedInt8(error, addr_of_table + (offset * size_of_char)) + + if error.Success(): + return length + else: + print("error getting length: ", error) + + def insn_op_types(self, target, process, result, insn): + tUShort = target.FindFirstType("unsigned short") + + size_of_short = tUShort.GetByteSize() + size_of_char = self.tChar.GetByteSize() + + symbol = target.FindSymbols("rb_vm_insn_op_offset")[0].GetSymbol() + section = symbol.GetStartAddress().GetSection() + addr_of_table = symbol.GetStartAddress().GetOffset() + + addr_in_table = addr_of_table + (insn * size_of_short) + + error = lldb.SBError() + offset = section.GetSectionData().GetUnsignedInt16(error, addr_in_table) + + if not error.Success(): + print("error getting op type offset: ", error) + + symbol = target.FindSymbols("rb_vm_insn_op_base")[0].GetSymbol() + section = symbol.GetStartAddress().GetSection() + addr_of_table = symbol.GetStartAddress().GetOffset() + addr_in_name_table = addr_of_table + (offset * size_of_char) + + error = lldb.SBError() + types = section.GetSectionData().GetString(error, addr_in_name_table) + if error.Success(): + return types + else: + print("error getting op types: ", error) + + def insn_name_table_offset(self, target, offset): + tUShort = target.FindFirstType("unsigned short") + size_of_short = tUShort.GetByteSize() + + symbol = target.FindSymbols("rb_vm_insn_name_offset")[0].GetSymbol() + section = symbol.GetStartAddress().GetSection() + table_offset = symbol.GetStartAddress().GetOffset() + + table_offset = table_offset + (offset * size_of_short) + + error = lldb.SBError() + offset = section.GetSectionData().GetUnsignedInt16(error, table_offset) + + if error.Success(): + return offset + else: + print("error getting insn name table offset: ", error) + 
+ def insn_name(self, target, process, result, offset): + symbol = target.FindSymbols("rb_vm_insn_name_base")[0].GetSymbol() + section = symbol.GetStartAddress().GetSection() + addr_of_table = symbol.GetStartAddress().GetOffset() + + name_table_offset = self.insn_name_table_offset(target, offset) + addr_in_name_table = addr_of_table + name_table_offset + + error = lldb.SBError() + name = section.GetSectionData().GetString(error, addr_in_name_table) + + if error.Success(): + return name + else: + print('error getting insn name', error) + + def __get_insns_address_table(self): + module = self.target.FindSymbols("vm_exec_core")[0].GetModule() + + for symbol in module: + if "insns_address_table" in symbol.name and symbol.GetType() == lldb.eSymbolTypeData: + print(f"found symbol {symbol.name}") + return symbol + + +def disasm(debugger, command, result, internal_dict): + disassembler = IseqDisassembler(debugger, command, result, internal_dict) + frame = disassembler.frame + + if frame.IsValid(): + val = frame.EvaluateExpression(command) + else: + val = target.EvaluateExpression(command) + error = val.GetError() + if error.Fail(): + print >> result, error + return + + disassembler.disasm(val); + +def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand("command script add -f lldb_disasm.disasm rbdisasm") + print("lldb Ruby disasm installed.") diff --git a/misc/lldb_rb/commands/command_template.py b/misc/lldb_rb/commands/command_template.py new file mode 100644 index 0000000000..21014a993e --- /dev/null +++ b/misc/lldb_rb/commands/command_template.py @@ -0,0 +1,30 @@ +# This is a command template for implementing a helper function inside LLDB. To +# use this file +# 1. Copy it and rename the copy so it ends with `_command.py`. +# 2. Rename the class to something descriptive that ends with Command. +# 3. Change the program variable to be a descriptive command name +# 4. 
Ensure you are inheriting from RbBaseCommand or another command that +# implements the same interface + +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +# This test command inherits from RbBaseCommand which provides access to Ruby +# globals and utility helpers +class TestCommand(RbBaseCommand): + # program is the keyword the user will type in lldb to execute this command + program = "test" + + # help_string will be displayed in lldb when the user uses the help functions + help_string = "This is a test command to show how to implement lldb commands" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + # This method will be called once the LLDB environment has been setup. + # You will have access to self.target, self.process, self.frame, and + # self.thread + # + # This is where we should implement our command logic + pass diff --git a/misc/lldb_rb/commands/heap_page_command.py b/misc/lldb_rb/commands/heap_page_command.py new file mode 100644 index 0000000000..2eed3c3bee --- /dev/null +++ b/misc/lldb_rb/commands/heap_page_command.py @@ -0,0 +1,27 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +class HeapPageCommand(RbBaseCommand): + program = "heap_page" + help_string = "prints out 'struct heap_page' for a VALUE pointer in the page" + + def call(self, debugger, command, exe_ctx, result): + self.result = result + self.t_heap_page_body = self.target.FindFirstType("struct heap_page_body") + self.t_heap_page_ptr = self.target.FindFirstType("struct heap_page").GetPointerType() + + page = self._get_page(self.frame.EvaluateExpression(command)) + page.Cast(self.t_heap_page_ptr) + + self._append_expression("(struct heap_page *) %0#x" % page.GetValueAsUnsigned()) + self._append_expression("*(struct heap_page *) %0#x" % page.GetValueAsUnsigned()) + + def _get_page(self, val): + addr = val.GetValueAsUnsigned() 
+ page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + body = self.target.CreateValueFromAddress("page", address, self.t_heap_page_body) + + return body.GetValueForExpressionPath("->header.page") diff --git a/misc/lldb_rb/commands/print_flags_command.py b/misc/lldb_rb/commands/print_flags_command.py new file mode 100644 index 0000000000..bc494ae01a --- /dev/null +++ b/misc/lldb_rb/commands/print_flags_command.py @@ -0,0 +1,31 @@ +import lldb +import re + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +class PrintFlagsCommand(RbBaseCommand): + program = "print_flags" + + help_string = "Print out the individial flags of an RVALUE object in human readable format" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + rclass_t = self.target.FindFirstType("::RBasic") + rcass_ptr = self.target.EvaluateExpression(command).Cast(rclass_t.GetPointerType()) + obj_flags = rcass_ptr.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + flags = [ + "RUBY_FL_WB_PROTECTED", "RUBY_FL_PROMOTED", "RUBY_FL_FINALIZE", + "RUBY_FL_SHAREABLE", "RUBY_FL_FREEZE", + "RUBY_FL_USER0", "RUBY_FL_USER1", "RUBY_FL_USER2", "RUBY_FL_USER3", "RUBY_FL_USER4", + "RUBY_FL_USER5", "RUBY_FL_USER6", "RUBY_FL_USER7", "RUBY_FL_USER8", "RUBY_FL_USER9", + "RUBY_FL_USER10", "RUBY_FL_USER11", "RUBY_FL_USER12", "RUBY_FL_USER13", "RUBY_FL_USER14", + "RUBY_FL_USER15", "RUBY_FL_USER16", "RUBY_FL_USER17", "RUBY_FL_USER18" + ] + + types_index = {v: k for k, v in self.ruby_globals.items() if re.match(r'RUBY_T_', k)} + print("TYPE: {}".format(types_index[obj_flags & self.ruby_globals["RUBY_T_MASK"]])) + for flag in flags: + output = "{} : {}".format(flag, "1" if (obj_flags & self.ruby_globals[flag]) else "0") + print(output, file=result) diff --git a/misc/lldb_rb/commands/rb_id2str_command.py b/misc/lldb_rb/commands/rb_id2str_command.py new file mode 100644 index 
0000000000..6ee859ebf6 --- /dev/null +++ b/misc/lldb_rb/commands/rb_id2str_command.py @@ -0,0 +1,49 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.utils import * +from lldb_rb.rb_base_command import RbBaseCommand + +class RbID2StrCommand(RbBaseCommand): + program = "rb_id2str" + + help_string = "convert and print a Ruby ID to a C string and print it to the LLDB console" + + def call(self, debugger, command, exe_ctx, result): + global_symbols = self.target.FindFirstGlobalVariable("ruby_global_symbols") + + id_val = self.frame.EvaluateExpression(command).GetValueAsUnsigned() + num = self.rb_id_to_serial(id_val) + + last_id = global_symbols.GetChildMemberWithName("last_id").GetValueAsUnsigned() + ID_ENTRY_SIZE = 2 + ID_ENTRY_UNIT = int(self.target.FindFirstGlobalVariable("ID_ENTRY_UNIT").GetValue()) + + ids = global_symbols.GetChildMemberWithName("ids") + + if num <= last_id: + idx = num // ID_ENTRY_UNIT + ary = self.rb_ary_entry(ids, idx, result) + pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE + id_str = self.rb_ary_entry(ary, pos, result) + + RbInspector(debugger, result, self.ruby_globals).inspect(id_str) + + def rb_id_to_serial(self, id_val): + if id_val > self.ruby_globals["tLAST_OP_ID"]: + return id_val >> self.ruby_globals["RUBY_ID_SCOPE_SHIFT"] + else: + return id_val + + def rb_ary_entry(self, ary, idx, result): + tRArray = self.target.FindFirstType("struct RArray").GetPointerType() + ary = ary.Cast(tRArray) + flags = ary.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + if flags & self.ruby_globals["RUBY_FL_USER1"]: + ptr = ary.GetValueForExpressionPath("->as.ary") + else: + ptr = ary.GetValueForExpressionPath("->as.heap.ptr") + + ptr_addr = ptr.GetValueAsUnsigned() + (idx * ptr.GetType().GetByteSize()) + return self.target.CreateValueFromAddress("ary_entry[%d]" % idx, lldb.SBAddress(ptr_addr, self.target), ptr.GetType().GetPointeeType()) diff --git a/misc/lldb_rb/commands/rclass_ext_command.py 
b/misc/lldb_rb/commands/rclass_ext_command.py new file mode 100644 index 0000000000..8bae911457 --- /dev/null +++ b/misc/lldb_rb/commands/rclass_ext_command.py @@ -0,0 +1,14 @@ +from lldb_rb.rb_base_command import RbBaseCommand + +class RclassExtCommand(RbBaseCommand): + program = "rclass_ext" + help_string = "retrieves and prints the rb_classext_struct for the VALUE pointer passed in" + + def call(self, debugger, command, exe_ctx, result): + uintptr_t = self.target.FindFirstType("uintptr_t") + rclass_t = self.target.FindFirstType("struct RClass") + rclass_ext_t = self.target.FindFirstType("rb_classext_t") + + rclass_addr = self.target.EvaluateExpression(command).Cast(uintptr_t) + rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) + debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) diff --git a/misc/lldb_rb/commands/rp_command.py b/misc/lldb_rb/commands/rp_command.py new file mode 100644 index 0000000000..06b2516d50 --- /dev/null +++ b/misc/lldb_rb/commands/rp_command.py @@ -0,0 +1,15 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.utils import * +from lldb_rb.rb_base_command import RbBaseCommand + +class RbID2StrCommand(RbBaseCommand): + program = "rp" + + help_string = "convert and print a Ruby ID to a C string and print it to the LLDB console" + + def call(self, debugger, command, exe_ctx, result): + val = self.frame.EvaluateExpression(command) + inspector = RbInspector(debugger, result, self.ruby_globals) + inspector.inspect(val) diff --git a/misc/lldb_rb/constants.py b/misc/lldb_rb/constants.py new file mode 100644 index 0000000000..9cd56eccb0 --- /dev/null +++ b/misc/lldb_rb/constants.py @@ -0,0 +1,6 @@ +HEAP_PAGE_ALIGN_LOG = 16 +HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) +HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) +HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN + +IMEMO_MASK = 0x0F diff --git a/misc/lldb_rb/lldb_interface.py b/misc/lldb_rb/lldb_interface.py new file mode 100644 index 
0000000000..25930b2e16 --- /dev/null +++ b/misc/lldb_rb/lldb_interface.py @@ -0,0 +1,18 @@ +class LLDBInterface: + def build_environment(self, debugger): + self.debugger = debugger + self.target = debugger.GetSelectedTarget() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + + def _append_command_output(self, command): + output1 = self.result.GetOutput() + self.debugger.GetCommandInterpreter().HandleCommand(command, self.result) + output2 = self.result.GetOutput() + self.result.Clear() + self.result.write(output1) + self.result.write(output2) + + def _append_expression(self, expression): + self._append_command_output("expression " + expression) diff --git a/misc/lldb_rb/rb_base_command.py b/misc/lldb_rb/rb_base_command.py new file mode 100644 index 0000000000..70a5addd6d --- /dev/null +++ b/misc/lldb_rb/rb_base_command.py @@ -0,0 +1,57 @@ +import lldb +from pydoc import locate +from lldb_rb.constants import * +from lldb_rb.utils import * + +class RbBaseCommand(LLDBInterface): + @classmethod + def register_lldb_command(cls, debugger, module_name): + # Add any commands contained in this module to LLDB + command = f"command script add -c {module_name}.{cls.__name__} {cls.program}" + debugger.HandleCommand(command) + + @classmethod + def lldb_init(cls, debugger): + target = debugger.GetSelectedTarget() + global SIZEOF_VALUE + SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() + + value_types = [] + g = globals() + + imemo_types = target.FindFirstType("enum imemo_type") + + #for member in imemo_types.GetEnumMembers(): + # g[member.GetName()] = member.GetValueAsUnsigned() + + for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + g[name] = value + + if 
name.startswith("RUBY_T_"): + value_types.append(name) + g["value_types"] = value_types + return g + + def __init__(self, debugger, _internal_dict): + self.ruby_globals = RbBaseCommand.lldb_init(debugger) + self.internal_dict = _internal_dict + + def __call__(self, debugger, command, exe_ctx, result): + self.ruby_globals = RbBaseCommand.lldb_init(debugger) + self.build_environment(debugger) + self.call(debugger, command, exe_ctx, result) + + def call(self, debugger, command, exe_ctx, result): + raise NotImplementedError("subclasses must implement call") + + def get_short_help(self): + return self.__class__.help_string + + def get_long_help(self): + return self.__class__.help_string diff --git a/misc/lldb_rb/rb_heap_structs.py b/misc/lldb_rb/rb_heap_structs.py new file mode 100644 index 0000000000..798b838080 --- /dev/null +++ b/misc/lldb_rb/rb_heap_structs.py @@ -0,0 +1,152 @@ +import lldb +import math +from lldb_rb.lldb_interface import LLDBInterface +from lldb_rb.constants import * + +class HeapPage(LLDBInterface): + def __init__(self, debugger, val): + self.build_environment(debugger) + self.page_type = self.target.FindFirstType("struct heap_page").GetPointerType() + self.val = val + + def heap_page_body(self, command, ctx, result, internal_dict): + process = self.target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + val = frame.EvaluateExpression(command) + page = self.get_page_body(val) + print("Page body address: ", page.GetAddress(), file=result) + print(page, file=result) + + def get_page_body(self, val): + tHeapPageBody = self.target.FindFirstType("struct heap_page_body") + addr = val.GetValueAsUnsigned() + page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + return self.target.CreateValueFromAddress("page", address, tHeapPageBody) + + def get_page_raw(self, val): + body = self.get_page_body(val) + return body.GetValueForExpressionPath("->header.page") + + def 
to_heap_page_struct(self): + pagePtr = self.get_page_raw(self.val) + return pagePtr.Cast(self.page_type) + + +class RbObject(LLDBInterface): + def __init__(self, ptr, debugger, ruby_globals): + self.build_environment(debugger) + self.ruby_globals = ruby_globals + + self.flUser1 = self.ruby_globals["RUBY_FL_USER1"] + self.flUser2 = self.ruby_globals["RUBY_FL_USER2"] + self.flUser3 = self.ruby_globals["RUBY_FL_USER3"] + self.flUser4 = self.ruby_globals["RUBY_FL_USER4"] + self.flUser5 = self.ruby_globals["RUBY_FL_USER5"] + self.flUser6 = self.ruby_globals["RUBY_FL_USER6"] + self.flUser7 = self.ruby_globals["RUBY_FL_USER7"] + self.flUser8 = self.ruby_globals["RUBY_FL_USER8"] + self.flUser9 = self.ruby_globals["RUBY_FL_USER9"] + self.flUshift = self.ruby_globals["RUBY_FL_USHIFT"] + + self.tRBasic = self.target.FindFirstType("::RBasic").GetPointerType() + + self.val = ptr.Cast(self.tRBasic) + self.page = HeapPage(self.debugger, self.val) + self.flags = self.val.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + self.type = None + self.type_name = "" + + def check_bits(self, bitmap_name, bitmap_index, bitmap_bit, v): + page = self.page.to_heap_page_struct() + bits = page.GetChildMemberWithName(bitmap_name) + plane = bits.GetChildAtIndex(bitmap_index).GetValueAsUnsigned() + if (plane & bitmap_bit) != 0: + return v + else: + return ' ' + + def dump_bits(self, result, end = "\n"): + tUintPtr = self.target.FindFirstType("uintptr_t") # bits_t + + slot_size = self.page.to_heap_page_struct().GetChildMemberWithName("heap").GetChildMemberWithName("slot_size").unsigned + byte_size = 40 ** math.floor(math.log(slot_size, 40)) + + num_in_page = (self.val.GetValueAsUnsigned() & HEAP_PAGE_ALIGN_MASK) // byte_size; + bits_bitlength = tUintPtr.GetByteSize() * 8 + bitmap_index = num_in_page // bits_bitlength + bitmap_offset = num_in_page & (bits_bitlength - 1) + bitmap_bit = 1 << bitmap_offset + + page = self.page.to_heap_page_struct() + print("bits: [%s%s%s%s%s]" % ( + 
self.check_bits("uncollectible_bits", bitmap_index, bitmap_bit, "L"), + self.check_bits("mark_bits", bitmap_index, bitmap_bit, "M"), + self.check_bits("pinned_bits", bitmap_index, bitmap_bit, "P"), + self.check_bits("marking_bits", bitmap_index, bitmap_bit, "R"), + self.check_bits("wb_unprotected_bits", bitmap_index, bitmap_bit, "U"), + ), end=end, file=result) + + def promoted_p(self): + rbFlPromoted = self.ruby_globals["RUBY_FL_PROMOTED"] + return (self.flags & rbFlPromoted) == rbFlPromoted + + def frozen_p(self): + rbFlFreeze = self.ruby_globals["RUBY_FL_FREEZE"] + return (self.flags & rbFlFreeze) == rbFlFreeze + + def is_type(self, type_name): + if self.type is None: + flTMask = self.ruby_globals["RUBY_T_MASK"] + flType = self.flags & flTMask + self.type = flType + + if self.type == self.ruby_globals[type_name]: + self.type_name = type_name + return True + else: + return False + + def as_type(self, type_name): + if type_name == "array": + tRarray = self.target.FindFirstType("struct RArray") + return self.val.Cast(tRarray.GetPointerType()) + elif type_name == "bignum": + tRbignum = self.target.FindFirstType("struct RBignum") + return self.val.Cast(tRbignum.GetPointerType()) + else: + print("as_type is not implemented for:", type_name) + + def ary_ptr(self): + rval = self.as_type("array") + if self.flags & self.ruby_globals["RUBY_FL_USER1"]: + ptr = rval.GetValueForExpressionPath("->as.ary") + else: + ptr = rval.GetValueForExpressionPath("->as.heap.ptr") + return ptr + + def ary_len(self): + if self.flags & self.flUser1: + len = ((self.flags & + (self.flUser3 | self.flUser4 | self.flUser5 | self.flUser6 | + self.flUser7 | self.flUser8 | self.flUser9) + ) >> (self.flUshift + 3)) + else: + rval = self.as_type("array") + len = rval.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() + + return len + + def bignum_len(self): + if self.flags & self.flUser2: + len = ((self.flags & + (self.flUser3 | self.flUser4 | self.flUser5) + ) >> (self.flUshift + 3)) + 
else: + len = (self.as_type("bignum").GetValueForExpressionPath("->as.heap.len"). + GetValueAsUnsigned()) + + return len diff --git a/misc/lldb_rb/utils.py b/misc/lldb_rb/utils.py new file mode 100644 index 0000000000..1415dd3f33 --- /dev/null +++ b/misc/lldb_rb/utils.py @@ -0,0 +1,515 @@ +from lldb_rb.lldb_interface import LLDBInterface +from lldb_rb.rb_heap_structs import HeapPage, RbObject +from lldb_rb.constants import * + +class RbInspector(LLDBInterface): + def __init__(self, debugger, result, ruby_globals): + self.build_environment(debugger) + self.result = result + self.ruby_globals = ruby_globals + + def string2cstr(self, rstring): + """Returns the pointer to the C-string in the given String object""" + if rstring.TypeIsPointerType(): + rstring = rstring.Dereference() + + flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) + if flags & self.ruby_globals["RUBY_FL_USER1"]: + cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) + else: + cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) + + return cptr, clen + + def output_string(self, rstring): + cptr, clen = self.string2cstr(rstring) + self._append_expression("*(const char (*)[%d])%0#x" % (clen, cptr)) + + def fixnum_p(self, x): + return x & self.ruby_globals["RUBY_FIXNUM_FLAG"] != 0 + + def flonum_p(self, x): + return (x & self.ruby_globals["RUBY_FLONUM_MASK"]) == self.ruby_globals["RUBY_FLONUM_FLAG"] + + def static_sym_p(self, x): + special_shift = self.ruby_globals["RUBY_SPECIAL_SHIFT"] + symbol_flag = self.ruby_globals["RUBY_SYMBOL_FLAG"] + return (x & ~(~0 << special_shift)) == symbol_flag + + def generic_inspect(self, val, rtype): + tRType = self.target.FindFirstType("struct %s" % rtype).GetPointerType() + val = val.Cast(tRType) + self._append_expression("*(struct %s *) %0#x" % (rtype, val.GetValueAsUnsigned())) + + def inspect(self, val): + rbTrue = 
self.ruby_globals["RUBY_Qtrue"] + rbFalse = self.ruby_globals["RUBY_Qfalse"] + rbNil = self.ruby_globals["RUBY_Qnil"] + rbUndef = self.ruby_globals["RUBY_Qundef"] + rbImmediateMask = self.ruby_globals["RUBY_IMMEDIATE_MASK"] + + if self.inspect_node(val): + return + + num = val.GetValueAsSigned() + if num == rbFalse: + print('false', file=self.result) + elif num == rbTrue: + print('true', file=self.result) + elif num == rbNil: + print('nil', file=self.result) + elif num == rbUndef: + print('undef', file=self.result) + elif self.fixnum_p(num): + print(num >> 1, file=self.result) + elif self.flonum_p(num): + self._append_expression("rb_float_value(%0#x)" % val.GetValueAsUnsigned()) + elif self.static_sym_p(num): + if num < 128: + print("T_SYMBOL: %c" % num, file=self.result) + else: + print("T_SYMBOL: (%x)" % num, file=self.result) + self._append_expression("rb_id2name(%0#x)" % (num >> 8)) + + elif num & rbImmediateMask: + print('immediate(%x)' % num, file=self.result) + else: + rval = RbObject(val, self.debugger, self.ruby_globals) + rval.dump_bits(self.result) + + flaginfo = "" + if rval.promoted_p(): + flaginfo += "[PROMOTED] " + if rval.frozen_p(): + flaginfo += "[FROZEN] " + + if rval.is_type("RUBY_T_NONE"): + print('T_NONE: %s%s' % (flaginfo, val.Dereference()), file=self.result) + + elif rval.is_type("RUBY_T_NIL"): + print('T_NIL: %s%s' % (flaginfo, val.Dereference()), file=self.result) + + elif rval.is_type("RUBY_T_OBJECT"): + self.result.write('T_OBJECT: %s' % flaginfo) + self._append_expression("*(struct RObject*)%0#x" % val.GetValueAsUnsigned()) + + elif (rval.is_type("RUBY_T_CLASS") or + rval.is_type("RUBY_T_MODULE") or + rval.is_type("RUBY_T_ICLASS")): + self.result.write('T_%s: %s' % (rval.type_name.split('_')[-1], flaginfo)) + tRClass = self.target.FindFirstType("struct RClass") + + self._append_expression("*(struct RClass*)%0#x" % val.GetValueAsUnsigned()) + if not val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): + self._append_expression( + 
"*(struct rb_classext_struct*)%0#x" % + (val.GetValueAsUnsigned() + tRClass.GetByteSize()) + ) + + elif rval.is_type("RUBY_T_STRING"): + self.result.write('T_STRING: %s' % flaginfo) + tRString = self.target.FindFirstType("struct RString").GetPointerType() + + chilled = self.ruby_globals["RUBY_FL_USER3"] + if (rval.flags & chilled) != 0: + self.result.write("[CHILLED] ") + + rb_enc_mask = self.ruby_globals["RUBY_ENCODING_MASK"] + rb_enc_shift = self.ruby_globals["RUBY_ENCODING_SHIFT"] + encidx = ((rval.flags & rb_enc_mask) >> rb_enc_shift) + encname = self.target.FindFirstType("enum ruby_preserved_encindex") \ + .GetEnumMembers().GetTypeEnumMemberAtIndex(encidx) \ + .GetName() + + if encname is not None: + self.result.write('[%s] ' % encname[14:]) + else: + self.result.write('[enc=%d] ' % encidx) + + coderange = rval.flags & self.ruby_globals["RUBY_ENC_CODERANGE_MASK"] + if coderange == self.ruby_globals["RUBY_ENC_CODERANGE_7BIT"]: + self.result.write('[7BIT] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_VALID"]: + self.result.write('[VALID] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_BROKEN"]: + self.result.write('[BROKEN] ') + else: + self.result.write('[UNKNOWN] ') + + ptr, len = self.string2cstr(val.Cast(tRString)) + if len == 0: + self.result.write("(empty)\n") + else: + self._append_expression("*(const char (*)[%d])%0#x" % (len, ptr)) + + elif rval.is_type("RUBY_T_SYMBOL"): + self.result.write('T_SYMBOL: %s' % flaginfo) + tRSymbol = self.target.FindFirstType("struct RSymbol").GetPointerType() + tRString = self.target.FindFirstType("struct RString").GetPointerType() + + val = val.Cast(tRSymbol) + self._append_expression('(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned()) + self.output_string(val.GetValueForExpressionPath("->fstr").Cast(tRString)) + + elif rval.is_type("RUBY_T_ARRAY"): + len = rval.ary_len() + ptr = rval.ary_ptr() + + self.result.write("T_ARRAY: %slen=%d" % (flaginfo, len)) + + if rval.flags 
& self.ruby_globals["RUBY_FL_USER1"]: + self.result.write(" (embed)") + elif rval.flags & self.ruby_globals["RUBY_FL_USER2"]: + shared = val.GetValueForExpressionPath("->as.heap.aux.shared").GetValueAsUnsigned() + self.result.write(" (shared) shared=%016x" % shared) + else: + capa = val.GetValueForExpressionPath("->as.heap.aux.capa").GetValueAsSigned() + self.result.write(" (ownership) capa=%d" % capa) + if len == 0: + self.result.write(" {(empty)}\n") + else: + self.result.write("\n") + if ptr.GetValueAsSigned() == 0: + self._append_expression("-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned()) + else: + self._append_expression("-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned())) + + elif rval.is_type("RUBY_T_HASH"): + self.result.write("T_HASH: %s" % flaginfo) + ptr = val.GetValueAsUnsigned() + self._append_expression("*(struct RHash *) %0#x" % ptr) + if rval.flags & self.ruby_globals["RUBY_FL_USER3"]: + self._append_expression("*(struct st_table *) (%0#x + sizeof(struct RHash))" % ptr) + else: + self._append_expression("*(struct ar_table *) (%0#x + sizeof(struct RHash))" % ptr) + + elif rval.is_type("RUBY_T_BIGNUM"): + sign = '-' + if (rval.flags & self.ruby_globals["RUBY_FL_USER1"]) != 0: + sign = '+' + len = rval.bignum_len() + + if rval.flags & self.ruby_globals["RUBY_FL_USER2"]: + print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=self.result) + self._append_expression("((struct RBignum *) %0#x)->as.ary" + % val.GetValueAsUnsigned()) + else: + print("T_BIGNUM: sign=%s len=%d" % (sign, len), file=self.result) + print(rval.as_type("bignum"), file=self.result) + self._append_expression("-Z %d -fx -- ((struct RBignum*)%d)->as.heap.digits" % + (len, val.GetValueAsUnsigned())) + + elif rval.is_type("RUBY_T_FLOAT"): + self._append_expression("((struct RFloat *)%d)->float_value" + % val.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_RATIONAL"): + tRRational = self.target.FindFirstType("struct 
RRational").GetPointerType() + val = val.Cast(tRRational) + self.inspect(val.GetValueForExpressionPath("->num")) + output = self.result.GetOutput() + self.result.Clear() + self.result.write("(Rational) " + output.rstrip() + " / ") + self.inspect(val.GetValueForExpressionPath("->den")) + + elif rval.is_type("RUBY_T_COMPLEX"): + tRComplex = self.target.FindFirstType("struct RComplex").GetPointerType() + val = val.Cast(tRComplex) + self.inspect(val.GetValueForExpressionPath("->real")) + real = self.result.GetOutput().rstrip() + self.result.Clear() + self.inspect(val.GetValueForExpressionPath("->imag")) + imag = self.result.GetOutput().rstrip() + self.result.Clear() + if not imag.startswith("-"): + imag = "+" + imag + print("(Complex) " + real + imag + "i", file=self.result) + + elif rval.is_type("RUBY_T_REGEXP"): + tRRegex = self.target.FindFirstType("struct RRegexp").GetPointerType() + val = val.Cast(tRRegex) + print("(Regex) ->src {", file=self.result) + self.inspect(val.GetValueForExpressionPath("->src")) + print("}", file=self.result) + + elif rval.is_type("RUBY_T_DATA"): + tRTypedData = self.target.FindFirstType("struct RTypedData").GetPointerType() + val = val.Cast(tRTypedData) + is_typed_data = self.ruby_globals.get("RUBY_TYPED_FL_IS_TYPED_DATA", None) + if is_typed_data: + typed = rval.flags & is_typed_data + else: + typed = val.GetValueForExpressionPath("->typed_flag").GetValueAsUnsigned() == 1 + + if typed: + type = val.GetValueForExpressionPath("->type").GetValueAsUnsigned() + embed = (type & 1) + if embed: + flaginfo += "[EMBED] " + type = self.frame.EvaluateExpression("(rb_data_type_t *)%0#x" % (type & ~1)) + print("T_DATA: %s%s" % + (flaginfo, type.GetValueForExpressionPath("->wrap_struct_name")), + file=self.result) + print("%s", type.Dereference(), file=self.result) + ptr = val.GetValueForExpressionPath("->data") + if embed: + ptr = ptr.AddressOf() + self._append_expression("(void *)%0#x" % ptr.GetValueAsUnsigned()) + else: + print("T_DATA:", 
file=self.result) + self._append_expression("*(struct RData *) %0#x" % val.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_IMEMO"): + imemo_type = ((rval.flags >> self.ruby_globals["RUBY_FL_USHIFT"]) + & IMEMO_MASK) + print("T_IMEMO: ", file=self.result) + + self._append_expression("(enum imemo_type) %d" % imemo_type) + self._append_expression("*(struct MEMO *) %0#x" % val.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_FILE"): + self.generic_inspect(val, "RFile") + + elif rval.is_type("RUBY_T_MOVED"): + self.generic_inspect(val, "RMoved") + + elif rval.is_type("RUBY_T_MATCH"): + self.generic_inspect(val, "RMatch") + + elif rval.is_type("RUBY_T_STRUCT"): + self.generic_inspect(val, "RStruct") + + elif rval.is_type("RUBY_T_ZOMBIE"): + self.generic_inspect(val, "RZombie") + + else: + print("Not-handled type %0#x" % rval.type, file=self.result) + print(val, file=self.result) + + def inspect_node(self, val): + tRNode = self.target.FindFirstType("struct RNode").GetPointerType() + + # if val.GetType() != tRNode: does not work for unknown reason + + if val.GetType().GetPointeeType().name != "NODE": + return False + + rbNodeTypeMask = self.ruby_globals["RUBY_NODE_TYPEMASK"] + rbNodeTypeShift = self.ruby_globals["RUBY_NODE_TYPESHIFT"] + flags = val.Cast(tRNode).GetChildMemberWithName("flags").GetValueAsUnsigned() + nd_type = (flags & rbNodeTypeMask) >> rbNodeTypeShift + + self._append_expression("(node_type) %d" % nd_type) + + if nd_type == self.ruby_globals["NODE_SCOPE"]: + self._append_expression("*(rb_node_scope_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BLOCK"]: + self._append_expression("*(rb_node_block_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IF"]: + self._append_expression("*(rb_node_if_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNLESS"]: + self._append_expression("*(rb_node_unless_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == 
self.ruby_globals["NODE_CASE"]: + self._append_expression("*(rb_node_case_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE2"]: + self._append_expression("*(rb_node_case2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE3"]: + self._append_expression("*(rb_node_case3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHEN"]: + self._append_expression("*(rb_node_when_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IN"]: + self._append_expression("*(rb_node_in_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHILE"]: + self._append_expression("*(rb_node_while_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNTIL"]: + self._append_expression("*(rb_node_until_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ITER"]: + self._append_expression("*(rb_node_iter_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR"]: + self._append_expression("*(rb_node_for_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR_MASGN"]: + self._append_expression("*(rb_node_for_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BREAK"]: + self._append_expression("*(rb_node_break_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NEXT"]: + self._append_expression("*(rb_node_next_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REDO"]: + self._append_expression("*(rb_node_redo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETRY"]: + self._append_expression("*(rb_node_retry_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BEGIN"]: + self._append_expression("*(rb_node_begin_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == 
self.ruby_globals["NODE_RESCUE"]: + self._append_expression("*(rb_node_rescue_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RESBODY"]: + self._append_expression("*(rb_node_resbody_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ENSURE"]: + self._append_expression("*(rb_node_ensure_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_AND"]: + self._append_expression("*(rb_node_and_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OR"]: + self._append_expression("*(rb_node_or_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MASGN"]: + self._append_expression("*(rb_node_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LASGN"]: + self._append_expression("*(rb_node_lasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DASGN"]: + self._append_expression("*(rb_node_dasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GASGN"]: + self._append_expression("*(rb_node_gasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IASGN"]: + self._append_expression("*(rb_node_iasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CDECL"]: + self._append_expression("*(rb_node_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVASGN"]: + self._append_expression("*(rb_node_cvasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN1"]: + self._append_expression("*(rb_node_op_asgn1_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN2"]: + self._append_expression("*(rb_node_op_asgn2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_AND"]: + self._append_expression("*(rb_node_op_asgn_and_t *) %0#x" % 
val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_OR"]: + self._append_expression("*(rb_node_op_asgn_or_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_CDECL"]: + self._append_expression("*(rb_node_op_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CALL"]: + self._append_expression("*(rb_node_call_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPCALL"]: + self._append_expression("*(rb_node_opcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FCALL"]: + self._append_expression("*(rb_node_fcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VCALL"]: + self._append_expression("*(rb_node_vcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_QCALL"]: + self._append_expression("*(rb_node_qcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SUPER"]: + self._append_expression("*(rb_node_super_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZSUPER"]: + self._append_expression("*(rb_node_zsuper_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LIST"]: + self._append_expression("*(rb_node_list_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZLIST"]: + self._append_expression("*(rb_node_zlist_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HASH"]: + self._append_expression("*(rb_node_hash_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETURN"]: + self._append_expression("*(rb_node_return_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_YIELD"]: + self._append_expression("*(rb_node_yield_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LVAR"]: + self._append_expression("*(rb_node_lvar_t 
*) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DVAR"]: + self._append_expression("*(rb_node_dvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GVAR"]: + self._append_expression("*(rb_node_gvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CONST"]: + self._append_expression("*(rb_node_const_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVAR"]: + self._append_expression("*(rb_node_cvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NTH_REF"]: + self._append_expression("*(rb_node_nth_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BACK_REF"]: + self._append_expression("*(rb_node_back_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH"]: + self._append_expression("*(rb_node_match_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH2"]: + self._append_expression("*(rb_node_match2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH3"]: + self._append_expression("*(rb_node_match3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_STR"]: + self._append_expression("*(rb_node_str_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSTR"]: + self._append_expression("*(rb_node_dstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_XSTR"]: + self._append_expression("*(rb_node_xstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DXSTR"]: + self._append_expression("*(rb_node_dxstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_EVSTR"]: + self._append_expression("*(rb_node_evstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REGX"]: + self._append_expression("*(rb_node_regx_t *) 
%0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DREGX"]: + self._append_expression("*(rb_node_dregx_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ONCE"]: + self._append_expression("*(rb_node_once_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS"]: + self._append_expression("*(rb_node_args_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS_AUX"]: + self._append_expression("*(rb_node_args_aux_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPT_ARG"]: + self._append_expression("*(rb_node_opt_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_KW_ARG"]: + self._append_expression("*(rb_node_kw_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTARG"]: + self._append_expression("*(rb_node_postarg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSCAT"]: + self._append_expression("*(rb_node_argscat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSPUSH"]: + self._append_expression("*(rb_node_argspush_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SPLAT"]: + self._append_expression("*(rb_node_splat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFN"]: + self._append_expression("*(rb_node_defn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFS"]: + self._append_expression("*(rb_node_defs_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ALIAS"]: + self._append_expression("*(rb_node_alias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VALIAS"]: + self._append_expression("*(rb_node_valias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNDEF"]: + 
self._append_expression("*(rb_node_undef_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CLASS"]: + self._append_expression("*(rb_node_class_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MODULE"]: + self._append_expression("*(rb_node_module_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SCLASS"]: + self._append_expression("*(rb_node_sclass_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_COLON2"]: + self._append_expression("*(rb_node_colon2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_COLON3"]: + self._append_expression("*(rb_node_colon3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT2"]: + self._append_expression("*(rb_node_dot2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT3"]: + self._append_expression("*(rb_node_dot3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP2"]: + self._append_expression("*(rb_node_flip2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP3"]: + self._append_expression("*(rb_node_flip3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SELF"]: + self._append_expression("*(rb_node_self_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NIL"]: + self._append_expression("*(rb_node_nil_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_TRUE"]: + self._append_expression("*(rb_node_true_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FALSE"]: + self._append_expression("*(rb_node_false_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERRINFO"]: + self._append_expression("*(rb_node_errinfo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFINED"]: + 
self._append_expression("*(rb_node_defined_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTEXE"]: + self._append_expression("*(rb_node_postexe_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSYM"]: + self._append_expression("*(rb_node_dsym_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ATTRASGN"]: + self._append_expression("*(rb_node_attrasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LAMBDA"]: + self._append_expression("*(rb_node_lambda_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARYPTN"]: + self._append_expression("*(rb_node_aryptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HSHPTN"]: + self._append_expression("*(rb_node_hshptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FNDPTN"]: + self._append_expression("*(rb_node_fndptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERROR"]: + self._append_expression("*(rb_node_error_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LINE"]: + self._append_expression("*(rb_node_line_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FILE"]: + self._append_expression("*(rb_node_file_t *) %0#x" % val.GetValueAsUnsigned()) + else: + self._append_expression("*(NODE *) %0#x" % val.GetValueAsUnsigned()) + return True diff --git a/misc/rb_optparse.bash b/misc/rb_optparse.bash index 8a59ec2dda..f77d937c87 100644 --- a/misc/rb_optparse.bash +++ b/misc/rb_optparse.bash @@ -1,4 +1,5 @@ -#! 
/bin/bash +# -*- bash -*- +# # Completion for bash: # # (1) install this file, @@ -16,5 +17,5 @@ _rb_optparse() { } rb_optparse () { - [ $# = 0 ] || complete -o default -F _rb_optparse "$@" + [ $# = 0 ] || complete -o default -F _rb_optparse "$@" } diff --git a/misc/rb_optparse.zsh b/misc/rb_optparse.zsh index d53170c5f7..258d4f856c 100755..100644 --- a/misc/rb_optparse.zsh +++ b/misc/rb_optparse.zsh @@ -1,4 +1,5 @@ -#!/bin/zsh +# -*- zsh -*- +# # Completion for zsh: # (based on <http://d.hatena.ne.jp/rubikitch/20071002/zshcomplete>) # @@ -7,13 +8,13 @@ # cp rb_optparse.zsh ~/.zsh.d/rb_optparse.zsh # # (2) load the script, and add a directory to fpath before compinit. -# echo '. ~/.zsh.d/rb_optparse.zsh' >> ~/.zshrc -# echo 'fpath=(~/.zsh.d/Completion $fpath)' >> ~/.zshrc -# echo 'autoload -U compinit; compinit' >> ~/.zshrc +# echo '. ~/.zsh.d/rb_optparse.zsh' >> "${ZDOTDIR:-~}/.zshrc" +# echo 'fpath=(~/.zsh.d/Completion $fpath)' >> "${ZDOTDIR:-~}/.zshrc" +# echo 'autoload -U compinit; compinit' >> "${ZDOTDIR:-~}/.zshrc" # # (3) restart zsh. # -# (4) geneate completion files once. +# (4) generate completion files once. # generate-complete-function/ruby/optparse COMMAND1 # generate-complete-function/ruby/optparse COMMAND2 # @@ -24,8 +25,8 @@ generate-complete-function/ruby/optparse () mkdir -p "${ZSH_COMPLETION_DIR-$HOME/.zsh.d/Completion}" $1 "--*-completion-zsh=${1:t}" >! "${ZSH_COMPLETION_DIR-$HOME/.zsh.d/Completion}/$cmpl" if [[ $(type -w "$cmpl") == "${cmpl}: function" ]]; then - unfunction "$cmpl" - autoload -U "$cmpl" + unfunction "$cmpl" + autoload -U "$cmpl" else compinit "$cmpl" fi diff --git a/misc/ruby-style.el b/misc/ruby-style.el index 13aad77b3d..03d0830d3a 100644 --- a/misc/ruby-style.el +++ b/misc/ruby-style.el @@ -56,7 +56,9 @@ (c-basic-offset . 4) (tab-width . 8) (indent-tabs-mode . nil) - (setq show-trailing-whitespace t) + (show-trailing-whitespace . t) + (c-backslash-column . 1) + (c-backslash-max-column . 1) (c-offsets-alist (case-label . 
*) (label . (ruby-style-label-indent *)) @@ -66,6 +68,17 @@ (access-label /) ))) +(c-add-style + "prism" + '("bsd" + (c-basic-offset . 4) + (tab-width . 8) + (indent-tabs-mode . nil) + (show-trailing-whitespace . t) + (c-offsets-alist + (case-label . +) + ))) + ;;;###autoload (defun ruby-style-c-mode () (interactive) diff --git a/misc/tsan_suppressions.txt b/misc/tsan_suppressions.txt new file mode 100644 index 0000000000..5492500e7f --- /dev/null +++ b/misc/tsan_suppressions.txt @@ -0,0 +1,109 @@ +# TSan: ThreadSanitizer +# https://github.com/google/sanitizers/wiki/threadsanitizersuppressions +# +# This file describes a number of places where TSAN detects problems in CRuby. +# Many of these indicate bugs. Others are benign (ex. data races that can be +# replaced with relaxed atomic loads) +# +# Usage: +# Configure with: +# ./configure cflags='-fsanitize=thread' CC=clang +# Build and run with: +# TSAN_OPTIONS="suppressions=$(pwd)/misc/tsan_suppressions.txt:die_after_fork=0" +# +# Other useful TSAN_OPTIONS: +# * halt_on_error=1 +# * strip_path_prefix=$(pwd)/ + +# Namespaces +race_top:push_subclass_entry_to_list + +# sub_nounderflow includes non-atomic read, possibly other issue +race:objspace_malloc_increase_body + +# Signals and ubf +race:unregister_ubf_list + +# It's already crashing. We're doing our best +signal:rb_vm_bugreport +race:check_reserved_signal_ + +race_top:rb_check_deadlock + +# vm->ractor.sched.grq_cnt++ +race_top:ractor_sched_enq +race_top:ractor_sched_deq + +# Race between vm_remove_ractor writing ractor count and +# native_thread_check_and_create_shared reading it during thread creation. +# The write happens when a ractor thread exits, the read happens when +# checking if new shared threads need to be created. 
+race:vm_remove_ractor + +# th->sched.finished at end of co_start +race_top:rb_thread_sched_mark_zombies + +# Races against timer thread setting th->sched.waiting_reason.flags +race_top:thread_sched_wait_events + +# At thread start +race_top:rb_ractor_set_current_ec_ + +# TSan reports a lock-order-inversion between thread_sched_lock_ and this lock. +# It's unclear if that can cause a deadlock since the lock is on self +deadlock:ractor_lock_self + +# TSan reports a deadlock when reacquiring this lock after a barrier, but +# we know the other threads have been stopped +deadlock:rb_ractor_sched_barrier_start + +# RVALUE_AGE_SET manipulates flag bits on objects which may be accessed in Ractors +race_top:RVALUE_AGE_SET + +# Inline caches and call cache updates +# Multiple threads can race when updating shared call caches during method lookups +# and argument forwarding. These races involve reading/writing cd->cc fields. +race_top:vm_cc_call_set +race_top:vm_cc_class_check +race_top:vm_search_cc +race_top:vm_search_method_slowpath0 +race_top:rb_vm_opt_getconstant_path +race_top:vm_ic_attr_index_set +race:vm_ic_update +race:vm_caller_setup_fwd_args + +# Race in shape_get_next where multiple threads simultaneously access and modify +# RCLASS_MAX_IV_COUNT and RCLASS_VARIATION_COUNT fields in class objects. +# One thread reads the field while another thread calls RCLASS_SET_MAX_IV_COUNT. +# This happens during instance variable shape transitions in multi-threaded code. 
+race:shape_get_next + +# Non-atomic reads/writes +race:gccct_method_search + +# Ignore exit for now +race:rb_ec_finalize +race:rb_ec_cleanup + +# TSan doesn't work well post-fork, this raises errors when creating the new +# timer thread +race:after_fork_ruby + +# Sets objspace->flags.dont_incremental while writebarrier may be running +race_top:objspace_each_exec +race_top:objspace_each_objects_ensure + +# Non-atomic lazy initialized static variable +race_top:rbimpl_intern_const + +# Setting def->aliased bitfield non-atomically +race_top:method_definition_addref + +# Switching to setting up tracing. Likely other ractors should be stopped for this. +race_top:encoded_iseq_trace_instrument +race:rb_iseq_trace_set_all +race:rb_tracepoint_enable + +# GC enable/disable flag modifications race with object allocation flag reads +race_top:rb_gc_impl_gc_disable +race_top:rb_gc_impl_gc_enable diff --git a/misc/yjit_perf.py b/misc/yjit_perf.py new file mode 100755 index 0000000000..61434e5eb4 --- /dev/null +++ b/misc/yjit_perf.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +import os +import sys +from collections import Counter, defaultdict +import os.path + +# Aggregating cycles per symbol and dso +total_cycles = 0 +category_cycles = Counter() +detailed_category_cycles = defaultdict(Counter) +categories = set() + +def truncate_symbol(symbol, max_length=50): + """ Truncate the symbol name to a maximum length """ + return symbol if len(symbol) <= max_length else symbol[:max_length-3] + '...' 
# Symbols attributed to the interpreter even though they carry no
# `rb_vm_` / `vm_` prefix.
_INTERPRETER_SYMBOLS = frozenset({
    "rb_call0", "callable_method_entry_or_negative", "invoke_block_from_c_bh",
    "rb_funcallv_scope", "setup_parameters_complex", "rb_yield"})


def categorize_symbol(dso, symbol):
    """Map a (dso, symbol) pair to a bracketed category name.

    Rules are tried in order and the first match wins, so the order below is
    significant (e.g. '[JIT] gen_send' must be tested before '[JIT]').

    Args:
        dso: Base name of the shared object the sample was taken in.
        symbol: Symbol name of the sample.

    Returns:
        A category string such as '[interpreter]', or `symbol` itself when no
        rule matches.
    """
    if dso == 'sqlite3_native.so':
        return '[sqlite3]'
    if 'SHA256' in symbol:
        return '[sha256]'
    if symbol.startswith('[JIT] gen_send'):
        return '[JIT send]'
    if symbol.startswith('[JIT]'):
        return '[JIT code]'
    # Any symbol containing '::' lands here, which already covers the
    # 'yjit::' prefix; only the mangled '_ZN4yjit' form needs its own test.
    if '::' in symbol or symbol.startswith('_ZN4yjit'):
        return '[YJIT compile]'
    if symbol.startswith(('rb_vm_', 'vm_')) or symbol in _INTERPRETER_SYMBOLS:
        return '[interpreter]'
    if symbol.startswith(('rb_hash_', 'hash_')):
        return '[rb_hash_*]'
    if symbol.startswith(('rb_ary_', 'ary_')):
        return '[rb_ary_*]'
    if symbol.startswith(('rb_str_', 'str_')):
        return '[rb_str_*]'
    if symbol.startswith(('rb_sym', 'sym_')):
        return '[rb_sym_*]'
    if symbol.startswith(('rb_st_', 'st_')):
        return '[rb_st_*]'
    if symbol.startswith('rb_ivar_') or 'shape' in symbol:
        return '[ivars]'
    if 'match' in symbol or symbol.startswith(('rb_reg', 'onig')):
        return '[regexp]'
    if 'alloc' in symbol or 'free' in symbol or 'gc' in symbol:
        return '[GC]'
    if 'pthread' in symbol and 'lock' in symbol:
        return '[pthread lock]'
    return symbol  # Return the symbol itself for uncategorized symbols


def process_event(event):
    """Accumulate one sample into the module-level counters.

    Args:
        event: Mapping with a required `event["sample"]["period"]` (the cycle
            count) and optional "dso" / "symbol" keys, which default to
            "Unknown_dso" and "[unknown]" respectively.
    """
    global total_cycles, category_cycles, detailed_category_cycles, categories

    # Only the file name of the DSO is kept; the directory part is dropped.
    dso = os.path.basename(event.get("dso", "Unknown_dso"))
    symbol = event.get("symbol", "[unknown]")
    cycles = event["sample"]["period"]
    total_cycles += cycles

    category = categorize_symbol(dso, symbol)
    category_cycles[category] += cycles
    detailed_category_cycles[category][(dso, symbol)] += cycles

    # Uncategorized symbols are returned verbatim by categorize_symbol; only
    # bracketed names get a per-category breakdown in trace_end().
    if category.startswith('[') and category.endswith(']'):
        categories.add(category)


def trace_end():
    """Print the aggregated report: one summary table, then one table per category.

    Prints nothing when no cycles were recorded.
    """
    if total_cycles == 0:
        return

    header_row = "{:<20} {:<50} {:>20} {:>15}"
    data_row = "{:<20} {:<50} {:>20.2f}% {:>15}"

    print("Aggregated Event Data:")
    print(header_row.format("[dso]", "[symbol or category]", "[top-most cycle ratio]", "[num cycles]"))

    for category, cycles in category_cycles.most_common():
        ratio = (cycles / total_cycles) * 100
        dsos = {dso for dso, _ in detailed_category_cycles[category]}
        dso_display = next(iter(dsos)) if len(dsos) == 1 else "Multiple DSOs"
        print(data_row.format(dso_display, truncate_symbol(category), ratio, cycles))

    # Category breakdown. `categories` is a set, so sort it (heaviest first,
    # name as tie-breaker) to make the report order reproducible between runs.
    for category in sorted(categories, key=lambda name: (-category_cycles[name], name)):
        symbols = detailed_category_cycles[category]
        category_total = sum(symbols.values())
        category_ratio = (category_total / total_cycles) * 100
        print(f"\nCategory: {category} ({category_ratio:.2f}%)")
        print(header_row.format("[dso]", "[symbol]", "[top-most cycle ratio]", "[num cycles]"))
        for (dso, symbol), cycles in symbols.most_common():
            # A category made only of zero-period samples has a zero total.
            symbol_ratio = (cycles / category_total) * 100 if category_total else 0.0
            print(data_row.format(dso, truncate_symbol(symbol), symbol_ratio, cycles))

# There are two ways to use this script:
# 1) perf script -s misc/yjit_perf.py -- native interface
# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support
#
# In both cases, __name__ is "__main__". The following code implements (2) when len(sys.argv) is 2.
def parse_perf_line(line):
    """Turn one line of `perf script` text output into a process_event() dict.

    Example input:
        ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)

    Args:
        line: One raw line, optionally ending in a newline.

    Returns:
        {"dso": ..., "symbol": ..., "sample": {"period": int}}, or None when
        the line does not have the expected shape (blank line, fewer than
        seven fields, non-numeric period, or no " (dso)" suffix).
    """
    row = line.split(maxsplit=6)
    if len(row) < 7:
        return None

    try:
        period = int(row[3])  # "1212775"
    except ValueError:
        return None

    # Split on the LAST " (" so symbols that themselves contain " (" still
    # parse; the dso is always the final parenthesised field.
    symbol, separator, dso = row[6].rpartition(" (")  # "[JIT] getlocal_WC_0+0x0", "/tmp/perf-78207.map)\n"
    if not separator:
        return None
    symbol = symbol.split("+")[0]  # "[JIT] getlocal_WC_0"
    dso = dso.split(")")[0]  # "/tmp/perf-78207.map"

    return {"dso": dso, "symbol": symbol, "sample": {"period": period}}


# Text-file mode: exactly one argument, the file holding `perf script` output.
# Any other argument count leaves event delivery to perf's native interface.
if __name__ == "__main__" and len(sys.argv) == 2:
    with open(sys.argv[1], "r") as file:
        for line in file:
            event = parse_perf_line(line)
            if event is not None:
                process_event(event)
    trace_end()
