diff options
Diffstat (limited to 'misc')
27 files changed, 1857 insertions, 609 deletions
diff --git a/misc/.vscode/launch.json b/misc/.vscode/launch.json new file mode 100644 index 0000000000..51bfef09d7 --- /dev/null +++ b/misc/.vscode/launch.json @@ -0,0 +1,13 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "name": "Run ruby", + "request": "launch", + "program": "${workspaceFolder}/ruby", + "args": ["test.rb"], + "preLaunchTask": "${defaultBuildTask}" + } + ] +} diff --git a/misc/.vscode/settings.json b/misc/.vscode/settings.json new file mode 100644 index 0000000000..a2e4e1ec69 --- /dev/null +++ b/misc/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "rust-analyzer.cargo.features": [ + "disasm", + ], + "rust-analyzer.cfg.setTest": false, + // rust-analyzer bundled in the VSCode extension may only support Rust newer than 1.85.0. + // To avoid warnings, install rust-analyzer with `rustup component add rust-analyzer` and + // use `~/.cargo/bin/rust-analyzer` with the following config. + "rust-analyzer.server.path": "rust-analyzer", +} diff --git a/misc/.vscode/tasks.json b/misc/.vscode/tasks.json new file mode 100644 index 0000000000..045fe7e5c0 --- /dev/null +++ b/misc/.vscode/tasks.json @@ -0,0 +1,14 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "type": "shell", + "command": "make -j", + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} diff --git a/misc/README b/misc/README index 1728b42700..86b680e724 100644 --- a/misc/README +++ b/misc/README @@ -4,3 +4,4 @@ rb_optparse.zsh zsh completion script ruby-style.el Ruby's C/C++ mode style for emacs lldb_cruby.py LLDB port of debug utility test_lldb_cruby.rb test file for LLDB port +.vscode example VSCode config to debug Ruby diff --git a/misc/call_fuzzer.rb b/misc/call_fuzzer.rb new file mode 100644 index 0000000000..c3f9f90490 --- /dev/null +++ b/misc/call_fuzzer.rb @@ -0,0 +1,372 @@ +require 'optparse' +require 'set' + +# Number of iterations to test +num_iters = 10_000 + +# Parse the command-line options +OptionParser.new do |opts| + 
# Format a number with comma thousands separators for readability.
#
# pad    - minimum width of the result; shorter strings are right-justified
#          with spaces.
# number - an Integer or Float; only the integer part receives separators.
#
# Returns the formatted String.
def format_number(pad, number)
  s = number.to_s
  # A leading minus sign is not a digit: without this offset "-123" would be
  # rendered as "-,123".
  first_digit = s.start_with?('-') ? 1 : 0
  i = s.index('.') || s.size
  s.insert(i -= 3, ',') while i - first_digit > 3
  s.rjust(pad, ' ')
end

# Wrap an integer to pass as argument
# We use this so we can have some object arguments
class IntWrapper
  attr_reader :value

  # v - the Integer payload, exposed through #value.
  def initialize(v)
    # Force the object to have a random shape: each filler ivar is assigned
    # with probability 0.5. (Kernel#rand with no argument returns a Float in
    # [0, 1), so the previous `rand() < 50` test was always true and every
    # instance ended up with the same set of ivars.)
    @v0 = 1 if rand < 0.5
    @v1 = 1 if rand < 0.5
    @v2 = 1 if rand < 0.5
    @v3 = 1 if rand < 0.5
    @v4 = 1 if rand < 0.5
    @v5 = 1 if rand < 0.5
    @v6 = 1 if rand < 0.5

    @value = v
  end
end

# Generate a random argument value: an Integer in 0...100, a String of
# 0...100 characters, or an IntWrapper around an Integer in 0...100.
def sample_arg
  case %w[int string object].sample
  when 'int'    then rand(0...100)
  when 'string' then 'f' * rand(0...100)
  when 'object' then IntWrapper.new(rand(0...100))
  else raise "should not get here"
  end
end
# Evaluate the value of an argument with respect to the checksum.
#
# Integers count as themselves, Strings as their length, and any other object
# must expose the wrapped number through #value (as IntWrapper does).
#
# Raises RuntimeError ("unknown arg type") for anything else. The previous
# `kind_of? Object` test matched every object, so that raise was unreachable.
def arg_val(arg)
  case arg
  when Integer then arg
  when String then arg.length
  else
    raise "unknown arg type" unless arg.respond_to?(:value)

    arg.value
  end
end

# List of parameters/arguments for a method.
#
# A new instance samples a random method signature (positional, optional,
# rest, keyword, keyword-rest and block parameters) together with a random set
# of arguments that is valid for that signature. It can then emit the source
# of the method, the source of a matching call, and the checksum the call is
# expected to return.
class ParamList
  def initialize
    sample_params
    sample_args
  end

  # Sample/generate a random set of parameters for a method
  def sample_params
    # Choose how many positional arguments to use, and how many are optional.
    # The optional ones form one contiguous run starting at @opt_parg_idx.
    num_pargs = rand(10)
    # Kernel#rand(0) returns a Float in [0, 1) instead of 0, which used to
    # make the counts below fractional and meant an empty positional list
    # could never be combined with *rest.
    @opt_parg_idx = num_pargs.zero? ? 0 : rand(num_pargs)
    @num_opt_pargs = rand(num_pargs + 1 - @opt_parg_idx)
    @num_pargs_req = num_pargs - @num_opt_pargs

    optional_range = @opt_parg_idx...(@opt_parg_idx + @num_opt_pargs)
    @pargs = Array.new(num_pargs) do |i|
      { name: "p#{i}", optional: optional_range.cover?(i) }
    end

    # Choose how many kwargs to use, and how many are optional
    @kwargs = Array.new(rand(10)) do |i|
      { name: "k#{i}", optional: rand < 0.5 }
    end

    # Choose whether to have rest parameters or not.
    # *rest is only generated when there are no optional positionals.
    @has_rest = @num_opt_pargs.zero? && rand < 0.5
    @has_kwrest = rand < 0.25

    # Choose whether to have a named block parameter or not
    @has_block_param = rand < 0.25
  end

  # Sample/generate a random set of arguments corresponding to the parameters
  def sample_args
    # Choose how many positional args to pass
    num_pargs_passed = rand(@num_pargs_req..@pargs.size)

    # How many optional arguments will be filled
    opt_pargs_filled = num_pargs_passed - @num_pargs_req

    @pargs.each_with_index do |parg, i|
      parg.delete(:argval) # drop any value left over from an earlier sampling
      parg[:default] = rand(100) if parg[:optional]
      # Optional positionals are filled left to right, so only the first
      # opt_pargs_filled of them receive an explicit value.
      passed = !parg[:optional] || i < @opt_parg_idx + opt_pargs_filled
      parg[:argval] = rand(100) if passed
    end

    @kwargs.each do |kwarg|
      kwarg.delete(:argval)
      kwarg[:default] = rand(100) if kwarg[:optional]
      kwarg[:argval] = rand(100) if !kwarg[:optional] || rand < 0.5
    end

    # Randomly pass a block or not
    @block_arg = rand < 0.5 ? rand(100) : nil
  end

  # Compute the expected checksum of arguments ahead of time.
  #
  # Mirrors the body emitted by #gen_method_str: every parameter contributes
  # (position + 1) * value, using the default when no argument is passed.
  def compute_checksum
    checksum = 0

    [@pargs, @kwargs].each do |params|
      params.each_with_index do |arg, i|
        value = arg.key?(:argval) ? arg[:argval] : arg[:default]
        checksum += (i + 1) * arg_val(value)
      end
    end

    if @block_arg
      # The generated method invokes the block once through `yield`, and a
      # second time through `block.call` when it has a &block parameter.
      checksum += arg_val(@block_arg) if @has_block_param
      checksum += arg_val(@block_arg)
    end

    checksum
  end

  # Generate code for the method signature and method body
  def gen_method_str
    params = @pargs.map do |arg|
      arg[:optional] ? "#{arg[:name]} = #{arg[:default]}" : arg[:name]
    end
    params << "*rest" if @has_rest
    @kwargs.each do |arg|
      params << (arg[:optional] ? "#{arg[:name]}: #{arg[:default]}" : "#{arg[:name]}:")
    end
    params << "**kwrest" if @has_kwrest
    params << "&block" if @has_block_param

    m_str = +"def m(#{params.join(', ')})\n"

    # Add some useless locals
    rand(0...16).times { |i| m_str << "local#{i} = #{i}\n" }

    # Add some useless if statements, each with probability 0.5
    # (the previous `rand() < 50` test emitted one for every parameter).
    @pargs.each do |arg|
      m_str << "if #{arg[:name]} > 4; end\n" if rand < 0.5
    end

    m_str << "checksum = 0\n"

    [@pargs, @kwargs].each do |group|
      group.each_with_index do |arg, i|
        m_str << "checksum += #{i + 1} * arg_val(#{arg[:name]})\n"
      end
    end

    if @has_block_param
      m_str << "if block; r = block.call; checksum += arg_val(r); end\n"
    end

    m_str << "if block_given?; r = yield; checksum += arg_val(r); end\n"

    if @has_rest
      m_str << "raise 'rest is not array' unless rest.kind_of?(Array)\n"
      m_str << "raise 'rest size not integer' unless rest.size.kind_of?(Integer)\n"
    end

    if @has_kwrest
      m_str << "raise 'kwrest is not a hash' unless kwrest.kind_of?(Hash)\n"
      m_str << "raise 'kwrest size not integer' unless kwrest.size.kind_of?(Integer)\n"
    end

    m_str << "checksum\n"
    m_str << "end"

    m_str
  end

  # Generate code to call into the method and pass the arguments
  def gen_call_str
    # Only parameters that were given an explicit value appear in the call.
    args = @pargs.select { |arg| arg.key?(:argval) }.map { |arg| arg[:argval].to_s }
    @kwargs.each do |arg|
      args << "#{arg[:name]}: #{arg[:argval]}" if arg.key?(:argval)
    end

    c_str = "m(#{args.join(', ')})"

    # Randomly pass a block or not
    c_str += " { #{@block_arg} }" if @block_arg

    c_str
  end
end
itrs_per_sec +puts "#{'%.1f' % itrs_per_sec} iterations/s" +puts "#{format_number(0, itrs_per_hour.round)} iterations/hour" diff --git a/misc/call_fuzzer.sh b/misc/call_fuzzer.sh new file mode 100755 index 0000000000..cf4ec76fe8 --- /dev/null +++ b/misc/call_fuzzer.sh @@ -0,0 +1,13 @@ +# Stop at first error +set -e + +# TODO +# TODO: boost --num-iters to 1M+ for actual test +# TODO +export NUM_ITERS=25000 + +# Enable code GC so we don't stop compiling when we hit the code size limit +ruby --yjit-call-threshold=1 --yjit-code-gc misc/call_fuzzer.rb --num-iters=$NUM_ITERS + +# Do another pass with --verify-ctx +ruby --yjit-call-threshold=1 --yjit-code-gc --yjit-verify-ctx misc/call_fuzzer.rb --num-iters=$NUM_ITERS diff --git a/misc/expand_tabs.rb b/misc/expand_tabs.rb index a94eea5046..d26568eefc 100755 --- a/misc/expand_tabs.rb +++ b/misc/expand_tabs.rb @@ -59,53 +59,31 @@ class Git end DEFAULT_GEM_LIBS = %w[ - abbrev - base64 - benchmark bundler - cmath - csv - debug delegate did_you_mean - drb english erb + error_highlight fileutils find forwardable - getoptlong ipaddr - irb - logger - mutex_m net-http net-protocol - observer open3 open-uri optparse ostruct pp prettyprint - prime - pstore - rdoc - readline - reline + prism resolv - resolv-replace - rexml - rinda - rss rubygems - scanf securerandom - set shellwords singleton tempfile - thwait time timeout tmpdir @@ -117,27 +95,19 @@ DEFAULT_GEM_LIBS = %w[ ] DEFAULT_GEM_EXTS = %w[ - bigdecimal - cgi date digest etc fcntl - fiddle io-console io-nonblock io-wait json - nkf openssl pathname psych - racc - readline-ext stringio strscan - syslog - win32ole zlib ] diff --git a/misc/gdb.py b/misc/gdb.py new file mode 100644 index 0000000000..6034a389bb --- /dev/null +++ b/misc/gdb.py @@ -0,0 +1,181 @@ +import argparse +import textwrap + +# usage: [-h] [-a | --all | --no-all] [-s STACK_SIZE] [uplevel] +# +# Dump a control frame +# +# positional arguments: +# uplevel CFP offset from the stack top +# +# options: +# -h, --help 
class CFP(gdb.Command):
    """Dump a control frame of the current Ruby execution context.

    usage: cfp [-h] [-a | --all | --no-all] [-s STACK_SIZE] [uplevel]

    positional arguments:
      uplevel               CFP offset from the stack top

    options:
      -h, --help            show this help message and exit
      -a, --all, --no-all   dump all frames
      -s STACK_SIZE, --stack-size STACK_SIZE
                            override stack_size (useful for JIT frames)
    """

    FRAME_MAGICS = [
        # frame types
        'VM_FRAME_MAGIC_METHOD',
        'VM_FRAME_MAGIC_BLOCK',
        'VM_FRAME_MAGIC_CLASS',
        'VM_FRAME_MAGIC_TOP',
        'VM_FRAME_MAGIC_CFUNC',
        'VM_FRAME_MAGIC_IFUNC',
        'VM_FRAME_MAGIC_EVAL',
        'VM_FRAME_MAGIC_RESCUE',
        'VM_FRAME_MAGIC_DUMMY',
    ]
    FRAME_FLAGS = [
        # frame flag
        'VM_FRAME_FLAG_FINISH',
        'VM_FRAME_FLAG_BMETHOD',
        'VM_FRAME_FLAG_CFRAME',
        'VM_FRAME_FLAG_LAMBDA',
        'VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM',
        'VM_FRAME_FLAG_CFRAME_KW',
        'VM_FRAME_FLAG_PASSED',
        # env flag
        'VM_ENV_FLAG_LOCAL',
        'VM_ENV_FLAG_ESCAPED',
        'VM_ENV_FLAG_WB_REQUIRED',
        'VM_ENV_FLAG_ISOLATED',
    ]

    def __init__(self):
        # Registers the command under the name `cfp`.
        super(CFP, self).__init__('cfp', gdb.COMMAND_USER)

        self.parser = argparse.ArgumentParser(description='Dump a control frame')
        self.parser.add_argument('uplevel', type=int, nargs='?', default=0, help='CFP offset from the stack top')
        self.parser.add_argument('-a', '--all', action=argparse.BooleanOptionalAction, help='dump all frames')
        self.parser.add_argument('-s', '--stack-size', type=int, help='override stack_size (useful for JIT frames)')

    def invoke(self, args, from_tty):
        """Entry point called by GDB with the raw argument string of `cfp`."""
        try:
            args = self.parser.parse_args(args.split())
        except SystemExit:
            # argparse exits on -h and on bad arguments; swallow that so the
            # command just returns to the GDB prompt.
            return
        cfp = f'(ruby_current_ec->cfp + ({args.uplevel}))'
        end_cfp = self.get_int('ruby_current_ec->vm_stack + ruby_current_ec->vm_stack_size')
        cfp_index = int((end_cfp - self.get_int(cfp) - 1) / self.get_int('sizeof(rb_control_frame_t)'))

        if args.all:
            cfp_count = int((end_cfp - self.get_int('ruby_current_ec->cfp')) / self.get_int('sizeof(rb_control_frame_t)')) - 1 # exclude dummy CFP
            # Re-enter invoke once per frame, largest uplevel first.
            for i in range(cfp_count):
                print('-' * 80)
                self.invoke(str(cfp_count - i - 1), from_tty)
            return

        print('CFP (addr=0x{:x}, index={}):'.format(self.get_int(cfp), cfp_index))
        gdb.execute(f'p *({cfp})')
        print()

        if self.get_int(f'{cfp}->iseq'):
            local_size = self.get_int(f'{cfp}->iseq->body->local_table_size - {cfp}->iseq->body->param.size')
            param_size = self.get_int(f'{cfp}->iseq->body->param.size')

            # Each section is guarded by its own size. The two guards used to
            # be swapped, so a frame with locals but no params printed an
            # empty "Params" header and no "Locals" section at all.
            if param_size:
                print(f'Params (size={param_size}):')
                for i in range(-3 - local_size - param_size, -3 - local_size):
                    self.print_stack(cfp, i, self.rp(cfp, i))
                print()

            if local_size:
                print(f'Locals (size={local_size}):')
                for i in range(-3 - local_size, -3):
                    self.print_stack(cfp, i, self.rp(cfp, i))
                print()

        # The three slots printed here are ep[-2], ep[-1] and ep[0]
        # (print_env reads ep[bp_index + 1]).
        print('Env:')
        self.print_env(cfp, -3, self.rp_env(cfp, -3))
        self.print_env(cfp, -2, self.specval(cfp, -2))
        self.print_env(cfp, -1, self.frame_types(cfp, -1))
        print()

        # We can't calculate BP for the first frame.
        # vm_base_ptr doesn't work for C frames either.
        if cfp_index > 0 and self.get_int(f'{cfp}->iseq'):
            if args.stack_size is not None:
                stack_size = args.stack_size
            else:
                # NOTE(review): 8 is presumably sizeof(VALUE) on 64-bit targets - confirm
                stack_size = int((self.get_int(f'{cfp}->sp') - self.get_int(f'vm_base_ptr({cfp})')) / 8)
            print(f'Stack (size={stack_size}):')
            for i in range(0, stack_size):
                self.print_stack(cfp, i, self.rp(cfp, i))
            print(self.regs(cfp, stack_size))

    def print_env(self, cfp, bp_index, content):
        """Print one environment slot, addressed relative to the frame's ep."""
        ep_index = bp_index + 1
        address = self.get_int(f'((rb_control_frame_t *){cfp})->ep + {ep_index}')
        value = self.get_env(cfp, bp_index)
        self._print_slot(self.regs(cfp, bp_index), address, bp_index, content, value)

    def print_stack(self, cfp, bp_index, content):
        """Print one stack slot, addressed relative to vm_base_ptr(cfp)."""
        address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}')
        value = self.get_value(cfp, bp_index)
        self._print_slot(self.regs(cfp, bp_index), address, bp_index, content, value)

    def _print_slot(self, regs, address, bp_index, content, value):
        """Shared row formatter for print_env and print_stack."""
        if content:
            content = textwrap.indent(content, ' ' * 3).lstrip() # Leave the regs column empty
            content = f'{content} '
        print('{:2} 0x{:x} [{}] {}(0x{:x})'.format(regs, address, bp_index, content, value))

    def regs(self, cfp, bp_index):
        """Return 'EP' and/or 'SP' when that register points at the given slot."""
        address = self.get_int(f'vm_base_ptr({cfp}) + {bp_index}')
        regs = []
        for reg, field in { 'EP': 'ep', 'SP': 'sp' }.items():
            if address == self.get_int(f'{cfp}->{field}'):
                regs.append(reg)
        return ' '.join(regs)

    def rp(self, cfp, bp_index):
        """Describe a stack slot by running the `rp` command on its value."""
        value = self.get_value(cfp, bp_index)
        return self.get_string(f'rp {value}').rstrip()

    def rp_env(self, cfp, bp_index):
        """Describe an environment slot by running the `rp` command on its value."""
        value = self.get_env(cfp, bp_index)
        return self.get_string(f'rp {value}').rstrip()

    # specval: block_handler or previous EP
    def specval(self, cfp, bp_index):
        value = self.get_env(cfp, bp_index)
        if value == 0:
            return 'VM_BLOCK_HANDLER_NONE'
        if value == self.get_int('rb_block_param_proxy'):
            return 'rb_block_param_proxy'
        return ''

    def frame_types(self, cfp, bp_index):
        """Decode an environment slot into ' | '-joined frame magic and flag names."""
        types = []
        value = self.get_env(cfp, bp_index)

        magic_mask = self.get_int('VM_FRAME_MAGIC_MASK')
        for magic in self.FRAME_MAGICS:
            magic_value = self.get_int(magic)
            if value & magic_mask == magic_value:
                types.append(magic)

        for flag in self.FRAME_FLAGS:
            flag_value = self.get_int(flag)
            if value & flag_value:
                types.append(flag)

        return ' | '.join(types)

    def get_env(self, cfp, bp_index):
        """Read ep[bp_index + 1] of the frame as an int."""
        ep_index = bp_index + 1
        return self.get_int(f'((rb_control_frame_t *){cfp})->ep[{ep_index}]')

    def get_value(self, cfp, bp_index):
        """Read vm_base_ptr(cfp)[bp_index] as an int."""
        return self.get_int(f'vm_base_ptr({cfp})[{bp_index}]')

    def get_int(self, expr):
        """Evaluate a debugger expression through `printf "%ld"` and parse the result."""
        return int(self.get_string(f'printf "%ld", ({expr})'))

    def get_string(self, expr):
        """Run a GDB command and return what it printed."""
        return gdb.execute(expr, to_string=True)

CFP()
# Aggregating cycles per symbol and dso
total_cycles = 0
category_cycles = Counter()
detailed_category_cycles = defaultdict(Counter)
categories = set()

def truncate_symbol(symbol, max_length=50):
    """ Truncate the symbol name to a maximum length """
    return symbol if len(symbol) <= max_length else symbol[:max_length-3] + '...'

def categorize_symbol(dso, symbol):
    """ Categorize the symbol based on the defined criteria

    The rules are tried top to bottom and the first match wins. A symbol that
    matches none of them is returned unchanged, so it acts as its own category.
    """
    if dso == 'sqlite3_native.so':
        return '[sqlite3]'
    elif 'SHA256' in symbol:
        return '[sha256]'
    elif symbol.startswith('[JIT] gen_send'):
        return '[JIT send]'
    elif symbol.startswith('[JIT]') or symbol.startswith('ZJIT: ') or dso.startswith('perf-'):
        return '[JIT code]'
    elif '::' in symbol or symbol.startswith('_ZN4yjit') or symbol.startswith('_ZN4zjit'):
        return '[JIT compile]'
    elif symbol.startswith('rb_vm_') or symbol.startswith('vm_') or symbol in {
        "rb_call0", "callable_method_entry_or_negative", "invoke_block_from_c_bh",
        "rb_funcallv_scope", "setup_parameters_complex", "rb_yield"}:
        return '[interpreter]'
    elif symbol.startswith('rb_hash_') or symbol.startswith('hash_'):
        return '[rb_hash_*]'
    elif symbol.startswith('rb_ary_') or symbol.startswith('ary_'):
        return '[rb_ary_*]'
    elif symbol.startswith('rb_str_') or symbol.startswith('str_'):
        return '[rb_str_*]'
    elif symbol.startswith('rb_sym') or symbol.startswith('sym_'):
        return '[rb_sym_*]'
    elif symbol.startswith('rb_st_') or symbol.startswith('st_'):
        return '[rb_st_*]'
    elif symbol.startswith('rb_ivar_') or 'shape' in symbol:
        return '[ivars]'
    elif 'match' in symbol or symbol.startswith('rb_reg') or symbol.startswith('onig'):
        return '[regexp]'
    elif 'alloc' in symbol or 'free' in symbol or 'gc' in symbol:
        return '[GC]'
    elif 'pthread' in symbol and 'lock' in symbol:
        return '[pthread lock]'
    else:
        return symbol # Return the symbol itself for uncategorized symbols

def process_event(event):
    """ Add one sample to the module-level aggregates

    `event` is a dict with optional "dso" and "symbol" keys and a required
    event["sample"]["period"] cycle count.
    """
    global total_cycles, category_cycles, detailed_category_cycles, categories

    full_dso = event.get("dso", "Unknown_dso")
    dso = os.path.basename(full_dso)
    symbol = event.get("symbol", "[unknown]")
    cycles = event["sample"]["period"]
    total_cycles += cycles

    category = categorize_symbol(dso, symbol)
    category_cycles[category] += cycles
    detailed_category_cycles[category][(dso, symbol)] += cycles

    # Only bracketed names are real categories; a bare symbol name gets no
    # per-category breakdown in trace_end().
    if category.startswith('[') and category.endswith(']'):
        categories.add(category)

def trace_end():
    """ Print the aggregated report: a summary table, then one table per category """
    if total_cycles == 0:
        return

    print("Aggregated Event Data:")
    print("{:<20} {:<50} {:>20} {:>15}".format("[dso]", "[symbol or category]", "[top-most cycle ratio]", "[num cycles]"))

    for category, cycles in category_cycles.most_common():
        ratio = (cycles / total_cycles) * 100
        dsos = {dso for dso, _ in detailed_category_cycles[category]}
        dso_display = next(iter(dsos)) if len(dsos) == 1 else "Multiple DSOs"
        print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso_display, truncate_symbol(category), ratio, cycles))

    # Category breakdown, in the same descending-cycle order as the summary.
    # Iterating the `categories` set directly made the section order change
    # from run to run, because string hashing is randomized per process.
    for category, _ in category_cycles.most_common():
        if category not in categories:
            continue
        symbols = detailed_category_cycles[category]
        category_total = sum(symbols.values())
        category_ratio = (category_total / total_cycles) * 100
        print(f"\nCategory: {category} ({category_ratio:.2f}%)")
        print("{:<20} {:<50} {:>20} {:>15}".format("[dso]", "[symbol]", "[top-most cycle ratio]", "[num cycles]"))
        for (dso, symbol), cycles in symbols.most_common():
            # A category made only of zero-period samples must not divide by zero.
            symbol_ratio = (cycles / category_total) * 100 if category_total else 0.0
            print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles))

# There are two ways to use this script:
# 1) perf script -s misc/jit_perf.py -- native interface
# 2) perf script > perf.txt && misc/jit_perf.py perf.txt -- hack, which doesn't require perf with Python support
#
# In both cases, __name__ is "__main__". The following code implements (2) when sys.argv is 2.
def parse_perf_script_line(line):
    """ Turn one line of `perf script` text output into a process_event() dict

    Returns None for lines that do not have the expected shape (blank lines,
    fewer than seven whitespace-separated fields, a non-numeric period, or no
    trailing " (dso)" part) so that they are skipped instead of raising.
    """
    # [Example]
    # ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)
    row = line.split(maxsplit=6)
    if len(row) < 7 or " (" not in row[6]:
        return None

    try:
        period = int(row[3]) # "1212775"
    except ValueError:
        return None

    symbol, dso = row[6].rsplit(" (", 1) # "[JIT] getlocal_WC_0+0x0", "/tmp/perf-78207.map)\n"
    symbol = symbol.split("+")[0] # "[JIT] getlocal_WC_0"
    dso = dso.split(")")[0] # "/tmp/perf-78207.map"

    return {"dso": dso, "symbol": symbol, "sample": {"period": period}}

# Text-dump mode: exactly one argument, the path of a `perf script` dump.
# (The old inner `len(sys.argv) != 2` usage check could never run under this
# guard and has been removed.)
if __name__ == "__main__" and len(sys.argv) == 2:
    with open(sys.argv[1], "r") as file:
        for line in file:
            event = parse_perf_script_line(line)
            if event is not None:
                process_event(event)
    trace_end()
pathobj.Cast(self.tRArray) if flags & RUBY_FL_USER1: - len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4)) >> (RUBY_FL_USHIFT+3)) + len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5|RUBY_FL_USER6|RUBY_FL_USER7|RUBY_FL_USER8|RUBY_FL_USER9)) >> (RUBY_FL_USHIFT+3)) ptr = pathobj.GetValueForExpressionPath("->as.ary") else: len = pathobj.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() @@ -171,6 +169,14 @@ def lldb_init(debugger): value_types = [] g = globals() + + imemo_types = target.FindFirstType('enum imemo_type') + enum_members = imemo_types.GetEnumMembers() + + for i in range(enum_members.GetSize()): + member = enum_members.GetTypeEnumMemberAtIndex(i) + g[member.GetName()] = member.GetValueAsUnsigned() + for enum in target.FindFirstGlobalVariable('ruby_dummy_gdb_enums'): enum = enum.GetType() members = enum.GetEnumMembers() @@ -191,18 +197,16 @@ def string2cstr(rstring): flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned if flags & RUBY_T_MASK != RUBY_T_STRING: raise TypeError("not a string") + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) if flags & RUBY_FL_USER1: cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) - clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) - clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0) return cptr, clen def output_string(debugger, result, rstring): cptr, clen = string2cstr(rstring) - expr = "print *(const char (*)[%d])%0#x" % (clen, cptr) - append_command_output(debugger, expr, result) + append_expression(debugger, "*(const char (*)[%d])%0#x" % (clen, cptr), result) def fixnum_p(x): return x & RUBY_FIXNUM_FLAG != 0 @@ -221,6 +225,9 @@ def append_command_output(debugger, command, result): result.write(output1) result.write(output2) +def append_expression(debugger, expression, result): + append_command_output(debugger, "expression " + 
expression, result) + def lldb_rp(debugger, command, result, internal_dict): if not ('RUBY_Qfalse' in globals()): lldb_init(debugger) @@ -252,18 +259,17 @@ def lldb_inspect(debugger, target, result, val): elif fixnum_p(num): print(num >> 1, file=result) elif flonum_p(num): - append_command_output(debugger, "print rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "rb_float_value(%0#x)" % val.GetValueAsUnsigned(), result) elif static_sym_p(num): if num < 128: print("T_SYMBOL: %c" % num, file=result) else: print("T_SYMBOL: (%x)" % num, file=result) - append_command_output(debugger, "p rb_id2name(%0#x)" % (num >> 8), result) + append_expression(debugger, "rb_id2name(%0#x)" % (num >> 8), result) elif num & RUBY_IMMEDIATE_MASK: print('immediate(%x)' % num, file=result) else: - tRBasic = target.FindFirstType("struct RBasic").GetPointerType() - tRValue = target.FindFirstType("struct RVALUE") + tRBasic = target.FindFirstType("::RBasic").GetPointerType() val = val.Cast(tRBasic) flags = val.GetValueForExpressionPath("->flags").GetValueAsUnsigned() @@ -286,13 +292,13 @@ def lldb_inspect(debugger, target, result, val): print('T_NIL: %s%s' % (flaginfo, val.Dereference()), file=result) elif flType == RUBY_T_OBJECT: result.write('T_OBJECT: %s' % flaginfo) - append_command_output(debugger, "print *(struct RObject*)%0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RObject*)%0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_CLASS or flType == RUBY_T_MODULE or flType == RUBY_T_ICLASS: result.write('T_%s: %s' % ('CLASS' if flType == RUBY_T_CLASS else 'MODULE' if flType == RUBY_T_MODULE else 'ICLASS', flaginfo)) - append_command_output(debugger, "print *(struct RClass*)%0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RClass*)%0#x" % val.GetValueAsUnsigned(), result) tRClass = target.FindFirstType("struct RClass") if not 
val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): - append_command_output(debugger, "print *(struct rb_classext_struct*)%0#x" % (val.GetValueAsUnsigned() + tRClass.GetByteSize()), result) + append_expression(debugger, "*(struct rb_classext_struct*)%0#x" % (val.GetValueAsUnsigned() + tRClass.GetByteSize()), result) elif flType == RUBY_T_STRING: result.write('T_STRING: %s' % flaginfo) encidx = ((flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) @@ -306,19 +312,19 @@ def lldb_inspect(debugger, target, result, val): if len == 0: result.write("(empty)\n") else: - append_command_output(debugger, "print *(const char (*)[%d])%0#x" % (len, ptr), result) + append_expression(debugger, "*(const char (*)[%d])%0#x" % (len, ptr), result) elif flType == RUBY_T_SYMBOL: result.write('T_SYMBOL: %s' % flaginfo) tRSymbol = target.FindFirstType("struct RSymbol").GetPointerType() val = val.Cast(tRSymbol) - append_command_output(debugger, 'print (ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned(), result) + append_expression(debugger, '(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned(), result) tRString = target.FindFirstType("struct RString").GetPointerType() output_string(debugger, result, val.GetValueForExpressionPath("->fstr").Cast(tRString)) elif flType == RUBY_T_ARRAY: tRArray = target.FindFirstType("struct RArray").GetPointerType() val = val.Cast(tRArray) if flags & RUBY_FL_USER1: - len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4)) >> (RUBY_FL_USHIFT+3)) + len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5|RUBY_FL_USER6|RUBY_FL_USER7|RUBY_FL_USER8|RUBY_FL_USER9)) >> (RUBY_FL_USHIFT+3)) ptr = val.GetValueForExpressionPath("->as.ary") else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() @@ -337,12 +343,12 @@ def lldb_inspect(debugger, target, result, val): else: result.write("\n") if ptr.GetValueAsSigned() == 0: - append_command_output(debugger, "expression -fx -- ((struct RArray*)%0#x)->as.ary" % 
val.GetValueAsUnsigned(), result) + append_expression(debugger, "-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned(), result) else: - append_command_output(debugger, "expression -Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), result) + append_expression(debugger, "-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned()), result) elif flType == RUBY_T_HASH: result.write("T_HASH: %s" % flaginfo) - append_command_output(debugger, "p *(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RHash *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_BIGNUM: tRBignum = target.FindFirstType("struct RBignum").GetPointerType() val = val.Cast(tRBignum) @@ -350,15 +356,15 @@ def lldb_inspect(debugger, target, result, val): if flags & RUBY_FL_USER2: len = ((flags & (RUBY_FL_USER3|RUBY_FL_USER4|RUBY_FL_USER5)) >> (RUBY_FL_USHIFT+3)) print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=result) - append_command_output(debugger, "print ((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "((struct RBignum *) %0#x)->as.ary" % val.GetValueAsUnsigned(), result) else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() print("T_BIGNUM: sign=%s len=%d" % (sign, len), file=result) print(val.Dereference(), file=result) - append_command_output(debugger, "expression -Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) - # append_command_output(debugger, "x ((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) + append_expression(debugger, "-Z %x -fx -- (const BDIGIT*)((struct RBignum*)%d)->as.heap.digits" % (len, val.GetValueAsUnsigned()), result) + # append_expression(debugger, "((struct RBignum *) %0#x)->as.heap.digits / %d" % (val.GetValueAsUnsigned(), len), result) elif flType == RUBY_T_FLOAT: - 
append_command_output(debugger, "print ((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "((struct RFloat *)%d)->float_value" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_RATIONAL: tRRational = target.FindFirstType("struct RRational").GetPointerType() val = val.Cast(tRRational) @@ -391,34 +397,39 @@ def lldb_inspect(debugger, target, result, val): flag = val.GetValueForExpressionPath("->typed_flag") if flag.GetValueAsUnsigned() == 1: print("T_DATA: %s" % val.GetValueForExpressionPath("->type->wrap_struct_name"), file=result) - append_command_output(debugger, "p *(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RTypedData *) %0#x" % val.GetValueAsUnsigned(), result) else: print("T_DATA:", file=result) - append_command_output(debugger, "p *(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RData *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_NODE: tRTypedData = target.FindFirstType("struct RNode").GetPointerType() nd_type = (flags & RUBY_NODE_TYPEMASK) >> RUBY_NODE_TYPESHIFT - append_command_output(debugger, "p (node_type) %d" % nd_type, result) + append_expression(debugger, "(node_type) %d" % nd_type, result) val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RNode *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RNode *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_MOVED: tRTypedData = target.FindFirstType("struct RMoved").GetPointerType() val = val.Cast(tRTypedData) - append_command_output(debugger, "p *(struct RMoved *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RMoved *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_MATCH: tRTypedData = target.FindFirstType("struct RMatch").GetPointerType() val = val.Cast(tRTypedData) - 
append_command_output(debugger, "p *(struct RMatch *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RMatch *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_IMEMO: # I'm not sure how to get IMEMO_MASK out of lldb. It's not in globals() imemo_type = (flags >> RUBY_FL_USHIFT) & 0x0F # IMEMO_MASK + print("T_IMEMO: ", file=result) - append_command_output(debugger, "p (enum imemo_type) %d" % imemo_type, result) - append_command_output(debugger, "p *(struct MEMO *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "(enum imemo_type) %d" % imemo_type, result) + append_expression(debugger, "*(struct MEMO *) %0#x" % val.GetValueAsUnsigned(), result) + elif flType == RUBY_T_STRUCT: + tRTypedData = target.FindFirstType("struct RStruct").GetPointerType() + val = val.Cast(tRTypedData) + append_expression(debugger, "*(struct RStruct *) %0#x" % val.GetValueAsUnsigned(), result) elif flType == RUBY_T_ZOMBIE: tRZombie = target.FindFirstType("struct RZombie").GetPointerType() val = val.Cast(tRZombie) - append_command_output(debugger, "p *(struct RZombie *) %0#x" % val.GetValueAsUnsigned(), result) + append_expression(debugger, "*(struct RZombie *) %0#x" % val.GetValueAsUnsigned(), result) else: print("Not-handled type %0#x" % flType, file=result) print(val, file=result) @@ -459,19 +470,6 @@ def check_bits(page, bitmap_name, bitmap_index, bitmap_bit, v): else: return ' ' -def heap_page(debugger, command, ctx, result, internal_dict): - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - val = frame.EvaluateExpression(command) - page = get_page(lldb, target, val) - page_type = target.FindFirstType("struct heap_page").GetPointerType() - page.Cast(page_type) - append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - append_command_output(debugger, "p *(struct heap_page *) 
%0#x" % page.GetValueAsUnsigned(), result) - def heap_page_body(debugger, command, ctx, result, internal_dict): target = debugger.GetSelectedTarget() process = target.GetProcess() @@ -504,6 +502,8 @@ def dump_node(debugger, command, ctx, result, internal_dict): output_string(ctx, result, dump) def rb_backtrace(debugger, command, result, internal_dict): + if not ('RUBY_Qfalse' in globals()): + lldb_init(debugger) bt = BackTrace(debugger, command, result, internal_dict) frame = bt.frame @@ -523,10 +523,11 @@ def rb_backtrace(debugger, command, result, internal_dict): bt.print_bt(val) def dump_bits(target, result, page, object_address, end = "\n"): - tRValue = target.FindFirstType("struct RVALUE") + slot_size = page.GetChildMemberWithName("heap").GetChildMemberWithName("slot_size").unsigned + byte_size = 40 ** math.floor(math.log(slot_size, 40)) tUintPtr = target.FindFirstType("uintptr_t") # bits_t - num_in_page = (object_address & HEAP_PAGE_ALIGN_MASK) // tRValue.GetByteSize(); + num_in_page = (object_address & HEAP_PAGE_ALIGN_MASK) // byte_size; bits_bitlength = tUintPtr.GetByteSize() * 8 bitmap_index = num_in_page // bits_bitlength bitmap_offset = num_in_page & (bits_bitlength - 1) @@ -546,10 +547,9 @@ class HeapPageIter: self.target = target self.start = page.GetChildMemberWithName('start').GetValueAsUnsigned(); self.num_slots = page.GetChildMemberWithName('total_slots').unsigned - self.slot_size = page.GetChildMemberWithName('size_pool').GetChildMemberWithName('slot_size').unsigned + self.slot_size = page.GetChildMemberWithName('heap').GetChildMemberWithName('slot_size').unsigned self.counter = 0 - self.tRBasic = target.FindFirstType("struct RBasic") - self.tRValue = target.FindFirstType("struct RVALUE") + self.tRBasic = target.FindFirstType("::RBasic") def is_valid(self): heap_page_header_size = self.target.FindFirstType("struct heap_page_header").GetByteSize() @@ -581,14 +581,13 @@ def dump_page_internal(page, target, process, thread, frame, result, debugger, h 
freelist = [] fl_start = page.GetChildMemberWithName('freelist').GetValueAsUnsigned() - tRVALUE = target.FindFirstType("struct RVALUE") + free_slot = target.FindFirstType("struct free_slot") while fl_start > 0: freelist.append(fl_start) obj_addr = lldb.SBAddress(fl_start, target) - obj = target.CreateValueFromAddress("object", obj_addr, tRVALUE) - fl_start = obj.GetChildMemberWithName("as").GetChildMemberWithName("free").GetChildMemberWithName("next").GetValueAsUnsigned() - + obj = target.CreateValueFromAddress("object", obj_addr, free_slot) + fl_start = obj.GetChildMemberWithName("next").GetValueAsUnsigned() page_iter = HeapPageIter(page, target) if page_iter.is_valid(): @@ -602,9 +601,13 @@ def dump_page_internal(page, target, process, thread, frame, result, debugger, h try: flidx = "%3d" % freelist.index(obj_addr) except ValueError: - flidx = ' ' + flidx = ' -1' - result_str = "%s idx: [%3d] freelist_idx: {%s} Addr: %0#x (flags: %0#x)" % (rb_type(flags, ruby_type_map), page_index, flidx, obj_addr, flags) + if flType == RUBY_T_NONE: + klass = obj.GetChildMemberWithName('klass').GetValueAsUnsigned() + result_str = "%s idx: [%3d] freelist_idx: {%s} Addr: %0#x (flags: %0#x, next: %0#x)" % (rb_type(flags, ruby_type_map), page_index, flidx, obj_addr, flags, klass) + else: + result_str = "%s idx: [%3d] freelist_idx: {%s} Addr: %0#x (flags: %0#x)" % (rb_type(flags, ruby_type_map), page_index, flidx, obj_addr, flags) if highlight == obj_addr: result_str = ' '.join([result_str, "<<<<<"]) @@ -708,18 +711,37 @@ def rb_id2str(debugger, command, result, internal_dict): pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE id_str = rb_ary_entry(target, ary, pos, result) lldb_inspect(debugger, target, result, id_str) +# END FUNCTION STYLE DECLS + + +load_dir, _ = os.path.split(os.path.realpath(__file__)) + +for fname in glob.glob(f"{load_dir}/lldb_rb/commands/*_command.py"): + _, basename = os.path.split(fname) + mname, _ = os.path.splitext(basename) + + exec(f"import 
lldb_rb.commands.{mname}") def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp rp") + # Register all classes that subclass RbBaseCommand + + for memname, mem in inspect.getmembers(sys.modules["lldb_rb.rb_base_command"]): + if memname == "RbBaseCommand": + for sclass in mem.__subclasses__(): + sclass.register_lldb_command(debugger, f"{__name__}.{sclass.__module__}") + + + ## FUNCTION INITS - These should be removed when converted to class commands + debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp old_rp") debugger.HandleCommand("command script add -f lldb_cruby.count_objects rb_count_objects") debugger.HandleCommand("command script add -f lldb_cruby.stack_dump_raw SDR") debugger.HandleCommand("command script add -f lldb_cruby.dump_node dump_node") - debugger.HandleCommand("command script add -f lldb_cruby.heap_page heap_page") debugger.HandleCommand("command script add -f lldb_cruby.heap_page_body heap_page_body") debugger.HandleCommand("command script add -f lldb_cruby.rb_backtrace rbbt") debugger.HandleCommand("command script add -f lldb_cruby.dump_page dump_page") debugger.HandleCommand("command script add -f lldb_cruby.dump_page_rvalue dump_page_rvalue") - debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str rb_id2str") + debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str old_rb_id2str") + + lldb_rb.rb_base_command.RbBaseCommand.lldb_init(debugger) - lldb_init(debugger) print("lldb scripts for ruby has been installed.") diff --git a/misc/lldb_disasm.py b/misc/lldb_disasm.py index ff805ed428..ab759f009a 100644 --- a/misc/lldb_disasm.py +++ b/misc/lldb_disasm.py @@ -63,6 +63,7 @@ class IseqDisassembler: self.internal_dict = internal_dict self.target = debugger.GetSelectedTarget() + self.insns_address_table = self.__get_insns_address_table() self.process = self.target.GetProcess() self.thread = self.process.GetSelectedThread() self.frame = 
self.thread.GetSelectedFrame() @@ -88,7 +89,7 @@ class IseqDisassembler: tIntPtr = target.FindFirstType("intptr_t") size = target.EvaluateExpression('ruby_vminsn_type::VM_INSTRUCTION_SIZE').unsigned sizeOfIntPtr = tIntPtr.GetByteSize() - addr_of_table = target.FindSymbols("vm_exec_core.insns_address_table")[0].GetSymbol().GetStartAddress().GetLoadAddress(target) + addr_of_table = self.insns_address_table.GetStartAddress().GetLoadAddress(target) my_dict = {} @@ -144,7 +145,7 @@ class IseqDisassembler: def insn_len(self, target, offset): size_of_char = self.tChar.GetByteSize() - symbol = target.FindSymbols("insn_len.t")[0].GetSymbol() + symbol = target.FindSymbols("rb_vm_insn_len_info")[0].GetSymbol() section = symbol.GetStartAddress().GetSection() addr_of_table = symbol.GetStartAddress().GetOffset() @@ -162,7 +163,7 @@ class IseqDisassembler: size_of_short = tUShort.GetByteSize() size_of_char = self.tChar.GetByteSize() - symbol = target.FindSymbols("insn_op_types.y")[0].GetSymbol() + symbol = target.FindSymbols("rb_vm_insn_op_offset")[0].GetSymbol() section = symbol.GetStartAddress().GetSection() addr_of_table = symbol.GetStartAddress().GetOffset() @@ -174,7 +175,7 @@ class IseqDisassembler: if not error.Success(): print("error getting op type offset: ", error) - symbol = target.FindSymbols("insn_op_types.x")[0].GetSymbol() + symbol = target.FindSymbols("rb_vm_insn_op_base")[0].GetSymbol() section = symbol.GetStartAddress().GetSection() addr_of_table = symbol.GetStartAddress().GetOffset() addr_in_name_table = addr_of_table + (offset * size_of_char) @@ -190,7 +191,7 @@ class IseqDisassembler: tUShort = target.FindFirstType("unsigned short") size_of_short = tUShort.GetByteSize() - symbol = target.FindSymbols("insn_name.y")[0].GetSymbol() + symbol = target.FindSymbols("rb_vm_insn_name_offset")[0].GetSymbol() section = symbol.GetStartAddress().GetSection() table_offset = symbol.GetStartAddress().GetOffset() @@ -205,7 +206,7 @@ class IseqDisassembler: print("error 
getting insn name table offset: ", error) def insn_name(self, target, process, result, offset): - symbol = target.FindSymbols("insn_name.x")[0].GetSymbol() + symbol = target.FindSymbols("rb_vm_insn_name_base")[0].GetSymbol() section = symbol.GetStartAddress().GetSection() addr_of_table = symbol.GetStartAddress().GetOffset() @@ -220,6 +221,15 @@ class IseqDisassembler: else: print('error getting insn name', error) + def __get_insns_address_table(self): + module = self.target.FindSymbols("vm_exec_core")[0].GetModule() + + for symbol in module: + if "insns_address_table" in symbol.name and symbol.GetType() == lldb.eSymbolTypeData: + print(f"found symbol {symbol.name}") + return symbol + + def disasm(debugger, command, result, internal_dict): disassembler = IseqDisassembler(debugger, command, result, internal_dict) frame = disassembler.frame @@ -235,7 +245,6 @@ def disasm(debugger, command, result, internal_dict): disassembler.disasm(val); - def __lldb_init_module(debugger, internal_dict): debugger.HandleCommand("command script add -f lldb_disasm.disasm rbdisasm") print("lldb Ruby disasm installed.") diff --git a/misc/lldb_rb/commands/command_template.py b/misc/lldb_rb/commands/command_template.py new file mode 100644 index 0000000000..21014a993e --- /dev/null +++ b/misc/lldb_rb/commands/command_template.py @@ -0,0 +1,30 @@ +# This is a command template for implementing a helper function inside LLDB. To +# use this file +# 1. Copy it and rename the copy so it ends with `_command.py`. +# 2. Rename the class to something descriptive that ends with Command. +# 3. Change the program variable to be a descriptive command name +# 4. 
Ensure you are inheriting from RbBaseCommand or another command that +# implements the same interface + +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +# This test command inherits from RbBaseCommand which provides access to Ruby +# globals and utility helpers +class TestCommand(RbBaseCommand): + # program is the keyword the user will type in lldb to execute this command + program = "test" + + # help_string will be displayed in lldb when the user uses the help functions + help_string = "This is a test command to show how to implement lldb commands" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + # This method will be called once the LLDB environment has been setup. + # You will have access to self.target, self.process, self.frame, and + # self.thread + # + # This is where we should implement our command logic + pass diff --git a/misc/lldb_rb/commands/heap_page_command.py b/misc/lldb_rb/commands/heap_page_command.py new file mode 100644 index 0000000000..2eed3c3bee --- /dev/null +++ b/misc/lldb_rb/commands/heap_page_command.py @@ -0,0 +1,27 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +class HeapPageCommand(RbBaseCommand): + program = "heap_page" + help_string = "prints out 'struct heap_page' for a VALUE pointer in the page" + + def call(self, debugger, command, exe_ctx, result): + self.result = result + self.t_heap_page_body = self.target.FindFirstType("struct heap_page_body") + self.t_heap_page_ptr = self.target.FindFirstType("struct heap_page").GetPointerType() + + page = self._get_page(self.frame.EvaluateExpression(command)) + page.Cast(self.t_heap_page_ptr) + + self._append_expression("(struct heap_page *) %0#x" % page.GetValueAsUnsigned()) + self._append_expression("*(struct heap_page *) %0#x" % page.GetValueAsUnsigned()) + + def _get_page(self, val): + addr = val.GetValueAsUnsigned() 
+ page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + body = self.target.CreateValueFromAddress("page", address, self.t_heap_page_body) + + return body.GetValueForExpressionPath("->header.page") diff --git a/misc/lldb_rb/commands/print_flags_command.py b/misc/lldb_rb/commands/print_flags_command.py new file mode 100644 index 0000000000..bc494ae01a --- /dev/null +++ b/misc/lldb_rb/commands/print_flags_command.py @@ -0,0 +1,31 @@ +import lldb +import re + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +class PrintFlagsCommand(RbBaseCommand): + program = "print_flags" + + help_string = "Print out the individial flags of an RVALUE object in human readable format" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + rclass_t = self.target.FindFirstType("::RBasic") + rcass_ptr = self.target.EvaluateExpression(command).Cast(rclass_t.GetPointerType()) + obj_flags = rcass_ptr.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + flags = [ + "RUBY_FL_WB_PROTECTED", "RUBY_FL_PROMOTED", "RUBY_FL_FINALIZE", + "RUBY_FL_SHAREABLE", "RUBY_FL_FREEZE", + "RUBY_FL_USER0", "RUBY_FL_USER1", "RUBY_FL_USER2", "RUBY_FL_USER3", "RUBY_FL_USER4", + "RUBY_FL_USER5", "RUBY_FL_USER6", "RUBY_FL_USER7", "RUBY_FL_USER8", "RUBY_FL_USER9", + "RUBY_FL_USER10", "RUBY_FL_USER11", "RUBY_FL_USER12", "RUBY_FL_USER13", "RUBY_FL_USER14", + "RUBY_FL_USER15", "RUBY_FL_USER16", "RUBY_FL_USER17", "RUBY_FL_USER18" + ] + + types_index = {v: k for k, v in self.ruby_globals.items() if re.match(r'RUBY_T_', k)} + print("TYPE: {}".format(types_index[obj_flags & self.ruby_globals["RUBY_T_MASK"]])) + for flag in flags: + output = "{} : {}".format(flag, "1" if (obj_flags & self.ruby_globals[flag]) else "0") + print(output, file=result) diff --git a/misc/lldb_rb/commands/rb_id2str_command.py b/misc/lldb_rb/commands/rb_id2str_command.py new file mode 100644 index 
0000000000..6ee859ebf6 --- /dev/null +++ b/misc/lldb_rb/commands/rb_id2str_command.py @@ -0,0 +1,49 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.utils import * +from lldb_rb.rb_base_command import RbBaseCommand + +class RbID2StrCommand(RbBaseCommand): + program = "rb_id2str" + + help_string = "convert and print a Ruby ID to a C string and print it to the LLDB console" + + def call(self, debugger, command, exe_ctx, result): + global_symbols = self.target.FindFirstGlobalVariable("ruby_global_symbols") + + id_val = self.frame.EvaluateExpression(command).GetValueAsUnsigned() + num = self.rb_id_to_serial(id_val) + + last_id = global_symbols.GetChildMemberWithName("last_id").GetValueAsUnsigned() + ID_ENTRY_SIZE = 2 + ID_ENTRY_UNIT = int(self.target.FindFirstGlobalVariable("ID_ENTRY_UNIT").GetValue()) + + ids = global_symbols.GetChildMemberWithName("ids") + + if num <= last_id: + idx = num // ID_ENTRY_UNIT + ary = self.rb_ary_entry(ids, idx, result) + pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE + id_str = self.rb_ary_entry(ary, pos, result) + + RbInspector(debugger, result, self.ruby_globals).inspect(id_str) + + def rb_id_to_serial(self, id_val): + if id_val > self.ruby_globals["tLAST_OP_ID"]: + return id_val >> self.ruby_globals["RUBY_ID_SCOPE_SHIFT"] + else: + return id_val + + def rb_ary_entry(self, ary, idx, result): + tRArray = self.target.FindFirstType("struct RArray").GetPointerType() + ary = ary.Cast(tRArray) + flags = ary.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + if flags & self.ruby_globals["RUBY_FL_USER1"]: + ptr = ary.GetValueForExpressionPath("->as.ary") + else: + ptr = ary.GetValueForExpressionPath("->as.heap.ptr") + + ptr_addr = ptr.GetValueAsUnsigned() + (idx * ptr.GetType().GetByteSize()) + return self.target.CreateValueFromAddress("ary_entry[%d]" % idx, lldb.SBAddress(ptr_addr, self.target), ptr.GetType().GetPointeeType()) diff --git a/misc/lldb_rb/commands/rclass_ext_command.py 
b/misc/lldb_rb/commands/rclass_ext_command.py new file mode 100644 index 0000000000..8bae911457 --- /dev/null +++ b/misc/lldb_rb/commands/rclass_ext_command.py @@ -0,0 +1,14 @@ +from lldb_rb.rb_base_command import RbBaseCommand + +class RclassExtCommand(RbBaseCommand): + program = "rclass_ext" + help_string = "retrieves and prints the rb_classext_struct for the VALUE pointer passed in" + + def call(self, debugger, command, exe_ctx, result): + uintptr_t = self.target.FindFirstType("uintptr_t") + rclass_t = self.target.FindFirstType("struct RClass") + rclass_ext_t = self.target.FindFirstType("rb_classext_t") + + rclass_addr = self.target.EvaluateExpression(command).Cast(uintptr_t) + rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) + debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) diff --git a/misc/lldb_rb/commands/rp_command.py b/misc/lldb_rb/commands/rp_command.py new file mode 100644 index 0000000000..06b2516d50 --- /dev/null +++ b/misc/lldb_rb/commands/rp_command.py @@ -0,0 +1,15 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.utils import * +from lldb_rb.rb_base_command import RbBaseCommand + +class RbID2StrCommand(RbBaseCommand): + program = "rp" + + help_string = "convert and print a Ruby ID to a C string and print it to the LLDB console" + + def call(self, debugger, command, exe_ctx, result): + val = self.frame.EvaluateExpression(command) + inspector = RbInspector(debugger, result, self.ruby_globals) + inspector.inspect(val) diff --git a/misc/lldb_rb/constants.py b/misc/lldb_rb/constants.py new file mode 100644 index 0000000000..9cd56eccb0 --- /dev/null +++ b/misc/lldb_rb/constants.py @@ -0,0 +1,6 @@ +HEAP_PAGE_ALIGN_LOG = 16 +HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) +HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) +HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN + +IMEMO_MASK = 0x0F diff --git a/misc/lldb_rb/lldb_interface.py b/misc/lldb_rb/lldb_interface.py new file mode 100644 index 
0000000000..25930b2e16 --- /dev/null +++ b/misc/lldb_rb/lldb_interface.py @@ -0,0 +1,18 @@ +class LLDBInterface: + def build_environment(self, debugger): + self.debugger = debugger + self.target = debugger.GetSelectedTarget() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + + def _append_command_output(self, command): + output1 = self.result.GetOutput() + self.debugger.GetCommandInterpreter().HandleCommand(command, self.result) + output2 = self.result.GetOutput() + self.result.Clear() + self.result.write(output1) + self.result.write(output2) + + def _append_expression(self, expression): + self._append_command_output("expression " + expression) diff --git a/misc/lldb_rb/rb_base_command.py b/misc/lldb_rb/rb_base_command.py new file mode 100644 index 0000000000..70a5addd6d --- /dev/null +++ b/misc/lldb_rb/rb_base_command.py @@ -0,0 +1,57 @@ +import lldb +from pydoc import locate +from lldb_rb.constants import * +from lldb_rb.utils import * + +class RbBaseCommand(LLDBInterface): + @classmethod + def register_lldb_command(cls, debugger, module_name): + # Add any commands contained in this module to LLDB + command = f"command script add -c {module_name}.{cls.__name__} {cls.program}" + debugger.HandleCommand(command) + + @classmethod + def lldb_init(cls, debugger): + target = debugger.GetSelectedTarget() + global SIZEOF_VALUE + SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() + + value_types = [] + g = globals() + + imemo_types = target.FindFirstType("enum imemo_type") + + #for member in imemo_types.GetEnumMembers(): + # g[member.GetName()] = member.GetValueAsUnsigned() + + for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + g[name] = value + + if 
name.startswith("RUBY_T_"): + value_types.append(name) + g["value_types"] = value_types + return g + + def __init__(self, debugger, _internal_dict): + self.ruby_globals = RbBaseCommand.lldb_init(debugger) + self.internal_dict = _internal_dict + + def __call__(self, debugger, command, exe_ctx, result): + self.ruby_globals = RbBaseCommand.lldb_init(debugger) + self.build_environment(debugger) + self.call(debugger, command, exe_ctx, result) + + def call(self, debugger, command, exe_ctx, result): + raise NotImplementedError("subclasses must implement call") + + def get_short_help(self): + return self.__class__.help_string + + def get_long_help(self): + return self.__class__.help_string diff --git a/misc/lldb_rb/rb_heap_structs.py b/misc/lldb_rb/rb_heap_structs.py new file mode 100644 index 0000000000..798b838080 --- /dev/null +++ b/misc/lldb_rb/rb_heap_structs.py @@ -0,0 +1,152 @@ +import lldb +import math +from lldb_rb.lldb_interface import LLDBInterface +from lldb_rb.constants import * + +class HeapPage(LLDBInterface): + def __init__(self, debugger, val): + self.build_environment(debugger) + self.page_type = self.target.FindFirstType("struct heap_page").GetPointerType() + self.val = val + + def heap_page_body(self, command, ctx, result, internal_dict): + process = self.target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + val = frame.EvaluateExpression(command) + page = self.get_page_body(val) + print("Page body address: ", page.GetAddress(), file=result) + print(page, file=result) + + def get_page_body(self, val): + tHeapPageBody = self.target.FindFirstType("struct heap_page_body") + addr = val.GetValueAsUnsigned() + page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + return self.target.CreateValueFromAddress("page", address, tHeapPageBody) + + def get_page_raw(self, val): + body = self.get_page_body(val) + return body.GetValueForExpressionPath("->header.page") + + def 
to_heap_page_struct(self): + pagePtr = self.get_page_raw(self.val) + return pagePtr.Cast(self.page_type) + + +class RbObject(LLDBInterface): + def __init__(self, ptr, debugger, ruby_globals): + self.build_environment(debugger) + self.ruby_globals = ruby_globals + + self.flUser1 = self.ruby_globals["RUBY_FL_USER1"] + self.flUser2 = self.ruby_globals["RUBY_FL_USER2"] + self.flUser3 = self.ruby_globals["RUBY_FL_USER3"] + self.flUser4 = self.ruby_globals["RUBY_FL_USER4"] + self.flUser5 = self.ruby_globals["RUBY_FL_USER5"] + self.flUser6 = self.ruby_globals["RUBY_FL_USER6"] + self.flUser7 = self.ruby_globals["RUBY_FL_USER7"] + self.flUser8 = self.ruby_globals["RUBY_FL_USER8"] + self.flUser9 = self.ruby_globals["RUBY_FL_USER9"] + self.flUshift = self.ruby_globals["RUBY_FL_USHIFT"] + + self.tRBasic = self.target.FindFirstType("::RBasic").GetPointerType() + + self.val = ptr.Cast(self.tRBasic) + self.page = HeapPage(self.debugger, self.val) + self.flags = self.val.GetValueForExpressionPath("->flags").GetValueAsUnsigned() + + self.type = None + self.type_name = "" + + def check_bits(self, bitmap_name, bitmap_index, bitmap_bit, v): + page = self.page.to_heap_page_struct() + bits = page.GetChildMemberWithName(bitmap_name) + plane = bits.GetChildAtIndex(bitmap_index).GetValueAsUnsigned() + if (plane & bitmap_bit) != 0: + return v + else: + return ' ' + + def dump_bits(self, result, end = "\n"): + tUintPtr = self.target.FindFirstType("uintptr_t") # bits_t + + slot_size = self.page.to_heap_page_struct().GetChildMemberWithName("heap").GetChildMemberWithName("slot_size").unsigned + byte_size = 40 ** math.floor(math.log(slot_size, 40)) + + num_in_page = (self.val.GetValueAsUnsigned() & HEAP_PAGE_ALIGN_MASK) // byte_size; + bits_bitlength = tUintPtr.GetByteSize() * 8 + bitmap_index = num_in_page // bits_bitlength + bitmap_offset = num_in_page & (bits_bitlength - 1) + bitmap_bit = 1 << bitmap_offset + + page = self.page.to_heap_page_struct() + print("bits: [%s%s%s%s%s]" % ( + 
self.check_bits("uncollectible_bits", bitmap_index, bitmap_bit, "L"), + self.check_bits("mark_bits", bitmap_index, bitmap_bit, "M"), + self.check_bits("pinned_bits", bitmap_index, bitmap_bit, "P"), + self.check_bits("marking_bits", bitmap_index, bitmap_bit, "R"), + self.check_bits("wb_unprotected_bits", bitmap_index, bitmap_bit, "U"), + ), end=end, file=result) + + def promoted_p(self): + rbFlPromoted = self.ruby_globals["RUBY_FL_PROMOTED"] + return (self.flags & rbFlPromoted) == rbFlPromoted + + def frozen_p(self): + rbFlFreeze = self.ruby_globals["RUBY_FL_FREEZE"] + return (self.flags & rbFlFreeze) == rbFlFreeze + + def is_type(self, type_name): + if self.type is None: + flTMask = self.ruby_globals["RUBY_T_MASK"] + flType = self.flags & flTMask + self.type = flType + + if self.type == self.ruby_globals[type_name]: + self.type_name = type_name + return True + else: + return False + + def as_type(self, type_name): + if type_name == "array": + tRarray = self.target.FindFirstType("struct RArray") + return self.val.Cast(tRarray.GetPointerType()) + elif type_name == "bignum": + tRbignum = self.target.FindFirstType("struct RBignum") + return self.val.Cast(tRbignum.GetPointerType()) + else: + print("as_type is not implemented for:", type_name) + + def ary_ptr(self): + rval = self.as_type("array") + if self.flags & self.ruby_globals["RUBY_FL_USER1"]: + ptr = rval.GetValueForExpressionPath("->as.ary") + else: + ptr = rval.GetValueForExpressionPath("->as.heap.ptr") + return ptr + + def ary_len(self): + if self.flags & self.flUser1: + len = ((self.flags & + (self.flUser3 | self.flUser4 | self.flUser5 | self.flUser6 | + self.flUser7 | self.flUser8 | self.flUser9) + ) >> (self.flUshift + 3)) + else: + rval = self.as_type("array") + len = rval.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() + + return len + + def bignum_len(self): + if self.flags & self.flUser2: + len = ((self.flags & + (self.flUser3 | self.flUser4 | self.flUser5) + ) >> (self.flUshift + 3)) + 
else: + len = (self.as_type("bignum").GetValueForExpressionPath("->as.heap.len"). + GetValueAsUnsigned()) + + return len diff --git a/misc/lldb_rb/utils.py b/misc/lldb_rb/utils.py new file mode 100644 index 0000000000..a2bcedc328 --- /dev/null +++ b/misc/lldb_rb/utils.py @@ -0,0 +1,506 @@ +from lldb_rb.lldb_interface import LLDBInterface +from lldb_rb.rb_heap_structs import HeapPage, RbObject +from lldb_rb.constants import * + +class RbInspector(LLDBInterface): + def __init__(self, debugger, result, ruby_globals): + self.build_environment(debugger) + self.result = result + self.ruby_globals = ruby_globals + + def string2cstr(self, rstring): + """Returns the pointer to the C-string in the given String object""" + if rstring.TypeIsPointerType(): + rstring = rstring.Dereference() + + flags = rstring.GetValueForExpressionPath(".basic->flags").unsigned + clen = int(rstring.GetValueForExpressionPath(".len").value, 0) + if flags & self.ruby_globals["RUBY_FL_USER1"]: + cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) + else: + cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) + + return cptr, clen + + def output_string(self, rstring): + cptr, clen = self.string2cstr(rstring) + self._append_expression("*(const char (*)[%d])%0#x" % (clen, cptr)) + + def fixnum_p(self, x): + return x & self.ruby_globals["RUBY_FIXNUM_FLAG"] != 0 + + def flonum_p(self, x): + return (x & self.ruby_globals["RUBY_FLONUM_MASK"]) == self.ruby_globals["RUBY_FLONUM_FLAG"] + + def static_sym_p(self, x): + special_shift = self.ruby_globals["RUBY_SPECIAL_SHIFT"] + symbol_flag = self.ruby_globals["RUBY_SYMBOL_FLAG"] + return (x & ~(~0 << special_shift)) == symbol_flag + + def generic_inspect(self, val, rtype): + tRType = self.target.FindFirstType("struct %s" % rtype).GetPointerType() + val = val.Cast(tRType) + self._append_expression("*(struct %s *) %0#x" % (rtype, val.GetValueAsUnsigned())) + + def inspect(self, val): + rbTrue = 
self.ruby_globals["RUBY_Qtrue"] + rbFalse = self.ruby_globals["RUBY_Qfalse"] + rbNil = self.ruby_globals["RUBY_Qnil"] + rbUndef = self.ruby_globals["RUBY_Qundef"] + rbImmediateMask = self.ruby_globals["RUBY_IMMEDIATE_MASK"] + + if self.inspect_node(val): + return + + num = val.GetValueAsSigned() + if num == rbFalse: + print('false', file=self.result) + elif num == rbTrue: + print('true', file=self.result) + elif num == rbNil: + print('nil', file=self.result) + elif num == rbUndef: + print('undef', file=self.result) + elif self.fixnum_p(num): + print(num >> 1, file=self.result) + elif self.flonum_p(num): + self._append_expression("rb_float_value(%0#x)" % val.GetValueAsUnsigned()) + elif self.static_sym_p(num): + if num < 128: + print("T_SYMBOL: %c" % num, file=self.result) + else: + print("T_SYMBOL: (%x)" % num, file=self.result) + self._append_expression("rb_id2name(%0#x)" % (num >> 8)) + + elif num & rbImmediateMask: + print('immediate(%x)' % num, file=self.result) + else: + rval = RbObject(val, self.debugger, self.ruby_globals) + rval.dump_bits(self.result) + + flaginfo = "" + if rval.promoted_p(): + flaginfo += "[PROMOTED] " + if rval.frozen_p(): + flaginfo += "[FROZEN] " + + if rval.is_type("RUBY_T_NONE"): + print('T_NONE: %s%s' % (flaginfo, val.Dereference()), file=self.result) + + elif rval.is_type("RUBY_T_NIL"): + print('T_NIL: %s%s' % (flaginfo, val.Dereference()), file=self.result) + + elif rval.is_type("RUBY_T_OBJECT"): + self.result.write('T_OBJECT: %s' % flaginfo) + self._append_expression("*(struct RObject*)%0#x" % val.GetValueAsUnsigned()) + + elif (rval.is_type("RUBY_T_CLASS") or + rval.is_type("RUBY_T_MODULE") or + rval.is_type("RUBY_T_ICLASS")): + self.result.write('T_%s: %s' % (rval.type_name.split('_')[-1], flaginfo)) + tRClass = self.target.FindFirstType("struct RClass") + + self._append_expression("*(struct RClass*)%0#x" % val.GetValueAsUnsigned()) + if not val.Cast(tRClass).GetChildMemberWithName("ptr").IsValid(): + self._append_expression( + 
"*(struct rb_classext_struct*)%0#x" % + (val.GetValueAsUnsigned() + tRClass.GetByteSize()) + ) + + elif rval.is_type("RUBY_T_STRING"): + self.result.write('T_STRING: %s' % flaginfo) + tRString = self.target.FindFirstType("struct RString").GetPointerType() + + chilled = self.ruby_globals["RUBY_FL_USER3"] + if (rval.flags & chilled) != 0: + self.result.write("[CHILLED] ") + + rb_enc_mask = self.ruby_globals["RUBY_ENCODING_MASK"] + rb_enc_shift = self.ruby_globals["RUBY_ENCODING_SHIFT"] + encidx = ((rval.flags & rb_enc_mask) >> rb_enc_shift) + encname = self.target.FindFirstType("enum ruby_preserved_encindex") \ + .GetEnumMembers().GetTypeEnumMemberAtIndex(encidx) \ + .GetName() + + if encname is not None: + self.result.write('[%s] ' % encname[14:]) + else: + self.result.write('[enc=%d] ' % encidx) + + coderange = rval.flags & self.ruby_globals["RUBY_ENC_CODERANGE_MASK"] + if coderange == self.ruby_globals["RUBY_ENC_CODERANGE_7BIT"]: + self.result.write('[7BIT] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_VALID"]: + self.result.write('[VALID] ') + elif coderange == self.ruby_globals["RUBY_ENC_CODERANGE_BROKEN"]: + self.result.write('[BROKEN] ') + else: + self.result.write('[UNKNOWN] ') + + ptr, len = self.string2cstr(val.Cast(tRString)) + if len == 0: + self.result.write("(empty)\n") + else: + self._append_expression("*(const char (*)[%d])%0#x" % (len, ptr)) + + elif rval.is_type("RUBY_T_SYMBOL"): + self.result.write('T_SYMBOL: %s' % flaginfo) + tRSymbol = self.target.FindFirstType("struct RSymbol").GetPointerType() + tRString = self.target.FindFirstType("struct RString").GetPointerType() + + val = val.Cast(tRSymbol) + self._append_expression('(ID)%0#x ' % val.GetValueForExpressionPath("->id").GetValueAsUnsigned()) + self.output_string(val.GetValueForExpressionPath("->fstr").Cast(tRString)) + + elif rval.is_type("RUBY_T_ARRAY"): + len = rval.ary_len() + ptr = rval.ary_ptr() + + self.result.write("T_ARRAY: %slen=%d" % (flaginfo, len)) + + if rval.flags 
& self.ruby_globals["RUBY_FL_USER1"]: + self.result.write(" (embed)") + elif rval.flags & self.ruby_globals["RUBY_FL_USER2"]: + shared = val.GetValueForExpressionPath("->as.heap.aux.shared").GetValueAsUnsigned() + self.result.write(" (shared) shared=%016x" % shared) + else: + capa = val.GetValueForExpressionPath("->as.heap.aux.capa").GetValueAsSigned() + self.result.write(" (ownership) capa=%d" % capa) + if len == 0: + self.result.write(" {(empty)}\n") + else: + self.result.write("\n") + if ptr.GetValueAsSigned() == 0: + self._append_expression("-fx -- ((struct RArray*)%0#x)->as.ary" % val.GetValueAsUnsigned()) + else: + self._append_expression("-Z %d -fx -- (const VALUE*)%0#x" % (len, ptr.GetValueAsUnsigned())) + + elif rval.is_type("RUBY_T_HASH"): + self.result.write("T_HASH: %s" % flaginfo) + ptr = val.GetValueAsUnsigned() + self._append_expression("*(struct RHash *) %0#x" % ptr) + if rval.flags & self.ruby_globals["RUBY_FL_USER3"]: + self._append_expression("*(struct st_table *) (%0#x + sizeof(struct RHash))" % ptr) + else: + self._append_expression("*(struct ar_table *) (%0#x + sizeof(struct RHash))" % ptr) + + elif rval.is_type("RUBY_T_BIGNUM"): + sign = '-' + if (rval.flags & self.ruby_globals["RUBY_FL_USER1"]) != 0: + sign = '+' + len = rval.bignum_len() + + if rval.flags & self.ruby_globals["RUBY_FL_USER2"]: + print("T_BIGNUM: sign=%s len=%d (embed)" % (sign, len), file=self.result) + self._append_expression("((struct RBignum *) %0#x)->as.ary" + % val.GetValueAsUnsigned()) + else: + print("T_BIGNUM: sign=%s len=%d" % (sign, len), file=self.result) + print(rval.as_type("bignum"), file=self.result) + self._append_expression("-Z %d -fx -- ((struct RBignum*)%d)->as.heap.digits" % + (len, val.GetValueAsUnsigned())) + + elif rval.is_type("RUBY_T_FLOAT"): + self._append_expression("((struct RFloat *)%d)->float_value" + % val.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_RATIONAL"): + tRRational = self.target.FindFirstType("struct 
RRational").GetPointerType() + val = val.Cast(tRRational) + self.inspect(val.GetValueForExpressionPath("->num")) + output = self.result.GetOutput() + self.result.Clear() + self.result.write("(Rational) " + output.rstrip() + " / ") + self.inspect(val.GetValueForExpressionPath("->den")) + + elif rval.is_type("RUBY_T_COMPLEX"): + tRComplex = self.target.FindFirstType("struct RComplex").GetPointerType() + val = val.Cast(tRComplex) + self.inspect(val.GetValueForExpressionPath("->real")) + real = self.result.GetOutput().rstrip() + self.result.Clear() + self.inspect(val.GetValueForExpressionPath("->imag")) + imag = self.result.GetOutput().rstrip() + self.result.Clear() + if not imag.startswith("-"): + imag = "+" + imag + print("(Complex) " + real + imag + "i", file=self.result) + + elif rval.is_type("RUBY_T_REGEXP"): + tRRegex = self.target.FindFirstType("struct RRegexp").GetPointerType() + val = val.Cast(tRRegex) + print("(Regex) ->src {", file=self.result) + self.inspect(val.GetValueForExpressionPath("->src")) + print("}", file=self.result) + + elif rval.is_type("RUBY_T_DATA"): + tRTypedData = self.target.FindFirstType("struct RTypedData").GetPointerType() + val = val.Cast(tRTypedData) + + type = val.GetValueForExpressionPath("->type").GetValueAsUnsigned() + embed = (type & 1) + if embed: + flaginfo += "[EMBED] " + type = self.frame.EvaluateExpression("(rb_data_type_t *)%0#x" % (type & ~1)) + print("T_DATA: %s%s" % + (flaginfo, type.GetValueForExpressionPath("->wrap_struct_name")), + file=self.result) + print("%s", type.Dereference(), file=self.result) + ptr = val.GetValueForExpressionPath("->data") + if embed: + ptr = ptr.AddressOf() + self._append_expression("(void *)%0#x" % ptr.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_IMEMO"): + imemo_type = ((rval.flags >> self.ruby_globals["RUBY_FL_USHIFT"]) + & IMEMO_MASK) + print("T_IMEMO: ", file=self.result) + + self._append_expression("(enum imemo_type) %d" % imemo_type) + self._append_expression("*(struct MEMO *) 
%0#x" % val.GetValueAsUnsigned()) + + elif rval.is_type("RUBY_T_FILE"): + self.generic_inspect(val, "RFile") + + elif rval.is_type("RUBY_T_MOVED"): + self.generic_inspect(val, "RMoved") + + elif rval.is_type("RUBY_T_MATCH"): + self.generic_inspect(val, "RMatch") + + elif rval.is_type("RUBY_T_STRUCT"): + self.generic_inspect(val, "RStruct") + + elif rval.is_type("RUBY_T_ZOMBIE"): + self.generic_inspect(val, "RZombie") + + else: + print("Not-handled type %0#x" % rval.type, file=self.result) + print(val, file=self.result) + + def inspect_node(self, val): + tRNode = self.target.FindFirstType("struct RNode").GetPointerType() + + # if val.GetType() != tRNode: does not work for unknown reason + + if val.GetType().GetPointeeType().GetCanonicalType().name != "RNode": + return False + + rbNodeTypeMask = self.ruby_globals["RUBY_NODE_TYPEMASK"] + rbNodeTypeShift = self.ruby_globals["RUBY_NODE_TYPESHIFT"] + flags = val.Cast(tRNode).GetChildMemberWithName("flags").GetValueAsUnsigned() + nd_type = (flags & rbNodeTypeMask) >> rbNodeTypeShift + + self._append_expression("(node_type) %d" % nd_type) + + if nd_type == self.ruby_globals["NODE_SCOPE"]: + self._append_expression("*(rb_node_scope_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BLOCK"]: + self._append_expression("*(rb_node_block_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IF"]: + self._append_expression("*(rb_node_if_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNLESS"]: + self._append_expression("*(rb_node_unless_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE"]: + self._append_expression("*(rb_node_case_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE2"]: + self._append_expression("*(rb_node_case2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CASE3"]: + self._append_expression("*(rb_node_case3_t *) %0#x" % 
val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHEN"]: + self._append_expression("*(rb_node_when_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IN"]: + self._append_expression("*(rb_node_in_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_WHILE"]: + self._append_expression("*(rb_node_while_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNTIL"]: + self._append_expression("*(rb_node_until_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ITER"]: + self._append_expression("*(rb_node_iter_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR"]: + self._append_expression("*(rb_node_for_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FOR_MASGN"]: + self._append_expression("*(rb_node_for_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BREAK"]: + self._append_expression("*(rb_node_break_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NEXT"]: + self._append_expression("*(rb_node_next_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REDO"]: + self._append_expression("*(rb_node_redo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETRY"]: + self._append_expression("*(rb_node_retry_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BEGIN"]: + self._append_expression("*(rb_node_begin_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RESCUE"]: + self._append_expression("*(rb_node_rescue_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RESBODY"]: + self._append_expression("*(rb_node_resbody_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ENSURE"]: + self._append_expression("*(rb_node_ensure_t *) %0#x" % 
val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_AND"]: + self._append_expression("*(rb_node_and_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OR"]: + self._append_expression("*(rb_node_or_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MASGN"]: + self._append_expression("*(rb_node_masgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LASGN"]: + self._append_expression("*(rb_node_lasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DASGN"]: + self._append_expression("*(rb_node_dasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GASGN"]: + self._append_expression("*(rb_node_gasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_IASGN"]: + self._append_expression("*(rb_node_iasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CDECL"]: + self._append_expression("*(rb_node_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVASGN"]: + self._append_expression("*(rb_node_cvasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN1"]: + self._append_expression("*(rb_node_op_asgn1_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN2"]: + self._append_expression("*(rb_node_op_asgn2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_AND"]: + self._append_expression("*(rb_node_op_asgn_and_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_ASGN_OR"]: + self._append_expression("*(rb_node_op_asgn_or_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OP_CDECL"]: + self._append_expression("*(rb_node_op_cdecl_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CALL"]: + 
self._append_expression("*(rb_node_call_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPCALL"]: + self._append_expression("*(rb_node_opcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FCALL"]: + self._append_expression("*(rb_node_fcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VCALL"]: + self._append_expression("*(rb_node_vcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_QCALL"]: + self._append_expression("*(rb_node_qcall_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SUPER"]: + self._append_expression("*(rb_node_super_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZSUPER"]: + self._append_expression("*(rb_node_zsuper_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LIST"]: + self._append_expression("*(rb_node_list_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ZLIST"]: + self._append_expression("*(rb_node_zlist_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HASH"]: + self._append_expression("*(rb_node_hash_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_RETURN"]: + self._append_expression("*(rb_node_return_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_YIELD"]: + self._append_expression("*(rb_node_yield_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LVAR"]: + self._append_expression("*(rb_node_lvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DVAR"]: + self._append_expression("*(rb_node_dvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_GVAR"]: + self._append_expression("*(rb_node_gvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CONST"]: + 
self._append_expression("*(rb_node_const_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CVAR"]: + self._append_expression("*(rb_node_cvar_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NTH_REF"]: + self._append_expression("*(rb_node_nth_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_BACK_REF"]: + self._append_expression("*(rb_node_back_ref_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH"]: + self._append_expression("*(rb_node_match_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH2"]: + self._append_expression("*(rb_node_match2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MATCH3"]: + self._append_expression("*(rb_node_match3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_STR"]: + self._append_expression("*(rb_node_str_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSTR"]: + self._append_expression("*(rb_node_dstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_XSTR"]: + self._append_expression("*(rb_node_xstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DXSTR"]: + self._append_expression("*(rb_node_dxstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_EVSTR"]: + self._append_expression("*(rb_node_evstr_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_REGX"]: + self._append_expression("*(rb_node_regx_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DREGX"]: + self._append_expression("*(rb_node_dregx_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ONCE"]: + self._append_expression("*(rb_node_once_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS"]: + 
self._append_expression("*(rb_node_args_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGS_AUX"]: + self._append_expression("*(rb_node_args_aux_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_OPT_ARG"]: + self._append_expression("*(rb_node_opt_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_KW_ARG"]: + self._append_expression("*(rb_node_kw_arg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTARG"]: + self._append_expression("*(rb_node_postarg_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSCAT"]: + self._append_expression("*(rb_node_argscat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARGSPUSH"]: + self._append_expression("*(rb_node_argspush_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SPLAT"]: + self._append_expression("*(rb_node_splat_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFN"]: + self._append_expression("*(rb_node_defn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFS"]: + self._append_expression("*(rb_node_defs_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ALIAS"]: + self._append_expression("*(rb_node_alias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_VALIAS"]: + self._append_expression("*(rb_node_valias_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_UNDEF"]: + self._append_expression("*(rb_node_undef_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_CLASS"]: + self._append_expression("*(rb_node_class_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_MODULE"]: + self._append_expression("*(rb_node_module_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == 
self.ruby_globals["NODE_SCLASS"]: + self._append_expression("*(rb_node_sclass_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_COLON2"]: + self._append_expression("*(rb_node_colon2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_COLON3"]: + self._append_expression("*(rb_node_colon3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT2"]: + self._append_expression("*(rb_node_dot2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DOT3"]: + self._append_expression("*(rb_node_dot3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP2"]: + self._append_expression("*(rb_node_flip2_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FLIP3"]: + self._append_expression("*(rb_node_flip3_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_SELF"]: + self._append_expression("*(rb_node_self_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_NIL"]: + self._append_expression("*(rb_node_nil_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_TRUE"]: + self._append_expression("*(rb_node_true_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FALSE"]: + self._append_expression("*(rb_node_false_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERRINFO"]: + self._append_expression("*(rb_node_errinfo_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DEFINED"]: + self._append_expression("*(rb_node_defined_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_POSTEXE"]: + self._append_expression("*(rb_node_postexe_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_DSYM"]: + self._append_expression("*(rb_node_dsym_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == 
self.ruby_globals["NODE_ATTRASGN"]: + self._append_expression("*(rb_node_attrasgn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LAMBDA"]: + self._append_expression("*(rb_node_lambda_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ARYPTN"]: + self._append_expression("*(rb_node_aryptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_HSHPTN"]: + self._append_expression("*(rb_node_hshptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FNDPTN"]: + self._append_expression("*(rb_node_fndptn_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_ERROR"]: + self._append_expression("*(rb_node_error_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_LINE"]: + self._append_expression("*(rb_node_line_t *) %0#x" % val.GetValueAsUnsigned()) + elif nd_type == self.ruby_globals["NODE_FILE"]: + self._append_expression("*(rb_node_file_t *) %0#x" % val.GetValueAsUnsigned()) + else: + self._append_expression("*(NODE *) %0#x" % val.GetValueAsUnsigned()) + return True diff --git a/misc/lldb_yjit.py b/misc/lldb_yjit.py deleted file mode 100644 index cc37b990ea..0000000000 --- a/misc/lldb_yjit.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python -#coding: utf-8 -# -# Usage: run `command script import -r misc/lldb_yjit.py` on LLDB -# - -from __future__ import print_function -import lldb -import os -import shlex - -def list_comments(debugger, command, result, internal_dict): - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - # Get the different types we need - rb_darray_meta_t = target.FindFirstType("rb_darray_meta_t") - codeblock_t = target.FindFirstType("codeblock_t") - yjit_comment = target.FindFirstType("yjit_comment") - - # Get the global variables we need - comments = 
target.FindFirstGlobalVariable("yjit_code_comments") - cb = target.FindFirstGlobalVariable("cb").Cast(codeblock_t.GetPointerType()) - - # Get the address of the memory block we're using - mem_addr = cb.GetChildMemberWithName("mem_block").GetValueAsUnsigned() - - # Find the size of the darray comment list - meta = comments.Cast(rb_darray_meta_t.GetPointerType()) - size = meta.GetChildMemberWithName("size").GetValueAsUnsigned() - - # Get the address of the block following the metadata header - t_offset = comments.GetValueAsUnsigned() + rb_darray_meta_t.GetByteSize() - - # Loop through each comment and print - for t in range(0, size): - addr = lldb.SBAddress(t_offset + (t * yjit_comment.GetByteSize()), target) - comment = target.CreateValueFromAddress("yjit_comment", addr, yjit_comment) - string = comment.GetChildMemberWithName("comment") - comment_offset = mem_addr + comment.GetChildMemberWithName("offset").GetValueAsUnsigned() - print("%0#x %s" % (comment_offset, string.GetSummary()), file = result) - - -def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand("command script add -f lldb_yjit.list_comments lc") diff --git a/misc/ruby-style.el b/misc/ruby-style.el index 13aad77b3d..03d0830d3a 100644 --- a/misc/ruby-style.el +++ b/misc/ruby-style.el @@ -56,7 +56,9 @@ (c-basic-offset . 4) (tab-width . 8) (indent-tabs-mode . nil) - (setq show-trailing-whitespace t) + (show-trailing-whitespace . t) + (c-backslash-column . 1) + (c-backslash-max-column . 1) (c-offsets-alist (case-label . *) (label . (ruby-style-label-indent *)) @@ -66,6 +68,17 @@ (access-label /) ))) +(c-add-style + "prism" + '("bsd" + (c-basic-offset . 4) + (tab-width . 8) + (indent-tabs-mode . nil) + (show-trailing-whitespace . t) + (c-offsets-alist + (case-label . 
+) + ))) + ;;;###autoload (defun ruby-style-c-mode () (interactive) diff --git a/misc/test_yjit_asm.sh b/misc/test_yjit_asm.sh deleted file mode 100755 index e09d83f0fb..0000000000 --- a/misc/test_yjit_asm.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -set -e -set -x - -clang -std=gnu99 -Wall -Werror -Wno-error=unused-function -Wshorten-64-to-32 -I "${0%/*/*}" "${0%/*}/yjit_asm_tests.c" -o asm_test - -./asm_test - -rm asm_test diff --git a/misc/tsan_suppressions.txt b/misc/tsan_suppressions.txt new file mode 100644 index 0000000000..5492500e7f --- /dev/null +++ b/misc/tsan_suppressions.txt @@ -0,0 +1,109 @@ +# TSan: ThreadSanitizer +# https://github.com/google/sanitizers/wiki/threadsanitizersuppressions +# +# This file describes a number of places where TSAN detects problems in CRuby. +# Many of these indicate bugs. Others are benign (ex. data races that can be +# replaced with relaxed atomic loads) +# +# Usage: +# Configure with: +# ./configure cflags='-fsanitize=thread' CC=clang +# Build and run with: +# TSAN_OPTIONS="suppressions=$(pwd)/misc/tsan_suppressions.txt:die_after_fork=0" +# +# Other useful TSAN_OPTIONS: +# * halt_on_error=1 +# * strip_path_prefix=$(pwd)/ + +# Namespaces +race_top:push_subclass_entry_to_list + +# sub_nounderflow includes non-atomic read, possibly other issue +race:objspace_malloc_increase_body + +# Signals and ubf +race:unregister_ubf_list + +# It's already crashing. We're doing our best +signal:rb_vm_bugreport +race:check_reserved_signal_ + +race_top:rb_check_deadlock + +# vm->ractor.sched.grq_cnt++ +race_top:ractor_sched_enq +race_top:ractor_sched_deq + +# Race between vm_remove_ractor writing ractor count and +# native_thread_check_and_create_shared reading it during thread creation. +# The write happens when a ractor thread exits, the read happens when +# checking if new shared threads need to be created. 
+race:vm_remove_ractor + +# th->sched.finished at end of co_start +race_top:rb_thread_sched_mark_zombies + +# Races against timer thread setting th->sched.waiting_reason.flags +race_top:thread_sched_wait_events + +# At thread start +race_top:rb_ractor_set_current_ec_ + +# TSan reports a lock-order-inversion between thread_sched_lock_ and this lock. +# It's unclear if that can cause a deadlock since the lock is on self +deadlock:ractor_lock_self + +# TSan reports a deadlock when reacquiring this lock after a barrier, but +# we know the other threads have been stopped +deadlock:rb_ractor_sched_barrier_start + +# RVALUE_AGE_SET manipulates flag bits on objects which may be accessed in Ractors +race_top:RVALUE_AGE_SET + +# Inline caches and call cache updates +# Multiple threads can race when updating shared call caches during method lookups +# and argument forwarding. These races involve reading/writing cd->cc fields. +race_top:vm_cc_call_set +race_top:vm_cc_class_check +race_top:vm_search_cc +race_top:vm_search_method_slowpath0 +race_top:rb_vm_opt_getconstant_path +race_top:vm_ic_attr_index_set +race:vm_ic_update +race:vm_caller_setup_fwd_args + +# Race in shape_get_next where multiple threads simultaneously access and modify +# RCLASS_MAX_IV_COUNT and RCLASS_VARIATION_COUNT fields in class objects. +# One thread reads the field while another thread calls RCLASS_SET_MAX_IV_COUNT. +# This happens during instance variable shape transitions in multi-threaded code. 
+race:shape_get_next + +# Non-atomic reads/writes +race:gccct_method_search + +# Ignore exit for now +race:rb_ec_finalize +race:rb_ec_cleanup + +# TSan doesn't work well post-fork, this raises errors when creating the new +# timer thread +race:after_fork_ruby + +# Sets objspace->flags.dont_incremental while writebarrier may be running +race_top:objspace_each_exec +race_top:objspace_each_objects_ensure + +# Non-atomic lazy initialized static variable +race_top:rbimpl_intern_const + +# Setting def->aliased bitfield non-atomically +race_top:method_definition_addref + +# Switching to setting up tracing. Likely other ractors should be stopped for this. +race_top:encoded_iseq_trace_instrument +race:rb_iseq_trace_set_all +race:rb_tracepoint_enable + +# GC enable/disable flag modifications race with object allocation flag reads +race_top:rb_gc_impl_gc_disable +race_top:rb_gc_impl_gc_enable diff --git a/misc/yjit_asm_tests.c b/misc/yjit_asm_tests.c deleted file mode 100644 index ccf8822bbe..0000000000 --- a/misc/yjit_asm_tests.c +++ /dev/null @@ -1,443 +0,0 @@ -// For MAP_ANONYMOUS on GNU/Linux -#define _GNU_SOURCE 1 - -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> - -// This test executable doesn't compile with the rest of Ruby -// so we need to define a rb_bug(). -_Noreturn -static void rb_bug(const char *message, ...) 
-{ - va_list args; - va_start(args, message); - vfprintf(stderr, message, args); - va_end(args); - abort(); -} - -#include "yjit_asm.c" - -// Print the bytes in a code block -void print_bytes(codeblock_t* cb) -{ - for (uint32_t i = 0; i < cb->write_pos; ++i) - { - printf("%02X", (int)*cb_get_ptr(cb, i)); - } - - printf("\n"); -} - -// Check that the code block contains the given sequence of bytes -void check_bytes(codeblock_t* cb, const char* bytes) -{ - printf("checking encoding: %s\n", bytes); - - size_t len = strlen(bytes); - assert (len % 2 == 0); - size_t num_bytes = len / 2; - - if (cb->write_pos != num_bytes) - { - fprintf(stderr, "incorrect encoding length, expected %ld, got %d\n", - num_bytes, - cb->write_pos - ); - printf("%s\n", bytes); - print_bytes(cb); - exit(-1); - } - - for (uint32_t i = 0; i < num_bytes; ++i) - { - char byte_str[] = {0, 0, 0, 0}; - strncpy(byte_str, bytes + (2 * i), 2); - char* endptr; - long int byte = strtol(byte_str, &endptr, 16); - - uint8_t cb_byte = *cb_get_ptr(cb, i); - - if (cb_byte != byte) - { - fprintf(stderr, "incorrect encoding at position %d, expected %02X, got %02X\n", - i, - (int)byte, - (int)cb_byte - ); - printf("%s\n", bytes); - print_bytes(cb); - exit(-1); - } - } -} - -void run_assembler_tests(void) -{ - printf("Running assembler tests\n"); - - codeblock_t cb_obj; - codeblock_t* cb = &cb_obj; - uint8_t* mem_block = alloc_exec_mem(4096); - cb_init(cb, mem_block, 4096); - - // add - cb_set_pos(cb, 0); add(cb, CL, imm_opnd(3)); check_bytes(cb, "80C103"); - cb_set_pos(cb, 0); add(cb, CL, BL); check_bytes(cb, "00D9"); - cb_set_pos(cb, 0); add(cb, CL, SPL); check_bytes(cb, "4000E1"); - cb_set_pos(cb, 0); add(cb, CX, BX); check_bytes(cb, "6601D9"); - cb_set_pos(cb, 0); add(cb, RAX, RBX); check_bytes(cb, "4801D8"); - cb_set_pos(cb, 0); add(cb, ECX, EDX); check_bytes(cb, "01D1"); - cb_set_pos(cb, 0); add(cb, RDX, R14); check_bytes(cb, "4C01F2"); - cb_set_pos(cb, 0); add(cb, mem_opnd(64, RAX, 0), RDX); check_bytes(cb, 
"480110"); - cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 0)); check_bytes(cb, "480310"); - cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 8)); check_bytes(cb, "48035008"); - cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 255)); check_bytes(cb, "480390FF000000"); - cb_set_pos(cb, 0); add(cb, mem_opnd(64, RAX, 127), imm_opnd(255)); check_bytes(cb, "4881407FFF000000"); - cb_set_pos(cb, 0); add(cb, mem_opnd(32, RAX, 0), EDX); check_bytes(cb, "0110"); - cb_set_pos(cb, 0); add(cb, RSP, imm_opnd(8)); check_bytes(cb, "4883C408"); - cb_set_pos(cb, 0); add(cb, ECX, imm_opnd(8)); check_bytes(cb, "83C108"); - cb_set_pos(cb, 0); add(cb, ECX, imm_opnd(255)); check_bytes(cb, "81C1FF000000"); - - // and - cb_set_pos(cb, 0); and(cb, EBP, R12D); check_bytes(cb, "4421E5"); - cb_set_pos(cb, 0); and(cb, mem_opnd(64, RAX, 0), imm_opnd(0x08)); check_bytes(cb, "48832008"); - - // call - { - cb_set_pos(cb, 0); - uint32_t fn_label = cb_new_label(cb, "foo"); - call_label(cb, fn_label); - cb_link_labels(cb); - check_bytes(cb, "E8FBFFFFFF"); - } - cb_set_pos(cb, 0); call(cb, RAX); check_bytes(cb, "FFD0"); - cb_set_pos(cb, 0); call(cb, mem_opnd(64, RSP, 8)); check_bytes(cb, "FF542408"); - - // cmovcc - cb_set_pos(cb, 0); cmovg(cb, ESI, EDI); check_bytes(cb, "0F4FF7"); - cb_set_pos(cb, 0); cmovg(cb, ESI, mem_opnd(32, RBP, 12)); check_bytes(cb, "0F4F750C"); - cb_set_pos(cb, 0); cmovl(cb, EAX, ECX); check_bytes(cb, "0F4CC1"); - cb_set_pos(cb, 0); cmovl(cb, RBX, RBP); check_bytes(cb, "480F4CDD"); - cb_set_pos(cb, 0); cmovle(cb, ESI, mem_opnd(32, RSP, 4)); check_bytes(cb, "0F4E742404"); - - // cmp - cb_set_pos(cb, 0); cmp(cb, CL, DL); check_bytes(cb, "38D1"); - cb_set_pos(cb, 0); cmp(cb, ECX, EDI); check_bytes(cb, "39F9"); - cb_set_pos(cb, 0); cmp(cb, RDX, mem_opnd(64, R12, 0)); check_bytes(cb, "493B1424"); - cb_set_pos(cb, 0); cmp(cb, RAX, imm_opnd(2)); check_bytes(cb, "4883F802"); - - // cqo - cb_set_pos(cb, 0); cqo(cb); check_bytes(cb, "4899"); - - // div - /* - test( - delegate void 
(CodeBlock cb) { cb.div(X86Opnd(EDX)); }, - "F7F2" - ); - test( - delegate void (CodeBlock cb) { cb.div(X86Opnd(32, RSP, -12)); }, - "F77424F4" - ); - */ - - // jcc to label - { - cb_set_pos(cb, 0); - uint32_t loop_label = cb_new_label(cb, "loop"); - jge_label(cb, loop_label); - cb_link_labels(cb); - check_bytes(cb, "0F8DFAFFFFFF"); - } - { - cb_set_pos(cb, 0); - uint32_t loop_label = cb_new_label(cb, "loop"); - jo_label(cb, loop_label); - cb_link_labels(cb); - check_bytes(cb, "0F80FAFFFFFF"); - } - - // jmp to label - { - cb_set_pos(cb, 0); - uint32_t loop_label = cb_new_label(cb, "loop"); - jmp_label(cb, loop_label); - cb_link_labels(cb); - check_bytes(cb, "E9FBFFFFFF"); - } - - // jmp with RM operand - cb_set_pos(cb, 0); jmp_rm(cb, R12); check_bytes(cb, "41FFE4"); - - // lea - cb_set_pos(cb, 0); lea(cb, RDX, mem_opnd(64, RCX, 8)); check_bytes(cb, "488D5108"); - cb_set_pos(cb, 0); lea(cb, RAX, mem_opnd(8, RIP, 0)); check_bytes(cb, "488D0500000000"); - cb_set_pos(cb, 0); lea(cb, RAX, mem_opnd(8, RIP, 5)); check_bytes(cb, "488D0505000000"); - cb_set_pos(cb, 0); lea(cb, RDI, mem_opnd(8, RIP, 5)); check_bytes(cb, "488D3D05000000"); - - // mov - cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(7)); check_bytes(cb, "B807000000"); - cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(-3)); check_bytes(cb, "B8FDFFFFFF"); - cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "41BF03000000"); - cb_set_pos(cb, 0); mov(cb, EAX, EBX); check_bytes(cb, "89D8"); - cb_set_pos(cb, 0); mov(cb, EAX, ECX); check_bytes(cb, "89C8"); - cb_set_pos(cb, 0); mov(cb, EDX, mem_opnd(32, RBX, 128)); check_bytes(cb, "8B9380000000"); - - // Test `mov rax, 3` => `mov eax, 3` optimization - cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x34)); check_bytes(cb, "41B834000000"); - cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x80000000)); check_bytes(cb, "49B80000008000000000"); - cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(-1)); check_bytes(cb, "49B8FFFFFFFFFFFFFFFF"); - - cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x34)); 
check_bytes(cb, "B834000000"); - cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x80000000)); check_bytes(cb, "48B80000008000000000"); - cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-52)); check_bytes(cb, "48B8CCFFFFFFFFFFFFFF"); - cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-1)); check_bytes(cb, "48B8FFFFFFFFFFFFFFFF"); - /* - test( - delegate void (CodeBlock cb) { cb.mov(X86Opnd(AL), X86Opnd(8, RCX, 0, 1, RDX)); }, - "8A0411" - ); - */ - cb_set_pos(cb, 0); mov(cb, CL, R9B); check_bytes(cb, "4488C9"); - cb_set_pos(cb, 0); mov(cb, RBX, RAX); check_bytes(cb, "4889C3"); - cb_set_pos(cb, 0); mov(cb, RDI, RBX); check_bytes(cb, "4889DF"); - cb_set_pos(cb, 0); mov(cb, SIL, imm_opnd(11)); check_bytes(cb, "40B60B"); - cb_set_pos(cb, 0); mov(cb, mem_opnd(8, RSP, 0), imm_opnd(-3)); check_bytes(cb, "C60424FD"); - cb_set_pos(cb, 0); mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)); check_bytes(cb, "48C7470801000000"); - - // movsx - cb_set_pos(cb, 0); movsx(cb, AX, AL); check_bytes(cb, "660FBEC0"); - cb_set_pos(cb, 0); movsx(cb, EDX, AL); check_bytes(cb, "0FBED0"); - cb_set_pos(cb, 0); movsx(cb, RAX, BL); check_bytes(cb, "480FBEC3"); - cb_set_pos(cb, 0); movsx(cb, ECX, AX); check_bytes(cb, "0FBFC8"); - cb_set_pos(cb, 0); movsx(cb, R11, CL); check_bytes(cb, "4C0FBED9"); - cb_set_pos(cb, 0); movsx(cb, R10, mem_opnd(32, RSP, 12)); check_bytes(cb, "4C6354240C"); - cb_set_pos(cb, 0); movsx(cb, RAX, mem_opnd(8, RSP, 0)); check_bytes(cb, "480FBE0424"); - - // neg - cb_set_pos(cb, 0); neg(cb, RAX); check_bytes(cb, "48F7D8"); - - // nop - cb_set_pos(cb, 0); nop(cb, 1); check_bytes(cb, "90"); - - // not - cb_set_pos(cb, 0); not(cb, AX); check_bytes(cb, "66F7D0"); - cb_set_pos(cb, 0); not(cb, EAX); check_bytes(cb, "F7D0"); - cb_set_pos(cb, 0); not(cb, mem_opnd(64, R12, 0)); check_bytes(cb, "49F71424"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSP, 301)); check_bytes(cb, "F794242D010000"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSP, 0)); check_bytes(cb, "F71424"); - cb_set_pos(cb, 0); not(cb, 
mem_opnd(32, RSP, 3)); check_bytes(cb, "F7542403"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RBP, 0)); check_bytes(cb, "F75500"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RBP, 13)); check_bytes(cb, "F7550D"); - cb_set_pos(cb, 0); not(cb, RAX); check_bytes(cb, "48F7D0"); - cb_set_pos(cb, 0); not(cb, R11); check_bytes(cb, "49F7D3"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RAX, 0)); check_bytes(cb, "F710"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSI, 0)); check_bytes(cb, "F716"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDI, 0)); check_bytes(cb, "F717"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, 55)); check_bytes(cb, "F75237"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, 1337)); check_bytes(cb, "F79239050000"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, -55)); check_bytes(cb, "F752C9"); - cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, -555)); check_bytes(cb, "F792D5FDFFFF"); - /* - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 0, 1, RBX)); }, - "F71418" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 0, 1, R12)); }, - "42F71420" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 0, 1, R12)); }, - "43F71427" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 1, R12)); }, - "43F7542705" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 8, R12)); }, - "43F754E705" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 8, R13)); }, - "43F754EF05" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R12, 5, 4, R9)); }, - "43F7548C05" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R12, 301, 4, R9)); }, - "43F7948C2D010000" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 5, 4, RDX)); }, - "F7549005" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(64, RAX, 0, 2, RDX)); }, - "48F71450" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RSP, 0, 1, RBX)); }, - 
"F7141C" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RSP, 3, 1, RBX)); }, - "F7541C03" - ); - test( - delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RBP, 13, 1, RDX)); }, - "F754150D" - ); - */ - - // or - cb_set_pos(cb, 0); or(cb, EDX, ESI); check_bytes(cb, "09F2"); - - // pop - cb_set_pos(cb, 0); pop(cb, RAX); check_bytes(cb, "58"); - cb_set_pos(cb, 0); pop(cb, RBX); check_bytes(cb, "5B"); - cb_set_pos(cb, 0); pop(cb, RSP); check_bytes(cb, "5C"); - cb_set_pos(cb, 0); pop(cb, RBP); check_bytes(cb, "5D"); - cb_set_pos(cb, 0); pop(cb, R12); check_bytes(cb, "415C"); - cb_set_pos(cb, 0); pop(cb, mem_opnd(64, RAX, 0)); check_bytes(cb, "8F00"); - cb_set_pos(cb, 0); pop(cb, mem_opnd(64, R8, 0)); check_bytes(cb, "418F00"); - cb_set_pos(cb, 0); pop(cb, mem_opnd(64, R8, 3)); check_bytes(cb, "418F4003"); - cb_set_pos(cb, 0); pop(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)); check_bytes(cb, "8F44C803"); - cb_set_pos(cb, 0); pop(cb, mem_opnd_sib(64, R8, RCX, 8, 3)); check_bytes(cb, "418F44C803"); - - // push - cb_set_pos(cb, 0); push(cb, RAX); check_bytes(cb, "50"); - cb_set_pos(cb, 0); push(cb, RBX); check_bytes(cb, "53"); - cb_set_pos(cb, 0); push(cb, R12); check_bytes(cb, "4154"); - cb_set_pos(cb, 0); push(cb, mem_opnd(64, RAX, 0)); check_bytes(cb, "FF30"); - cb_set_pos(cb, 0); push(cb, mem_opnd(64, R8, 0)); check_bytes(cb, "41FF30"); - cb_set_pos(cb, 0); push(cb, mem_opnd(64, R8, 3)); check_bytes(cb, "41FF7003"); - cb_set_pos(cb, 0); push(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)); check_bytes(cb, "FF74C803"); - cb_set_pos(cb, 0); push(cb, mem_opnd_sib(64, R8, RCX, 8, 3)); check_bytes(cb, "41FF74C803"); - - // ret - cb_set_pos(cb, 0); ret(cb); check_bytes(cb, "C3"); - - // sal - cb_set_pos(cb, 0); sal(cb, CX, imm_opnd(1)); check_bytes(cb, "66D1E1"); - cb_set_pos(cb, 0); sal(cb, ECX, imm_opnd(1)); check_bytes(cb, "D1E1"); - cb_set_pos(cb, 0); sal(cb, EBP, imm_opnd(5)); check_bytes(cb, "C1E505"); - cb_set_pos(cb, 0); sal(cb, mem_opnd(32, RSP, 68), imm_opnd(1)); 
check_bytes(cb, "D1642444"); - - // sar - cb_set_pos(cb, 0); sar(cb, EDX, imm_opnd(1)); check_bytes(cb, "D1FA"); - - // shr - cb_set_pos(cb, 0); shr(cb, R14, imm_opnd(7)); check_bytes(cb, "49C1EE07"); - - /* - // sqrtsd - test( - delegate void (CodeBlock cb) { cb.sqrtsd(X86Opnd(XMM2), X86Opnd(XMM6)); }, - "F20F51D6" - ); - */ - - // sub - cb_set_pos(cb, 0); sub(cb, EAX, imm_opnd(1)); check_bytes(cb, "83E801"); - cb_set_pos(cb, 0); sub(cb, RAX, imm_opnd(2)); check_bytes(cb, "4883E802"); - - // test - cb_set_pos(cb, 0); test(cb, AL, AL); check_bytes(cb, "84C0"); - cb_set_pos(cb, 0); test(cb, AX, AX); check_bytes(cb, "6685C0"); - cb_set_pos(cb, 0); test(cb, CL, imm_opnd(8)); check_bytes(cb, "F6C108"); - cb_set_pos(cb, 0); test(cb, DL, imm_opnd(7)); check_bytes(cb, "F6C207"); - cb_set_pos(cb, 0); test(cb, RCX, imm_opnd(8)); check_bytes(cb, "F6C108"); - cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(8)); check_bytes(cb, "F6420808"); - cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(255)); check_bytes(cb, "F64208FF"); - cb_set_pos(cb, 0); test(cb, DX, imm_opnd(0xFFFF)); check_bytes(cb, "66F7C2FFFF"); - cb_set_pos(cb, 0); test(cb, mem_opnd(16, RDX, 8), imm_opnd(0xFFFF)); check_bytes(cb, "66F74208FFFF"); - cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, 0), imm_opnd(1)); check_bytes(cb, "F60601"); - cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, 16), imm_opnd(1)); check_bytes(cb, "F6461001"); - cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, -16), imm_opnd(1)); check_bytes(cb, "F646F001"); - cb_set_pos(cb, 0); test(cb, mem_opnd(32, RSI, 64), EAX); check_bytes(cb, "854640"); - cb_set_pos(cb, 0); test(cb, mem_opnd(64, RDI, 42), RAX); check_bytes(cb, "4885472A"); - cb_set_pos(cb, 0); test(cb, RAX, RAX); check_bytes(cb, "4885C0"); - cb_set_pos(cb, 0); test(cb, RAX, RSI); check_bytes(cb, "4885F0"); - cb_set_pos(cb, 0); test(cb, mem_opnd(64, RSI, 64), imm_opnd(~0x08)); check_bytes(cb, "48F74640F7FFFFFF"); - - // xchg - cb_set_pos(cb, 0); xchg(cb, RAX, RCX); 
check_bytes(cb, "4891"); - cb_set_pos(cb, 0); xchg(cb, RAX, R13); check_bytes(cb, "4995"); - cb_set_pos(cb, 0); xchg(cb, RCX, RBX); check_bytes(cb, "4887D9"); - cb_set_pos(cb, 0); xchg(cb, R9, R15); check_bytes(cb, "4D87F9"); - - // xor - cb_set_pos(cb, 0); xor(cb, EAX, EAX); check_bytes(cb, "31C0"); - - printf("Assembler tests done\n"); -} - -void assert_equal(int expected, int actual) -{ - if (expected != actual) { - fprintf(stderr, "expected %d, got %d\n", expected, actual); - exit(-1); - } -} - -void run_runtime_tests(void) -{ - printf("Running runtime tests\n"); - - codeblock_t codeblock; - codeblock_t* cb = &codeblock; - - uint8_t* mem_block = alloc_exec_mem(4096); - cb_init(cb, mem_block, 4096); - - int (*function)(void); - function = (int (*)(void))mem_block; - - #define TEST(BODY) cb_set_pos(cb, 0); BODY ret(cb); cb_mark_all_executable(cb); assert_equal(7, function()); - - // add - TEST({ mov(cb, RAX, imm_opnd(0)); add(cb, RAX, imm_opnd(7)); }) - TEST({ mov(cb, RAX, imm_opnd(0)); mov(cb, RCX, imm_opnd(7)); add(cb, RAX, RCX); }) - - // and - TEST({ mov(cb, RAX, imm_opnd(31)); and(cb, RAX, imm_opnd(7)); }) - TEST({ mov(cb, RAX, imm_opnd(31)); mov(cb, RCX, imm_opnd(7)); and(cb, RAX, RCX); }) - - // or - TEST({ mov(cb, RAX, imm_opnd(3)); or(cb, RAX, imm_opnd(4)); }) - TEST({ mov(cb, RAX, imm_opnd(3)); mov(cb, RCX, imm_opnd(4)); or(cb, RAX, RCX); }) - - // push/pop - TEST({ mov(cb, RCX, imm_opnd(7)); push(cb, RCX); pop(cb, RAX); }) - - // shr - TEST({ mov(cb, RAX, imm_opnd(31)); shr(cb, RAX, imm_opnd(2)); }) - - // sub - TEST({ mov(cb, RAX, imm_opnd(12)); sub(cb, RAX, imm_opnd(5)); }) - TEST({ mov(cb, RAX, imm_opnd(12)); mov(cb, RCX, imm_opnd(5)); sub(cb, RAX, RCX); }) - - // xor - TEST({ mov(cb, RAX, imm_opnd(13)); xor(cb, RAX, imm_opnd(10)); }) - TEST({ mov(cb, RAX, imm_opnd(13)); mov(cb, RCX, imm_opnd(10)); xor(cb, RAX, RCX); }) - - #undef TEST - - printf("Runtime tests done\n"); -} - -int main(int argc, char** argv) -{ - run_assembler_tests(); - 
run_runtime_tests(); - - return 0; -} |
