diff options
author | Noah Gibbs <the.codefolio.guy@gmail.com> | 2022-07-28 16:45:08 +0100 |
---|---|---|
committer | Takashi Kokubun <takashikkbn@gmail.com> | 2022-08-24 10:42:45 -0700 |
commit | b4be3c00c5737649166db676278fd28f768a5e3c (patch) | |
tree | 73ad75aa40351021832e44943a40a349172568d7 | |
parent | 0ad9cc16966c2e56f0fe7e5992edf76033d3a83f (diff) |
add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332)
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6278
-rw-r--r-- | yjit.c | 12 | ||||
-rw-r--r-- | yjit/bindgen/src/main.rs | 7 | ||||
-rw-r--r-- | yjit/src/core.rs | 27 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 26 | ||||
-rw-r--r-- | yjit/src/disasm.rs | 84 | ||||
-rw-r--r-- | yjit/src/options.rs | 20 | ||||
-rw-r--r-- | yjit/src/utils.rs | 35 |
7 files changed, 170 insertions, 41 deletions
@@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str) return LONG2NUM(RSTRING_LEN(str)); } +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + // This is defined only as a named struct inside rb_iseq_constant_body. // By giving it a separate typedef, we make it nameable by rust-bindgen. // Bindgen's temp/anon name isn't guaranteed stable. diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index f54addc795..f8d87aeec8 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -70,6 +70,9 @@ fn main() { .allowlist_function("rb_str_buf_append") .allowlist_function("rb_str_dup") + // From encindex.h + .allowlist_type("ruby_preserved_encindex") + // This struct is public to Ruby C extensions // From include/ruby/internal/core/rbasic.h .allowlist_type("RBasic") @@ -240,6 +243,7 @@ fn main() { .allowlist_var("VM_ENV_DATA_INDEX_SPECVAL") .allowlist_var("VM_ENV_DATA_INDEX_FLAGS") .allowlist_var("VM_ENV_DATA_SIZE") + .allowlist_function("rb_iseq_path") // From yjit.c .allowlist_function("rb_iseq_(get|set)_yjit_payload") @@ -265,6 +269,8 @@ fn main() { .allowlist_function("rb_yjit_for_each_iseq") .allowlist_function("rb_yjit_obj_written") .allowlist_function("rb_yjit_str_simple_append") + .allowlist_function("rb_RSTRING_PTR") + .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") .allowlist_function("rb_yjit_exit_locations_dict") @@ -282,6 +288,7 @@ fn main() { .allowlist_function("rb_vm_insn_addr2opcode") .allowlist_function("rb_iseqw_to_iseq") .allowlist_function("rb_iseq_each") + .allowlist_function("rb_iseq_method_name") // From builtin.h .allowlist_type("rb_builtin_function.*") diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 64585653d9..cb026f6a3b 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -6,6 +6,8 @@ use crate::cruby::*; use crate::options::*; use crate::stats::*; use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; use core::ffi::c_void; use std::cell::*; use std::hash::{Hash, Hasher}; @@ -1426,6 +1428,20 @@ fn gen_block_series_body( last_blockref = new_blockref; } + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq); + if iseq_location.contains(substr) { + let last_block = last_blockref.borrow(); + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx); + println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx)); + } + } + } + Some(first_block) } @@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) { verify_blockid(block.blockid); + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(block.blockid.iseq); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx); + } + } + } + // Remove this block from the version array remove_block_version(blockref); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 31f09ef98d..a329dadc9b 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22; pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608; pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42; pub type ruby_encoding_consts = u32; +pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0; +pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1; +pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2; +pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3; +pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4; +pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5; +pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6; +pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7; +pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8; +pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9; +pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; +pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; +pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; +pub type ruby_preserved_encindex = u32; extern "C" { pub fn rb_obj_info_dump(obj: VALUE); } @@ -650,6 +664,9 @@ pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; extern "C" { + pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; +} +extern "C" { pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; } extern "C" { @@ -970,6 +987,9 @@ extern "C" { pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; } extern "C" { + pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE; +} +extern "C" { pub fn rb_vm_barrier(); } extern "C" { @@ -1020,6 +1040,12 @@ extern "C" { extern "C" { pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; } +extern "C" { + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; +} +extern "C" { + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; +} pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 2082648c4a..83c80d6c66 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU // Get the iseq pointer from the wrapper let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - let out_string = disasm_iseq(iseq); + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. + let out_string = disasm_iseq_insn_range(iseq, 0, 9999); return rust_str_to_ruby(&out_string); } } #[cfg(feature = "disasm")] -fn disasm_iseq(iseq: IseqPtr) -> String { - let mut out = String::from(""); +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { + let mut out = String::from(""); // Get a list of block versions generated for this iseq let mut block_list = get_iseq_block_list(iseq); @@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String { for block_idx in 0..block_list.len() { let block = block_list[block_idx].borrow(); let blockid = block.get_blockid(); - let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap().raw_ptr(); - let end_addr = block.get_end_addr().unwrap().raw_ptr(); - let code_size = block.code_size(); - - // Write some info about the current block - let block_ident = format!( - "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", - block_idx + 1, - block_list.len(), - blockid.idx, - end_idx, - code_size - ); - out.push_str(&format!("== {:=<60}\n", block_ident)); - - // Disassemble the instructions - let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; - let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); - - // For each instruction in this block - for insn in insns.as_ref() { - // Comments for this block - if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { - for comment in comment_list { - out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + if blockid.idx >= start_idx && blockid.idx < end_idx { + let end_idx = block.get_end_idx(); + let start_addr = block.get_start_addr().unwrap().raw_ptr(); + let end_addr = block.get_end_addr().unwrap().raw_ptr(); + let code_size = block.code_size(); + + // Write some info about the current block + let block_ident = format!( + "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", + block_idx + 1, + block_list.len(), + blockid.idx, + end_idx, + code_size + ); + out.push_str(&format!("== {:=<60}\n", block_ident)); + + // Disassemble the instructions + let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { + for comment in comment_list { + out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + } } + out.push_str(&format!(" {}\n", insn)); } - out.push_str(&format!(" {}\n", insn)); - } - // If this is not the last block - if block_idx < block_list.len() - 1 { - // Compute the size of the gap between this block and the next - let next_block = block_list[block_idx + 1].borrow(); - let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); - let gap_size = (next_start_addr as usize) - (end_addr as usize); + // If this is not the last block + if block_idx < block_list.len() - 1 { + // Compute the size of the gap between this block and the next + let next_block = block_list[block_idx + 1].borrow(); + let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); + let gap_size = (next_start_addr as usize) - (end_addr as usize); - // Log the size of the gap between the blocks if nonzero - if gap_size > 0 { - out.push_str(&format!("... {} byte gap ...\n", gap_size)); + // Log the size of the gap between the blocks if nonzero + if gap_size > 0 { + out.push_str(&format!("... {} byte gap ...\n", gap_size)); + } } } } diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 704c709bae..7436b3583b 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -1,7 +1,7 @@ use std::ffi::CStr; // Command-line options -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Debug)] #[repr(C)] pub struct Options { // Size of the executable memory block to allocate in MiB @@ -30,6 +30,9 @@ pub struct Options { /// Dump compiled and executed instructions for debugging pub dump_insns: bool, + /// Print when specific ISEQ items are compiled or invalidated + pub dump_iseq_disasm: Option<String>, + /// Verify context objects (debug mode only) pub verify_ctx: bool, @@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options { dump_insns: false, verify_ctx: false, global_constant_state: false, + dump_iseq_disasm: None, }; /// Macro to get an option value by name @@ -64,6 +68,16 @@ macro_rules! get_option { } pub(crate) use get_option; +/// Macro to reference an option value by name; we assume it's a cloneable type like String or an Option of same. +macro_rules! get_option_ref { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($option_name:ident) => { + unsafe { &(OPTIONS.$option_name) } + }; +} +pub(crate) use get_option_ref; + /// Expected to receive what comes after the third dash in "--yjit-*". /// Empty string means user passed only "--yjit". C code rejects when /// they pass exact "--yjit-". @@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } }, + ("dump-iseq-disasm", _) => unsafe { + OPTIONS.dump_iseq_disasm = Some(opt_val.to_string()); + }, + ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true }, ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true }, ("stats", "") => unsafe { OPTIONS.gen_stats = true }, diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index 02fbce47d8..ade573b8da 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -71,6 +71,41 @@ macro_rules! offset_of { #[allow(unused)] pub(crate) use offset_of; +// Convert a CRuby UTF-8-encoded RSTRING into a Rust string. +// This should work fine on ASCII strings and anything else +// that is considered legal UTF-8, including embedded nulls. +fn ruby_str_to_rust(v: VALUE) -> String { + // Make sure the CRuby encoding is UTF-8 compatible + let encoding = unsafe { rb_ENCODING_GET(v) } as u32; + assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII); + + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; + String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation +} + +// Location is the file defining the method, colon, method name. +// Filenames are sometimes internal strings supplied to eval, +// so be careful with them. +pub fn iseq_get_location(iseq: IseqPtr) -> String { + let iseq_path = unsafe { rb_iseq_path(iseq) }; + let iseq_method = unsafe { rb_iseq_method_name(iseq) }; + + let mut s = if iseq_path == Qnil { + "None".to_string() + } else { + ruby_str_to_rust(iseq_path) + }; + s.push_str(":"); + if iseq_method == Qnil { + s.push_str("None"); + } else { + s.push_str(& ruby_str_to_rust(iseq_method)); + } + s +} + #[cfg(test)] mod tests { #[test] |