summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Noah Gibbs <the.codefolio.guy@gmail.com> 2022-07-28 16:45:08 +0100
committer Takashi Kokubun <takashikkbn@gmail.com> 2022-08-24 10:42:45 -0700
commit b4be3c00c5737649166db676278fd28f768a5e3c (patch)
tree 73ad75aa40351021832e44943a40a349172568d7
parent 0ad9cc16966c2e56f0fe7e5992edf76033d3a83f (diff)
add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332)
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6278
-rw-r--r-- yjit.c 12
-rw-r--r-- yjit/bindgen/src/main.rs 7
-rw-r--r-- yjit/src/core.rs 27
-rw-r--r-- yjit/src/cruby_bindings.inc.rs 26
-rw-r--r-- yjit/src/disasm.rs 84
-rw-r--r-- yjit/src/options.rs 20
-rw-r--r-- yjit/src/utils.rs 35
7 files changed, 170 insertions, 41 deletions
diff --git a/yjit.c b/yjit.c
index 1a2f71a959..0dddcfdc5a 100644
--- a/yjit.c
+++ b/yjit.c
@@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str)
return LONG2NUM(RSTRING_LEN(str));
}
+unsigned long
+rb_RSTRING_LEN(VALUE str)
+{
+ return RSTRING_LEN(str);
+}
+
+char *
+rb_RSTRING_PTR(VALUE str)
+{
+ return RSTRING_PTR(str);
+}
+
// This is defined only as a named struct inside rb_iseq_constant_body.
// By giving it a separate typedef, we make it nameable by rust-bindgen.
// Bindgen's temp/anon name isn't guaranteed stable.
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index f54addc795..f8d87aeec8 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -70,6 +70,9 @@ fn main() {
.allowlist_function("rb_str_buf_append")
.allowlist_function("rb_str_dup")
+ // From encindex.h
+ .allowlist_type("ruby_preserved_encindex")
+
// This struct is public to Ruby C extensions
// From include/ruby/internal/core/rbasic.h
.allowlist_type("RBasic")
@@ -240,6 +243,7 @@ fn main() {
.allowlist_var("VM_ENV_DATA_INDEX_SPECVAL")
.allowlist_var("VM_ENV_DATA_INDEX_FLAGS")
.allowlist_var("VM_ENV_DATA_SIZE")
+ .allowlist_function("rb_iseq_path")
// From yjit.c
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
@@ -265,6 +269,8 @@ fn main() {
.allowlist_function("rb_yjit_for_each_iseq")
.allowlist_function("rb_yjit_obj_written")
.allowlist_function("rb_yjit_str_simple_append")
+ .allowlist_function("rb_RSTRING_PTR")
+ .allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
.allowlist_function("rb_yjit_exit_locations_dict")
@@ -282,6 +288,7 @@ fn main() {
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")
.allowlist_function("rb_iseq_each")
+ .allowlist_function("rb_iseq_method_name")
// From builtin.h
.allowlist_type("rb_builtin_function.*")
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 64585653d9..cb026f6a3b 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -6,6 +6,8 @@ use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
use crate::utils::*;
+#[cfg(feature="disasm")]
+use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
use std::hash::{Hash, Hasher};
@@ -1426,6 +1428,20 @@ fn gen_block_series_body(
last_blockref = new_blockref;
}
+ #[cfg(feature = "disasm")]
+ {
+ // If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
+ // If so, we print the new blocks to the console.
+ if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
+ let iseq_location = iseq_get_location(blockid.iseq);
+ if iseq_location.contains(substr) {
+ let last_block = last_blockref.borrow();
+ println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx);
+ println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx));
+ }
+ }
+ }
+
Some(first_block)
}
@@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
verify_blockid(block.blockid);
+ #[cfg(feature = "disasm")]
+ {
+ // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
+ if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
+ let iseq_location = iseq_get_location(block.blockid.iseq);
+ if iseq_location.contains(substr) {
+ println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx);
+ }
+ }
+ }
+
// Remove this block from the version array
remove_block_version(blockref);
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 31f09ef98d..a329dadc9b 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22;
pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608;
pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42;
pub type ruby_encoding_consts = u32;
+pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0;
+pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1;
+pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2;
+pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3;
+pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4;
+pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5;
+pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6;
+pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7;
+pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8;
+pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9;
+pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10;
+pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11;
+pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12;
+pub type ruby_preserved_encindex = u32;
extern "C" {
pub fn rb_obj_info_dump(obj: VALUE);
}
@@ -650,6 +664,9 @@ pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
pub type vm_frame_env_flags = u32;
extern "C" {
+ pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE;
+}
+extern "C" {
pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
}
extern "C" {
@@ -970,6 +987,9 @@ extern "C" {
pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
}
extern "C" {
+ pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE;
+}
+extern "C" {
pub fn rb_vm_barrier();
}
extern "C" {
@@ -1020,6 +1040,12 @@ extern "C" {
extern "C" {
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
}
+extern "C" {
+ pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
+}
+extern "C" {
+ pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
+}
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
index 2082648c4a..83c80d6c66 100644
--- a/yjit/src/disasm.rs
+++ b/yjit/src/disasm.rs
@@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
// Get the iseq pointer from the wrapper
let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
- let out_string = disasm_iseq(iseq);
+ // Only blocks starting before ISEQ index 9999 are included, so this will truncate disassembly of methods with 10k+ bytecodes.
+ // That's a good thing - this prints to console.
+ let out_string = disasm_iseq_insn_range(iseq, 0, 9999);
return rust_str_to_ruby(&out_string);
}
}
#[cfg(feature = "disasm")]
-fn disasm_iseq(iseq: IseqPtr) -> String {
- let mut out = String::from("");
+pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String {
+ let mut out = String::from("");
// Get a list of block versions generated for this iseq
let mut block_list = get_iseq_block_list(iseq);
@@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String {
for block_idx in 0..block_list.len() {
let block = block_list[block_idx].borrow();
let blockid = block.get_blockid();
- let end_idx = block.get_end_idx();
- let start_addr = block.get_start_addr().unwrap().raw_ptr();
- let end_addr = block.get_end_addr().unwrap().raw_ptr();
- let code_size = block.code_size();
-
- // Write some info about the current block
- let block_ident = format!(
- "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
- block_idx + 1,
- block_list.len(),
- blockid.idx,
- end_idx,
- code_size
- );
- out.push_str(&format!("== {:=<60}\n", block_ident));
-
- // Disassemble the instructions
- let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
- let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
-
- // For each instruction in this block
- for insn in insns.as_ref() {
- // Comments for this block
- if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
- for comment in comment_list {
- out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
+ if blockid.idx >= start_idx && blockid.idx < end_idx {
+ let end_idx = block.get_end_idx();
+ let start_addr = block.get_start_addr().unwrap().raw_ptr();
+ let end_addr = block.get_end_addr().unwrap().raw_ptr();
+ let code_size = block.code_size();
+
+ // Write some info about the current block
+ let block_ident = format!(
+ "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
+ block_idx + 1,
+ block_list.len(),
+ blockid.idx,
+ end_idx,
+ code_size
+ );
+ out.push_str(&format!("== {:=<60}\n", block_ident));
+
+ // Disassemble the instructions
+ let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
+ let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
+
+ // For each instruction in this block
+ for insn in insns.as_ref() {
+ // Comments for this block
+ if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
+ for comment in comment_list {
+ out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
+ }
}
+ out.push_str(&format!(" {}\n", insn));
}
- out.push_str(&format!(" {}\n", insn));
- }
- // If this is not the last block
- if block_idx < block_list.len() - 1 {
- // Compute the size of the gap between this block and the next
- let next_block = block_list[block_idx + 1].borrow();
- let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
- let gap_size = (next_start_addr as usize) - (end_addr as usize);
+ // If this is not the last block
+ if block_idx < block_list.len() - 1 {
+ // Compute the size of the gap between this block and the next
+ let next_block = block_list[block_idx + 1].borrow();
+ let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
+ let gap_size = (next_start_addr as usize) - (end_addr as usize);
- // Log the size of the gap between the blocks if nonzero
- if gap_size > 0 {
- out.push_str(&format!("... {} byte gap ...\n", gap_size));
+ // Log the size of the gap between the blocks if nonzero
+ if gap_size > 0 {
+ out.push_str(&format!("... {} byte gap ...\n", gap_size));
+ }
}
}
}
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 704c709bae..7436b3583b 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -1,7 +1,7 @@
use std::ffi::CStr;
// Command-line options
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
// Size of the executable memory block to allocate in MiB
@@ -30,6 +30,9 @@ pub struct Options {
/// Dump compiled and executed instructions for debugging
pub dump_insns: bool,
+ /// Print when specific ISEQ items are compiled or invalidated
+ pub dump_iseq_disasm: Option<String>,
+
/// Verify context objects (debug mode only)
pub verify_ctx: bool,
@@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options {
dump_insns: false,
verify_ctx: false,
global_constant_state: false,
+ dump_iseq_disasm: None,
};
/// Macro to get an option value by name
@@ -64,6 +68,16 @@ macro_rules! get_option {
}
pub(crate) use get_option;
+/// Macro to get a reference to an option value by name; use it for options that are not `Copy` (such as `String` or `Option<String>`) so the value is borrowed instead of copied.
+macro_rules! get_option_ref {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($option_name:ident) => {
+ unsafe { &(OPTIONS.$option_name) }
+ };
+}
+pub(crate) use get_option_ref;
+
/// Expected to receive what comes after the third dash in "--yjit-*".
/// Empty string means user passed only "--yjit". C code rejects when
/// they pass exact "--yjit-".
@@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
+ ("dump-iseq-disasm", _) => unsafe {
+ OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
+ },
+
("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
("stats", "") => unsafe { OPTIONS.gen_stats = true },
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index 02fbce47d8..ade573b8da 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -71,6 +71,41 @@ macro_rules! offset_of {
#[allow(unused)]
pub(crate) use offset_of;
+// Convert a CRuby UTF-8-compatible RSTRING into a Rust string.
+// This works on ASCII strings and anything else that is legal UTF-8,
+// including embedded nulls; it panics on other encodings or invalid UTF-8.
+fn ruby_str_to_rust(v: VALUE) -> String {
+ // Make sure the CRuby encoding is UTF-8 compatible
+ let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
+ assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);
+
+ let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
+ let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
+ let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
+ String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
+}
+
+// Location is the file defining the method, a colon, then the method name;
+// either part is "None" when CRuby returns nil for it. Filenames are sometimes
+// internal strings supplied to eval, so be careful with them.
+pub fn iseq_get_location(iseq: IseqPtr) -> String {
+ let iseq_path = unsafe { rb_iseq_path(iseq) };
+ let iseq_method = unsafe { rb_iseq_method_name(iseq) };
+
+ let mut s = if iseq_path == Qnil {
+ "None".to_string()
+ } else {
+ ruby_str_to_rust(iseq_path)
+ };
+ s.push_str(":");
+ if iseq_method == Qnil {
+ s.push_str("None");
+ } else {
+ s.push_str(& ruby_str_to_rust(iseq_method));
+ }
+ s
+}
+
#[cfg(test)]
mod tests {
#[test]