diff options
Diffstat (limited to 'yjit/src/disasm.rs')
-rw-r--r-- | yjit/src/disasm.rs | 273 |
1 files changed, 200 insertions, 73 deletions
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 2082648c4a..7875276815 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -1,6 +1,15 @@ use crate::core::*; use crate::cruby::*; use crate::yjit::yjit_enabled_p; +#[cfg(feature = "disasm")] +use crate::asm::CodeBlock; +#[cfg(feature = "disasm")] +use crate::codegen::CodePtr; +#[cfg(feature = "disasm")] +use crate::options::DumpDisasm; + +#[cfg(feature = "disasm")] +use std::fmt::Write; /// Primitive called in yjit.rb /// Produce a string representing the disassembly for an ISEQ @@ -26,110 +35,226 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU // Get the iseq pointer from the wrapper let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - let out_string = disasm_iseq(iseq); + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. + let out_string = with_vm_lock(src_loc!(), || disasm_iseq_insn_range(iseq, 0, 9999)); return rust_str_to_ruby(&out_string); } } +/// Only call while holding the VM lock. #[cfg(feature = "disasm")] -fn disasm_iseq(iseq: IseqPtr) -> String { +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> String { let mut out = String::from(""); // Get a list of block versions generated for this iseq - let mut block_list = get_iseq_block_list(iseq); + let block_list = get_or_create_iseq_block_list(iseq); + let mut block_list: Vec<&Block> = block_list.into_iter().map(|blockref| { + // SAFETY: We have the VM lock here and all the blocks on iseqs are valid. + unsafe { blockref.as_ref() } + }).collect(); // Get a list of codeblocks relevant to this iseq let global_cb = crate::codegen::CodegenGlobals::get_inline_cb(); // Sort the blocks by increasing start addresses - block_list.sort_by(|a, b| { - use std::cmp::Ordering; - - // Get the start addresses for each block - let addr_a = a.borrow().get_start_addr().unwrap().raw_ptr(); - let addr_b = b.borrow().get_start_addr().unwrap().raw_ptr(); - - if addr_a < addr_b { - Ordering::Less - } else if addr_a == addr_b { - Ordering::Equal - } else { - Ordering::Greater - } - }); + block_list.sort_by_key(|block| block.get_start_addr().as_offset()); // Compute total code size in bytes for all blocks in the function let mut total_code_size = 0; for blockref in &block_list { - total_code_size += blockref.borrow().code_size(); + total_code_size += blockref.code_size(); + } + + writeln!(out, "NUM BLOCK VERSIONS: {}", block_list.len()).unwrap(); + writeln!(out, "TOTAL INLINE CODE SIZE: {} bytes", total_code_size).unwrap(); + + // For each block, sorted by increasing start address + for (block_idx, block) in block_list.iter().enumerate() { + let blockid = block.get_blockid(); + if blockid.idx >= start_idx && blockid.idx < end_idx { + let end_idx = block.get_end_idx(); + let start_addr = block.get_start_addr(); + let end_addr = block.get_end_addr(); + let code_size = block.code_size(); + + // Write some info about the current block + let blockid_idx = blockid.idx; + let block_ident = format!( + "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", + block_idx + 1, + block_list.len(), + blockid_idx, + end_idx, + code_size + ); + writeln!(out, "== {:=<60}", block_ident).unwrap(); + + // Disassemble the instructions + for (start_addr, end_addr) in global_cb.writable_addrs(start_addr, end_addr) { + out.push_str(&disasm_addr_range(global_cb, start_addr, end_addr)); + writeln!(out).unwrap(); + } + + // If this is not the last block + if block_idx < block_list.len() - 1 { + // Compute the size of the gap between this block and the next + let next_block = block_list[block_idx + 1]; + let next_start_addr = next_block.get_start_addr(); + let gap_size = next_start_addr.as_offset() - end_addr.as_offset(); + + // Log the size of the gap between the blocks if nonzero + if gap_size > 0 { + writeln!(out, "... {} byte gap ...", gap_size).unwrap(); + } + } + } } + return out; +} + +#[cfg(feature = "disasm")] +pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: CodePtr, dump_disasm: &DumpDisasm) { + use std::fs::File; + use std::io::Write; + + for (start_addr, end_addr) in cb.writable_addrs(start_addr, end_addr) { + let disasm = disasm_addr_range(cb, start_addr, end_addr); + if disasm.len() > 0 { + match dump_disasm { + DumpDisasm::Stdout => println!("{disasm}"), + DumpDisasm::File(path) => { + let mut f = File::options().create(true).append(true).open(path).unwrap(); + f.write_all(disasm.as_bytes()).unwrap(); + } + }; + } + } +} + +#[cfg(feature = "disasm")] +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String { + let mut out = String::from(""); + // Initialize capstone use capstone::prelude::*; - let cs = Capstone::new() + + #[cfg(target_arch = "x86_64")] + let mut cs = Capstone::new() .x86() .mode(arch::x86::ArchMode::Mode64) .syntax(arch::x86::ArchSyntax::Intel) .build() .unwrap(); - out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len())); - out.push_str(&format!( - "TOTAL INLINE CODE SIZE: {} bytes\n", - total_code_size - )); - - // For each block, sorted by increasing start address - for block_idx in 0..block_list.len() { - let block = block_list[block_idx].borrow(); - let blockid = block.get_blockid(); - let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap().raw_ptr(); - let end_addr = block.get_end_addr().unwrap().raw_ptr(); - let code_size = block.code_size(); - - // Write some info about the current block - let block_ident = format!( - "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", - block_idx + 1, - block_list.len(), - blockid.idx, - end_idx, - code_size - ); - out.push_str(&format!("== {:=<60}\n", block_ident)); - - // Disassemble the instructions - let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; - let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); - - // For each instruction in this block - for insn in insns.as_ref() { - // Comments for this block - if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { - for comment in comment_list { - out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + #[cfg(target_arch = "aarch64")] + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .detail(true) + .build() + .unwrap(); + cs.set_skipdata(true).unwrap(); + + // Disassemble the instructions + let code_size = end_addr - start_addr; + let code_slice = unsafe { std::slice::from_raw_parts(start_addr as _, code_size) }; + // Stabilize output for cargo test + #[cfg(test)] + let start_addr = 0; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = cb.comments_at(insn.address() as usize) { + for comment in comment_list { + if cb.outlined { + write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue } + writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold } - out.push_str(&format!(" {}\n", insn)); } + if cb.outlined { + write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue + } + writeln!(&mut out, " {insn}").unwrap(); + if cb.outlined { + write!(&mut out, "\x1b[0m").unwrap(); // Disable blue + } + } - // If this is not the last block - if block_idx < block_list.len() - 1 { - // Compute the size of the gap between this block and the next - let next_block = block_list[block_idx + 1].borrow(); - let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); - let gap_size = (next_start_addr as usize) - (end_addr as usize); + return out; +} - // Log the size of the gap between the blocks if nonzero - if gap_size > 0 { - out.push_str(&format!("... {} byte gap ...\n", gap_size)); +/// Assert that CodeBlock has the code specified with hex. In addition, if tested with +/// `cargo test --all-features`, it also checks it generates the specified disasm. +#[cfg(test)] +macro_rules! assert_disasm { + ($cb:expr, $hex:expr, $disasm:expr) => { + #[cfg(feature = "disasm")] + { + let disasm = disasm_addr_range( + &$cb, + $cb.get_ptr(0).raw_addr(&$cb), + $cb.get_write_ptr().raw_addr(&$cb), + ); + assert_eq!(unindent(&disasm, false), unindent(&$disasm, true)); + } + assert_eq!(format!("{:x}", $cb), $hex); + }; +} +#[cfg(test)] +pub(crate) use assert_disasm; + +/// Remove the minimum indent from every line, skipping the first line if `skip_first`. +#[cfg(all(feature = "disasm", test))] +pub fn unindent(string: &str, trim_lines: bool) -> String { + fn split_lines(string: &str) -> Vec<String> { + let mut result: Vec<String> = vec![]; + let mut buf: Vec<u8> = vec![]; + for byte in string.as_bytes().iter() { + buf.push(*byte); + if *byte == b'\n' { + result.push(String::from_utf8(buf).unwrap()); + buf = vec![]; } } + if !buf.is_empty() { + result.push(String::from_utf8(buf).unwrap()); + } + result } - return out; + // Break up a string into multiple lines + let mut lines = split_lines(string); + if trim_lines { // raw string literals come with extra lines + lines.remove(0); + lines.remove(lines.len() - 1); + } + + // Count the minimum number of spaces + let spaces = lines.iter().filter_map(|line| { + for (i, ch) in line.as_bytes().iter().enumerate() { + if *ch != b' ' { + return Some(i); + } + } + None + }).min().unwrap_or(0); + + // Join lines, removing spaces + let mut unindented: Vec<u8> = vec![]; + for line in lines.iter() { + if line.len() > spaces { + unindented.extend_from_slice(&line.as_bytes()[spaces..]); + } else { + unindented.extend_from_slice(&line.as_bytes()); + } + } + String::from_utf8(unindented).unwrap() } /// Primitive called in yjit.rb @@ -176,24 +301,26 @@ pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: V } } -fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> { +fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u16)> { let mut insn_vec = Vec::new(); // Get a list of block versions generated for this iseq - let block_list = get_iseq_block_list(iseq); + let block_list = get_or_create_iseq_block_list(iseq); // For each block associated with this iseq for blockref in &block_list { - let block = blockref.borrow(); + // SAFETY: Called as part of a Ruby method, which ensures the graph is + // well connected for the given iseq. + let block = unsafe { blockref.as_ref() }; let start_idx = block.get_blockid().idx; let end_idx = block.get_end_idx(); - assert!(end_idx <= unsafe { get_iseq_encoded_size(iseq) }); + assert!(u32::from(end_idx) <= unsafe { get_iseq_encoded_size(iseq) }); // For each YARV instruction in the block let mut insn_idx = start_idx; while insn_idx < end_idx { // Get the current pc and opcode - let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() @@ -206,7 +333,7 @@ fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> { insn_vec.push((op_name, insn_idx)); // Move to the next instruction - insn_idx += insn_len(opcode); + insn_idx += insn_len(opcode) as u16; } } |