diff options
Diffstat (limited to 'yjit/src/codegen.rs')
-rw-r--r-- | yjit/src/codegen.rs | 11694 |
1 files changed, 8029 insertions, 3665 deletions
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1d62d74de0..072d96f1b0 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1,5 +1,8 @@ -use crate::asm::x86_64::*; +// We use the YARV bytecode constants which have a CRuby-style name +#![allow(non_upper_case_globals)] + use crate::asm::*; +use crate::backend::ir::*; use crate::core::*; use crate::cruby::*; use crate::invariants::*; @@ -7,94 +10,137 @@ use crate::options::*; use crate::stats::*; use crate::utils::*; use CodegenStatus::*; -use InsnOpnd::*; +use YARVOpnd::*; -use std::cell::RefMut; +use std::cell::Cell; use std::cmp; +use std::cmp::min; use std::collections::HashMap; +use std::ffi::c_void; use std::ffi::CStr; -use std::mem::{self, size_of}; -use std::os::raw::c_uint; +use std::mem; +use std::os::raw::c_int; use std::ptr; +use std::rc::Rc; +use std::cell::RefCell; use std::slice; -// Callee-saved registers -pub const REG_CFP: X86Opnd = R13; -pub const REG_EC: X86Opnd = R12; -pub const REG_SP: X86Opnd = RBX; - -// Scratch registers used by YJIT -pub const REG0: X86Opnd = RAX; -pub const REG0_32: X86Opnd = EAX; -pub const REG0_8: X86Opnd = AL; -pub const REG1: X86Opnd = RCX; -// pub const REG1_32: X86Opnd = ECX; +pub use crate::virtualmem::CodePtr; /// Status returned by code generation functions #[derive(PartialEq, Debug)] enum CodegenStatus { - EndBlock, + SkipNextInsn, KeepCompiling, - CantCompile, + EndBlock, } /// Code generation function signature type InsnGenFn = fn( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus; +) -> Option<CodegenStatus>; -/// Code generation state -/// This struct only lives while code is being generated +/// Ephemeral code generation state. +/// Represents a [core::Block] while we build it. 
pub struct JITState { - // Block version being compiled - block: BlockRef, + /// Instruction sequence for the compiling block + pub iseq: IseqPtr, - // Instruction sequence this is associated with - iseq: IseqPtr, + /// The iseq index of the first instruction in the block + starting_insn_idx: IseqIdx, - // Index of the current instruction being compiled - insn_idx: u32, + /// The [Context] entering into the first instruction of the block + starting_ctx: Context, - // Opcode for the instruction being compiled + /// The placement for the machine code of the [Block] + output_ptr: CodePtr, + + /// Index of the current instruction being compiled + insn_idx: IseqIdx, + + /// Opcode for the instruction being compiled opcode: usize, - // PC of the instruction being compiled + /// PC of the instruction being compiled pc: *mut VALUE, - // Side exit to the instruction being compiled. See :side-exit:. - side_exit_for_pc: Option<CodePtr>, + /// stack_size when it started to compile the current instruction. + stack_size_for_pc: u8, + + /// Execution context when compilation started + /// This allows us to peek at run-time values + ec: EcPtr, + + /// The outgoing branches the block will have + pub pending_outgoing: Vec<PendingBranchRef>, - // Execution context when compilation started - // This allows us to peek at run-time values - ec: Option<EcPtr>, + // --- Fields for block invalidation and invariants tracking below: + // Public mostly so into_block defined in the sibling module core + // can partially move out of Self. - // Whether we need to record the code address at - // the end of this bytecode instruction for global invalidation - record_boundary_patch_point: bool, + /// Whether we need to record the code address at + /// the end of this bytecode instruction for global invalidation + pub record_boundary_patch_point: bool, + + /// Code for immediately exiting upon entry to the block. + /// Required for invalidation. 
+ pub block_entry_exit: Option<CodePtr>, + + /// A list of callable method entries that must be valid for the block to be valid. + pub method_lookup_assumptions: Vec<CmePtr>, + + /// A list of basic operators that not be redefined for the block to be valid. + pub bop_assumptions: Vec<(RedefinitionFlag, ruby_basic_operators)>, + + /// A list of constant expression path segments that must have + /// not been written to for the block to be valid. + pub stable_constant_names_assumption: Option<*const ID>, + + /// A list of classes that are not supposed to have a singleton class. + pub no_singleton_class_assumptions: Vec<VALUE>, + + /// When true, the block is valid only when base pointer is equal to environment pointer. + pub no_ep_escape: bool, + + /// When true, the block is valid only when there is a total of one ractor running + pub block_assumes_single_ractor: bool, + + /// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt) + perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>, + + /// Stack of symbol names for --yjit-perf + perf_stack: Vec<String>, } impl JITState { - pub fn new(blockref: &BlockRef) -> Self { + pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr) -> Self { JITState { - block: blockref.clone(), - iseq: ptr::null(), // TODO: initialize this from the blockid + iseq: blockid.iseq, + starting_insn_idx: blockid.idx, + starting_ctx, + output_ptr, insn_idx: 0, opcode: 0, pc: ptr::null_mut::<VALUE>(), - side_exit_for_pc: None, - ec: None, + stack_size_for_pc: starting_ctx.get_stack_size(), + pending_outgoing: vec![], + ec, record_boundary_patch_point: false, + block_entry_exit: None, + method_lookup_assumptions: vec![], + bop_assumptions: vec![], + stable_constant_names_assumption: None, + no_singleton_class_assumptions: vec![], + no_ep_escape: false, + block_assumes_single_ractor: false, + perf_map: Rc::default(), + 
perf_stack: vec![], } } - pub fn get_block(&self) -> BlockRef { - self.block.clone() - } - - pub fn get_insn_idx(&self) -> u32 { + pub fn get_insn_idx(&self) -> IseqIdx { self.insn_idx } @@ -106,207 +152,439 @@ impl JITState { self.opcode } - pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) { - let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut(); - gc_obj_vec.add_gc_object_offset(ptr_offset); - } - pub fn get_pc(self: &JITState) -> *mut VALUE { self.pc } -} -use crate::codegen::JCCKinds::*; + pub fn get_starting_insn_idx(&self) -> IseqIdx { + self.starting_insn_idx + } -#[allow(non_camel_case_types, unused)] -pub enum JCCKinds { - JCC_JNE, - JCC_JNZ, - JCC_JZ, - JCC_JE, - JCC_JBE, - JCC_JNA, -} + pub fn get_block_entry_exit(&self) -> Option<CodePtr> { + self.block_entry_exit + } -pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE { - // insn_len require non-test config - #[cfg(not(test))] - assert!(insn_len(jit.get_opcode()) > (arg_idx + 1).try_into().unwrap()); - unsafe { *(jit.pc.offset(arg_idx + 1)) } -} + pub fn get_starting_ctx(&self) -> Context { + self.starting_ctx + } + + pub fn get_arg(&self, arg_idx: isize) -> VALUE { + // insn_len require non-test config + #[cfg(not(test))] + assert!(insn_len(self.get_opcode()) > (arg_idx + 1).try_into().unwrap()); + unsafe { *(self.pc.offset(arg_idx + 1)) } + } + + /// Return true if the current ISEQ could escape an environment. + /// + /// As of vm_push_frame(), EP is always equal to BP. However, after pushing + /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP. + /// Also, some method calls escape the environment to the heap. + fn escapes_ep(&self) -> bool { + match unsafe { get_iseq_body_type(self.iseq) } { + // <main> frame is always associated to TOPLEVEL_BINDING. + ISEQ_TYPE_MAIN | + // Kernel#eval uses a heap EP when a Binding argument is not nil. + ISEQ_TYPE_EVAL => true, + // If this ISEQ has previously escaped EP, give up the optimization. 
+ _ if iseq_escapes_ep(self.iseq) => true, + _ => false, + } + } + + // Get the index of the next instruction + fn next_insn_idx(&self) -> u16 { + self.insn_idx + insn_len(self.get_opcode()) as u16 + } -// Load a VALUE into a register and keep track of the reference if it is on the GC heap. -pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) { - assert!(matches!(reg, X86Opnd::Reg(_))); - assert!(reg.num_bits() == 64); + // Check if we are compiling the instruction at the stub PC + // Meaning we are compiling the instruction that is next to execute + pub fn at_current_insn(&self) -> bool { + let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) }; + ec_pc == self.pc + } - // Load the pointer constant into the specified register - mov(cb, reg, const_ptr_opnd(ptr.as_ptr())); + // Peek at the nth topmost value on the Ruby stack. + // Returns the topmost value when n == 0. + pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE { + assert!(self.at_current_insn()); + assert!(n < ctx.get_stack_size() as isize); - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + // Note: this does not account for ctx->sp_offset because + // this is only available when hitting a stub, and while + // hitting a stub, cfp->sp needs to be up to date in case + // codegen functions trigger GC. See :stub-sp-flush:. 
+ return unsafe { + let sp: *mut VALUE = get_cfp_sp(self.get_cfp()); - if !ptr.special_const_p() { - jit.add_gc_object_offset(ptr_offset); + *(sp.offset(-1 - n)) + }; } -} -// Get the index of the next instruction -fn jit_next_insn_idx(jit: &JITState) -> u32 { - jit.insn_idx + insn_len(jit.get_opcode()) -} + fn peek_at_self(&self) -> VALUE { + unsafe { get_cfp_self(self.get_cfp()) } + } -// Check if we are compiling the instruction at the stub PC -// Meaning we are compiling the instruction that is next to execute -fn jit_at_current_insn(jit: &JITState) -> bool { - let ec_pc: *mut VALUE = unsafe { get_cfp_pc(get_ec_cfp(jit.ec.unwrap())) }; - ec_pc == jit.pc -} + fn peek_at_local(&self, n: i32) -> VALUE { + assert!(self.at_current_insn()); -// Peek at the nth topmost value on the Ruby stack. -// Returns the topmost value when n == 0. -fn jit_peek_at_stack(jit: &JITState, ctx: &Context, n: isize) -> VALUE { - assert!(jit_at_current_insn(jit)); - assert!(n < ctx.get_stack_size() as isize); + let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) } + .try_into() + .unwrap(); + assert!(n < local_table_size.try_into().unwrap()); - // Note: this does not account for ctx->sp_offset because - // this is only available when hitting a stub, and while - // hitting a stub, cfp->sp needs to be up to date in case - // codegen functions trigger GC. See :stub-sp-flush:. 
- return unsafe { - let sp: *mut VALUE = get_cfp_sp(get_ec_cfp(jit.ec.unwrap())); + unsafe { + let ep = get_cfp_ep(self.get_cfp()); + let n_isize: isize = n.try_into().unwrap(); + let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1; + *ep.offset(offs) + } + } - *(sp.offset(-1 - n)) - }; -} + fn peek_at_block_handler(&self, level: u32) -> VALUE { + assert!(self.at_current_insn()); -fn jit_peek_at_self(jit: &JITState) -> VALUE { - unsafe { get_cfp_self(get_ec_cfp(jit.ec.unwrap())) } -} + unsafe { + let ep = get_cfp_ep_level(self.get_cfp(), level); + *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) + } + } -fn jit_peek_at_local(jit: &JITState, n: i32) -> VALUE { - assert!(jit_at_current_insn(jit)); + pub fn assume_expected_cfunc( + &mut self, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + class: VALUE, + method: ID, + cfunc: *mut c_void, + ) -> bool { + let cme = unsafe { rb_callable_method_entry(class, method) }; - let local_table_size: isize = unsafe { get_iseq_body_local_table_size(jit.iseq) } - .try_into() - .unwrap(); - assert!(n < local_table_size.try_into().unwrap()); + if cme.is_null() { + return false; + } - unsafe { - let ep = get_cfp_ep(get_ec_cfp(jit.ec.unwrap())); - let n_isize: isize = n.try_into().unwrap(); - let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1; - *ep.offset(offs) + let def_type = unsafe { get_cme_def_type(cme) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return false; + } + if unsafe { get_mct_func(get_cme_def_body_cfunc(cme)) } != cfunc { + return false; + } + + self.assume_method_lookup_stable(asm, ocb, cme); + + true } -} -// Add a comment at the current position in the code block -fn add_comment(cb: &mut CodeBlock, comment_str: &str) { - if cfg!(feature = "asm_comments") { - cb.add_comment(comment_str); + pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, cme: CmePtr) -> Option<()> { + jit_ensure_block_entry_exit(self, asm, ocb)?; + 
self.method_lookup_assumptions.push(cme); + + Some(()) + } + + /// Assume that objects of a given class will have no singleton class. + /// Return true if there has been no such singleton class since boot + /// and we can safely invalidate it. + pub fn assume_no_singleton_class(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, klass: VALUE) -> bool { + if jit_ensure_block_entry_exit(self, asm, ocb).is_none() { + return false; // out of space, give up + } + if has_singleton_class_of(klass) { + return false; // we've seen a singleton class. disable the optimization to avoid an invalidation loop. + } + self.no_singleton_class_assumptions.push(klass); + true + } + + /// Assume that base pointer is equal to environment pointer in the current ISEQ. + /// Return true if it's safe to assume so. + fn assume_no_ep_escape(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool { + if jit_ensure_block_entry_exit(self, asm, ocb).is_none() { + return false; // out of space, give up + } + if self.escapes_ep() { + return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop. 
+ } + self.no_ep_escape = true; + true + } + + fn get_cfp(&self) -> *mut rb_control_frame_struct { + unsafe { get_ec_cfp(self.ec) } + } + + pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, id: *const ID) -> Option<()> { + jit_ensure_block_entry_exit(self, asm, ocb)?; + self.stable_constant_names_assumption = Some(id); + + Some(()) + } + + pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) { + self.pending_outgoing.push(branch) + } + + /// Push a symbol for --yjit-perf + fn perf_symbol_push(&mut self, asm: &mut Assembler, symbol_name: &str) { + if !self.perf_stack.is_empty() { + self.perf_symbol_range_end(asm); + } + self.perf_stack.push(symbol_name.to_string()); + self.perf_symbol_range_start(asm, symbol_name); + } + + /// Pop the stack-top symbol for --yjit-perf + fn perf_symbol_pop(&mut self, asm: &mut Assembler) { + self.perf_symbol_range_end(asm); + self.perf_stack.pop(); + if let Some(symbol_name) = self.perf_stack.get(0) { + self.perf_symbol_range_start(asm, symbol_name); + } + } + + /// Mark the start address of a symbol to be reported to perf + fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) { + let symbol_name = format!("[JIT] {}", symbol_name); + let syms = self.perf_map.clone(); + asm.pos_marker(move |start, _| syms.borrow_mut().push((start, None, symbol_name.clone()))); + } + + /// Mark the end address of a symbol to be reported to perf + fn perf_symbol_range_end(&self, asm: &mut Assembler) { + let syms = self.perf_map.clone(); + asm.pos_marker(move |end, _| { + if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() { + assert_eq!(None, *end_store); + *end_store = Some(end); + } + }); + } + + /// Flush addresses and symbols to /tmp/perf-{pid}.map + fn flush_perf_symbols(&self, cb: &CodeBlock) { + assert_eq!(0, self.perf_stack.len()); + let path = format!("/tmp/perf-{}.map", std::process::id()); + let mut f = 
std::fs::File::options().create(true).append(true).open(path).unwrap(); + for sym in self.perf_map.borrow().iter() { + if let (start, Some(end), name) = sym { + // In case the code straddles two pages, part of it belongs to the symbol. + for (inline_start, inline_end) in cb.writable_addrs(*start, *end) { + use std::io::Write; + let code_size = inline_end - inline_start; + writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap(); + } + } + } + } + + /// Return true if we're compiling a send-like instruction, not an opt_* instruction. + pub fn is_sendish(&self) -> bool { + match unsafe { rb_iseq_opcode_at_pc(self.iseq, self.pc) } as u32 { + YARVINSN_send | + YARVINSN_opt_send_without_block | + YARVINSN_invokesuper => true, + _ => false, + } } } -/// Increment a profiling counter with counter_name -#[cfg(not(feature = "stats"))] -macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => {}; +/// Macro to call jit.perf_symbol_push() without evaluating arguments when +/// the option is turned off, which is useful for avoiding string allocation. +macro_rules! jit_perf_symbol_push { + ($jit:expr, $asm:expr, $symbol_name:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_push($asm, $symbol_name); + } + }; } -#[cfg(feature = "stats")] -macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => { - if (get_option!(gen_stats)) { - // Get a pointer to the counter variable - let ptr = ptr_to_counter!($counter_name); - // Use REG1 because there might be return value in REG0 - mov($cb, REG1, const_ptr_opnd(ptr as *const u8)); - write_lock_prefix($cb); // for ractors. - add($cb, mem_opnd(64, REG1, 0), imm_opnd(1)); +/// Macro to call jit.perf_symbol_pop(), for consistency with jit_perf_symbol_push!(). +macro_rules! 
jit_perf_symbol_pop { + ($jit:expr, $asm:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_pop($asm); } }; } -/// Increment a counter then take an existing side exit -#[cfg(not(feature = "stats"))] -macro_rules! counted_exit { - ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {{ - let _ = $ocb; - $existing_side_exit - }}; -} -#[cfg(feature = "stats")] -macro_rules! counted_exit { - ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => { - // The counter is only incremented when stats are enabled - if (!get_option!(gen_stats)) { - $existing_side_exit - } else { - let ocb = $ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); +/// Macro to push and pop a perf symbol around a function call. +macro_rules! perf_call { + // perf_call!("prefix: ", func(...)) uses "prefix: func" as a symbol. + ($prefix:expr, $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) ) => { + { + jit_perf_symbol_push!($jit, $asm, &format!("{}{}", $prefix, stringify!($func_name)), PerfMap::Codegen); + let ret = $func_name($jit, $asm, $($arg),*); + jit_perf_symbol_pop!($jit, $asm, PerfMap::Codegen); + ret + } + }; + // perf_call! { func(...) } uses "func" as a symbol. + { $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) 
} => { + perf_call!("", $func_name($jit, $asm, $($arg),*)) + }; +} + +use crate::codegen::JCCKinds::*; - // Increment the counter - gen_counter_incr!(ocb, $counter_name); +#[allow(non_camel_case_types, unused)] +pub enum JCCKinds { + JCC_JNE, + JCC_JNZ, + JCC_JZ, + JCC_JE, + JCC_JB, + JCC_JBE, + JCC_JNA, + JCC_JNAE, + JCC_JO_MUL, +} - // Jump to the existing side exit - jmp_ptr(ocb, $existing_side_exit); +#[inline(always)] +fn gen_counter_incr(asm: &mut Assembler, counter: Counter) { + // Assert that default counters are not incremented by generated code as this would impact performance + assert!(!DEFAULT_COUNTERS.contains(&counter), "gen_counter_incr incremented {:?}", counter); - // Pointer to the side-exit code - code_ptr - } - }; + if get_option!(gen_stats) { + asm_comment!(asm, "increment counter {}", counter.get_name()); + let ptr = get_counter_ptr(&counter.get_name()); + let ptr_reg = asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, Opnd::UImm(1)); + } } // Save the incremented PC on the CFP // This is necessary when callees can raise or allocate -fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) { +fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { let pc: *mut VALUE = jit.get_pc(); let ptr: *mut VALUE = unsafe { let cur_insn_len = insn_len(jit.get_opcode()) as isize; pc.offset(cur_insn_len) }; - mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg); + + asm_comment!(asm, "save PC to CFP"); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8)); } /// Save the current SP on the CFP /// This realigns the interpreter SP with the JIT SP /// Note: this will change the current value of REG_SP, /// which could invalidate memory operands -fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) { - if ctx.get_sp_offset() != 0 
{ - let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); - let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP); - mov(cb, cfp_sp_opnd, REG_SP); - ctx.set_sp_offset(0); +fn gen_save_sp(asm: &mut Assembler) { + gen_save_sp_with_offset(asm, 0); +} + +/// Save the current SP + offset on the CFP +fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) { + if asm.ctx.get_sp_offset() != -offset { + asm_comment!(asm, "save SP to CFP"); + let stack_pointer = asm.ctx.sp_opnd(offset as i32); + let sp_addr = asm.lea(stack_pointer); + asm.mov(SP, sp_addr); + let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + asm.mov(cfp_sp_opnd, SP); + asm.ctx.set_sp_offset(-offset); } } -/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that -/// could: +/// Basically jit_prepare_non_leaf_call(), but this registers the current PC +/// to lazily push a C method frame when it's necessary. +fn jit_prepare_lazy_frame_call( + jit: &mut JITState, + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, + recv_opnd: YARVOpnd, +) -> bool { + // We can use this only when the receiver is on stack. + let recv_idx = match recv_opnd { + StackOpnd(recv_idx) => recv_idx, + _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd), + }; + + // Get the next PC. jit_save_pc() saves that PC. + let pc: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + jit.get_pc().offset(cur_insn_len) + }; + + let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc(); + match pc_to_cfunc.get(&pc) { + Some(&(other_cme, _)) if other_cme != cme => { + // Bail out if it's not the only cme on this callsite. + incr_counter!(lazy_frame_failure); + return false; + } + _ => { + // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC. + incr_counter!(lazy_frame_count); + pc_to_cfunc.insert(pc, (cme, recv_idx)); + } + } + + // Save the PC to trigger a lazy frame push, and save the SP to get the receiver. 
+ // The C func may call a method that doesn't raise, so prepare for invalidation too. + jit_prepare_non_leaf_call(jit, asm); + + // Make sure we're ready for calling rb_vm_push_cfunc_frame(). + let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) }; + if cfunc_argc != -1 { + assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible + } + assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen. + + true +} + +/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could: /// - Perform GC allocation /// - Take the VM lock through RB_VM_LOCK_ENTER() /// - Perform Ruby method call -fn jit_prepare_routine_call( +/// +/// If the routine doesn't call arbitrary methods, use jit_prepare_call_with_gc() instead. +fn jit_prepare_non_leaf_call( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - scratch_reg: X86Opnd, + asm: &mut Assembler ) { - jit.record_boundary_patch_point = true; - jit_save_pc(jit, cb, scratch_reg); - gen_save_sp(cb, ctx); + // Prepare for GC. Setting PC also prepares for showing a backtrace. + jit.record_boundary_patch_point = true; // VM lock could trigger invalidation + jit_save_pc(jit, asm); // for allocation tracing + gen_save_sp(asm); // protect objects from GC // In case the routine calls Ruby methods, it can set local variables - // through Kernel#binding and other means. - ctx.clear_local_types(); + // through Kernel#binding, rb_debug_inspector API, and other means. + asm.clear_local_types(); +} + +/// jit_save_pc() + gen_save_sp(). 
Should be used before calling a routine that could: +/// - Perform GC allocation +/// - Take the VM lock through RB_VM_LOCK_ENTER() +fn jit_prepare_call_with_gc( + jit: &mut JITState, + asm: &mut Assembler +) { + jit.record_boundary_patch_point = true; // VM lock could trigger invalidation + jit_save_pc(jit, asm); // for allocation tracing + gen_save_sp(asm); // protect objects from GC + + // Expect a leaf ccall(). You should use jit_prepare_non_leaf_call() if otherwise. + asm.expect_leaf_ccall(); } /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. -fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) { - CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos); +fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { + // We add a padding before pos_marker so that the previous patch will not overlap this. + // jump_to_next_insn() puts a patch point at the end of the block in fallthrough cases. + // In the fallthrough case, the next block should start with the same Context, so the + // patch is fine, but it should not overlap another patch. + asm.pad_inval_patch(); + asm.pos_marker(move |code_ptr, cb| { + CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos, cb); + }); } /// Verify the ctx's types and mappings against the compile-time stack, self, @@ -316,14 +594,36 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() } } + // Some types such as CString only assert the class field of the object + // when there has never been a singleton class created for objects of that class. + // Once there is a singleton class created they become their weaker + // `T*` variant, and we more objects should pass the verification. 
+ fn relax_type_with_singleton_class_assumption(ty: Type) -> Type { + if let Type::CString | Type::CArray | Type::CHash = ty { + if has_singleton_class_of(ty.known_class().unwrap()) { + match ty { + Type::CString => return Type::TString, + Type::CArray => return Type::TArray, + Type::CHash => return Type::THash, + _ => (), + } + } + } + + ty + } + // Only able to check types when at current insn - assert!(jit_at_current_insn(jit)); + assert!(jit.at_current_insn()); - let self_val = jit_peek_at_self(jit); + let self_val = jit.peek_at_self(); let self_val_type = Type::from(self_val); + let learned_self_type = ctx.get_opnd_type(SelfOpnd); + let learned_self_type = relax_type_with_singleton_class_assumption(learned_self_type); + // Verify self operand type - if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == usize::MAX { + if self_val_type.diff(learned_self_type) == TypeDiff::Incompatible { panic!( "verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}", ctx.get_opnd_type(SelfOpnd), @@ -332,14 +632,17 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } // Verify stack operand types - let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u16); + let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8); for i in 0..top_idx { - let (learned_mapping, learned_type) = ctx.get_opnd_mapping(StackOpnd(i)); - let stack_val = jit_peek_at_stack(jit, ctx, i as isize); + let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i)); + let learned_type = ctx.get_opnd_type(StackOpnd(i)); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + + let stack_val = jit.peek_at_stack(ctx, i as isize); let val_type = Type::from(stack_val); - match learned_mapping { - TempMapping::MapToSelf => { + match learned_mapping.get_kind() { + TempMappingKind::MapToSelf => { if self_val != stack_val { panic!( "verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}", @@ -348,8 +651,9 @@ fn 
verify_ctx(jit: &JITState, ctx: &Context) { ); } } - TempMapping::MapToLocal(local_idx) => { - let local_val = jit_peek_at_local(jit, local_idx.into()); + TempMappingKind::MapToLocal => { + let local_idx: u8 = learned_mapping.get_local_idx(); + let local_val = jit.peek_at_local(local_idx.into()); if local_val != stack_val { panic!( "verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}", @@ -359,15 +663,16 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { ); } } - TempMapping::MapToStack => {} + TempMappingKind::MapToStack => {} } // If the actual type differs from the learned type - if val_type.diff(learned_type) == usize::MAX { + if val_type.diff(learned_type) == TypeDiff::Incompatible { panic!( - "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {}", + "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {} ({:?})", learned_type, - obj_info_str(stack_val) + obj_info_str(stack_val), + val_type, ); } } @@ -377,10 +682,11 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES); for i in 0..top_idx { let learned_type = ctx.get_local_type(i); - let local_val = jit_peek_at_local(jit, i as i32); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + let local_val = jit.peek_at_local(i as i32); let local_type = Type::from(local_val); - if local_type.diff(learned_type) == usize::MAX { + if local_type.diff(learned_type) == TypeDiff::Incompatible { panic!( "verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})", learned_type, @@ -391,288 +697,429 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } } +// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit +// to the interpreter when it cannot service a stub by generating new code. +// Before coming here, branch_stub_hit() takes care of fully reconstructing +// interpreter state. 
+fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + gen_counter_incr(&mut asm, Counter::exit_from_branch_stub); + + asm_comment!(asm, "exit from branch stub"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(Qundef.into()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + /// Generate an exit to return to the interpreter -fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { - let code_ptr = cb.get_write_ptr(); +fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { + #[cfg(all(feature = "disasm", not(test)))] + { + let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; + asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize)); + } - add_comment(cb, "exit to interpreter"); + if asm.ctx.is_return_landing() { + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + } + + // Spill stack temps before returning to the interpreter + asm.spill_temps(); // Generate the code to exit to the interpreters // Write the adjusted SP back into the CFP - if ctx.get_sp_offset() != 0 { - let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP); + if asm.ctx.get_sp_offset() != 0 { + let sp_opnd = asm.lea(asm.ctx.sp_opnd(0)); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), + sp_opnd + ); } // Update CFP->PC - mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), + Opnd::const_ptr(exit_pc as *const u8) + ); // Accumulate stats about interpreter exits - #[cfg(feature = "stats")] if get_option!(gen_stats) { - mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8)); - call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8); + asm.ccall( + 
rb_yjit_count_side_exit_op as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); + + // If --yjit-trace-exits is enabled, record the exit stack while recording + // the side exits. TraceExits::Counter is handled by gen_counted_exit(). + if get_option!(trace_exits) == Some(TraceExits::All) { + asm.ccall( + rb_yjit_record_exit_stack as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); + } } - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.frame_teardown(); - return code_ptr; + asm.cret(Qundef.into()); } -// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit -// to the interpreter when it cannot service a stub by generating new code. -// Before coming here, branch_stub_hit() takes care of fully reconstructing -// interpreter state. -fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { - let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); +/// :side-exit: +/// Get an exit for the current instruction in the outlined block. The code +/// for each instruction often begins with several guards before proceeding +/// to do work. When guards fail, an option we have is to exit to the +/// interpreter at an instruction boundary. The piece of code that takes +/// care of reconstructing interpreter state and exiting out of generated +/// code is called the side exit. +/// +/// No guards change the logic for reconstructing interpreter state at the +/// moment, so there is one unique side exit for each context. Note that +/// it's incorrect to jump to the side exit after any ctx stack push operations +/// since they change the logic required for reconstructing interpreter state. 
+pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let mut cb = ocb.unwrap(); + let mut asm = Assembler::new(); + asm.ctx = *ctx; + asm.set_reg_temps(ctx.get_reg_temps()); + + gen_exit(exit_pc, &mut asm); + + asm.compile(&mut cb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Get a side exit. Increment a counter in it if --yjit-stats is enabled. +pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> { + // The counter is only incremented when stats are enabled + if !get_option!(gen_stats) { + return Some(side_exit); + } + let counter = match counter { + Some(counter) => counter, + None => return Some(side_exit), + }; - gen_counter_incr!(ocb, exit_from_branch_stub); + let mut asm = Assembler::new(); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + // Increment a counter + gen_counter_incr(&mut asm, counter); + + // Trace a counted exit if --yjit-trace-exits=counter is given. + // TraceExits::All is handled by gen_exit(). + if get_option!(trace_exits) == Some(TraceExits::CountedExit(counter)) { + with_caller_saved_temp_regs(&mut asm, |asm| { + asm.ccall(rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(exit_pc as *const u8)]); + }); + } - mov(ocb, RAX, uimm_opnd(Qundef.into())); - ret(ocb); + // Jump to the existing side exit + asm.jmp(Target::CodePtr(side_exit)); - return code_ptr; + let ocb = ocb.unwrap(); + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -// :side-exit: -// Get an exit for the current instruction in the outlined block. The code -// for each instruction often begins with several guards before proceeding -// to do work. When guards fail, an option we have is to exit to the -// interpreter at an instruction boundary. The piece of code that takes -// care of reconstructing interpreter state and exiting out of generated -// code is called the side exit. 
-// -// No guards change the logic for reconstructing interpreter state at the -// moment, so there is one unique side exit for each context. Note that -// it's incorrect to jump to the side exit after any ctx stack push/pop operations -// since they change the logic required for reconstructing interpreter state. -fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr { - match jit.side_exit_for_pc { - None => { - let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap()); - jit.side_exit_for_pc = Some(exit_code); - exit_code - } - Some(code_ptr) => code_ptr, +/// Preserve caller-saved stack temp registers during the call of a given block +fn with_caller_saved_temp_regs<F, R>(asm: &mut Assembler, block: F) -> R where F: FnOnce(&mut Assembler) -> R { + for &reg in caller_saved_temp_regs() { + asm.cpush(Opnd::Reg(reg)); // save stack temps + } + let ret = block(asm); + for &reg in caller_saved_temp_regs().rev() { + asm.cpop_into(Opnd::Reg(reg)); // restore stack temps } + ret } // Ensure that there is an exit for the start of the block being compiled. // Block invalidation uses this exit. -pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { - let blockref = jit.block.clone(); - let mut block = blockref.borrow_mut(); - let block_ctx = block.get_ctx(); - let blockid = block.get_blockid(); - - if block.entry_exit.is_some() { - return; +#[must_use] +pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> Option<()> { + if jit.block_entry_exit.is_some() { + return Some(()); } - if jit.insn_idx == blockid.idx { - // We are compiling the first instruction in the block. - // Generate the exit with the cache in jitstate. - block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx)); + let block_starting_context = &jit.get_starting_ctx(); + + // If we're compiling the first instruction in the block. 
+ if jit.insn_idx == jit.starting_insn_idx { + // Generate the exit with the cache in Assembler. + let side_exit_context = SideExitContext::new(jit.pc, *block_starting_context); + let entry_exit = asm.get_side_exit(&side_exit_context, None, ocb); + jit.block_entry_exit = Some(entry_exit?); } else { - let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) }; - block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap())); + let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) }; + jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, ocb)?); } + + Some(()) } -// Generate a runtime guard that ensures the PC is at the expected -// instruction index in the iseq, otherwise takes a side-exit. -// This is to handle the situation of optional parameters. -// When a function with optional parameters is called, the entry -// PC for the method isn't necessarily 0. -fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { - //RUBY_ASSERT(cb != NULL); +// Landing code for when c_return tracing is enabled. See full_cfunc_return(). +fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); - let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); - let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; - let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); - mov(cb, REG0, pc_opnd); - mov(cb, REG1, expected_pc_opnd); - cmp(cb, REG0, REG1); + // This chunk of code expects REG_EC to be filled properly and + // RAX to contain the return value of the C method. - let pc_match = cb.new_label("pc_match".to_string()); - je_label(cb, pc_match); + asm_comment!(asm, "full cfunc return"); + asm.ccall( + rb_full_cfunc_return as *const u8, + vec![EC, C_RET_OPND] + ); - // We're not starting at the first PC, so we need to exit. 
- gen_counter_incr!(cb, leave_start_pc_non_zero); + // Count the exit + gen_counter_incr(&mut asm, Counter::traced_cfunc_return); + + // Return to the interpreter + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.frame_teardown(); - mov(cb, RAX, imm_opnd(Qundef.into())); - ret(cb); + asm.cret(Qundef.into()); - // PC should match the expected insn_idx - cb.write_label(pc_match); - cb.link_labels(); + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -// Landing code for when c_return tracing is enabled. See full_cfunc_return(). -fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { - let cb = ocb.unwrap(); - let code_ptr = cb.get_write_ptr(); +/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. +/// This is used by gen_leave() and gen_entry_prologue() +fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); - // This chunk of code expect REG_EC to be filled properly and - // RAX to contain the return value of the C method. + // gen_leave() fully reconstructs interpreter state and leaves the + // return value in C_RET_OPND before coming here. 
+ let ret_opnd = asm.live_reg_opnd(C_RET_OPND); - // Call full_cfunc_return() - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_full_cfunc_return as *const u8); + // Every exit to the interpreter should be counted + gen_counter_incr(&mut asm, Counter::leave_interp_return); - // Count the exit - gen_counter_incr!(cb, traced_cfunc_return); + asm_comment!(asm, "exit from leave"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); - // Return to the interpreter - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.frame_teardown(); - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.cret(ret_opnd); - return code_ptr; + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. -/// This is used by gen_leave() and gen_entry_prologue() -fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { +// Increment SP and transfer the execution to the interpreter after jit_exec_exception(). +// On jit_exec_exception(), you need to return Qundef to keep executing caller non-FINISH +// frames on the interpreter. You also need to increment SP to push the return value to +// the caller's stack, which is different from gen_stub_exit(). +fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); - // Note, gen_leave() fully reconstructs interpreter state and leaves the - // return value in RAX before coming here. + // gen_leave() leaves the return value in C_RET_OPND before coming here. 
+ let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND); // Every exit to the interpreter should be counted - gen_counter_incr!(ocb, leave_interp_return); + gen_counter_incr(&mut asm, Counter::leave_interp_return); + + asm_comment!(asm, "push return value through cfp->sp"); + let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + let sp = asm.load(cfp_sp); + asm.mov(Opnd::mem(64, sp, 0), ruby_ret_val); + let new_sp = asm.add(sp, SIZEOF_VALUE.into()); + asm.mov(cfp_sp, new_sp); + + asm_comment!(asm, "exit from exception"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + asm.frame_teardown(); - ret(ocb); + // Execute vm_exec_core + asm.cret(Qundef.into()); - return code_ptr; + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -/// Compile an interpreter entry block to be inserted into an iseq -/// Returns None if compilation fails. -pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> { - const MAX_PROLOGUE_SIZE: usize = 1024; +// Generate a runtime guard that ensures the PC is at the expected +// instruction index in the iseq, otherwise takes an entry stub +// that generates another check and entry. +// This is to handle the situation of optional parameters. +// When a function with optional parameters is called, the entry +// PC for the method isn't necessarily 0. 
+pub fn gen_entry_chain_guard( + asm: &mut Assembler, + ocb: &mut OutlinedCb, + iseq: IseqPtr, + insn_idx: u16, +) -> Option<PendingEntryRef> { + let entry = new_pending_entry(); + let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?; - // Check if we have enough executable memory - if !cb.has_capacity(MAX_PROLOGUE_SIZE) { - return None; - } + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); + let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); - let old_write_pos = cb.get_write_pos(); + asm_comment!(asm, "guard expected PC"); + asm.cmp(pc_opnd, expected_pc_opnd); - // Align the current write position to cache line boundaries - cb.align_pos(64); + asm.mark_entry_start(&entry); + asm.jne(stub_addr.into()); + asm.mark_entry_end(&entry); + return Some(entry); +} +/// Compile an interpreter entry block to be inserted into an iseq +/// Returns None if compilation fails. +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See [jit_compile_exception] for details. 
+pub fn gen_entry_prologue( + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, + iseq: IseqPtr, + insn_idx: u16, + jit_exception: bool, +) -> Option<CodePtr> { let code_ptr = cb.get_write_ptr(); - add_comment(cb, "yjit entry"); - push(cb, REG_CFP); - push(cb, REG_EC); - push(cb, REG_SP); + let mut asm = Assembler::new(); + if get_option_ref!(dump_disasm).is_some() { + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); + } else { + asm_comment!(asm, "YJIT entry"); + } + + asm.frame_setup(); - // We are passed EC and CFP - mov(cb, REG_EC, C_ARG_REGS[0]); - mov(cb, REG_CFP, C_ARG_REGS[1]); + // Save the CFP, EC, SP registers to the C stack + asm.cpush(CFP); + asm.cpush(EC); + asm.cpush(SP); + + // We are passed EC and CFP as arguments + asm.mov(EC, C_ARG_OPNDS[0]); + asm.mov(CFP, C_ARG_OPNDS[1]); // Load the current SP from the CFP into REG_SP - mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); // Setup cfp->jit_return - mov( - cb, - REG0, - code_ptr_opnd(CodegenGlobals::get_leave_exit_code()), - ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); + // If this is an exception handler entry point + if jit_exception { + // On jit_exec_exception(), it's NOT safe to return a non-Qundef value + // from a non-FINISH frame. This function fixes that problem. + // See [jit_compile_exception] for details. + asm.ccall( + rb_yjit_set_exception_return as *mut u8, + vec![ + CFP, + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr(cb)), + ], + ); + } else { + // On jit_exec() or JIT_EXEC(), it's safe to return a non-Qundef value + // on the entry frame. See [jit_compile] for details. + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + ); + } - // We're compiling iseqs that we *expect* to start at `insn_idx`. 
But in - // the case of optional parameters, the interpreter can set the pc to a - // different location depending on the optional parameters. If an iseq - // has optional parameters, we'll add a runtime check that the PC we've + // We're compiling iseqs that we *expect* to start at `insn_idx`. + // But in the case of optional parameters or when handling exceptions, + // the interpreter can set the pc to a different location. For + // such scenarios, we'll add a runtime check that the PC we've // compiled for is the same PC that the interpreter wants us to run with. - // If they don't match, then we'll take a side exit. - if unsafe { get_iseq_flags_has_opt(iseq) } { - gen_pc_guard(cb, iseq, insn_idx); - } + // If they don't match, then we'll jump to an entry stub and generate + // another PC check and entry there. + let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception { + Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?) + } else { + None + }; - // Verify MAX_PROLOGUE_SIZE - assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE); + asm.compile(cb, Some(ocb))?; - return Some(code_ptr); + if cb.has_dropped_bytes() { + None + } else { + // Mark code pages for code GC + let iseq_payload = get_or_create_iseq_payload(iseq); + for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) { + iseq_payload.pages.insert(page); + } + // Write an entry to the heap and push it to the ISEQ + if let Some(pending_entry) = pending_entry { + let pending_entry = Rc::try_unwrap(pending_entry) + .ok().expect("PendingEntry should be unique"); + iseq_payload.entries.push(pending_entry.into_entry()); + } + Some(code_ptr) + } } // Generate code to check for interrupts and take a side-exit. 
// Warning: this function clobbers REG0 -fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { +fn gen_check_ints( + asm: &mut Assembler, + counter: Counter, +) { // Check for interrupts // see RUBY_VM_CHECK_INTS(ec) macro - add_comment(cb, "RUBY_VM_CHECK_INTS(ec)"); - mov( - cb, - REG0_32, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK), - ); - not(cb, REG0_32); - test( - cb, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), - REG0_32, - ); - jnz_ptr(cb, side_exit); + asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)"); + + // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, + // signal_exec, or rb_postponed_job_flush. + let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); + asm.test(interrupt_flag, interrupt_flag); + + asm.jnz(Target::side_exit(counter)); } // Generate a stubbed unconditional jump to the next bytecode instruction. // Blocks that are part of a guard chain can use this to share the same successor. fn jump_to_next_insn( jit: &mut JITState, - current_context: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) { - // Reset the depth since in current usages we only ever jump to to +) -> Option<()> { + // Reset the depth since in current usages we only ever jump to // chain_depth > 0 from the same instruction. - let mut reset_depth = *current_context; - reset_depth.reset_chain_depth(); + let mut reset_depth = asm.ctx; + reset_depth.reset_chain_depth_and_defer(); let jump_block = BlockId { iseq: jit.iseq, - idx: jit_next_insn_idx(jit), + idx: jit.next_insn_idx(), }; // We are at the end of the current instruction. Record the boundary. 
if jit.record_boundary_patch_point { - let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; - let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap()); - record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; + let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; + let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb); + record_global_inval_patch(asm, exit_pos?); } // Generate the jump instruction - gen_direct_jump(jit, &reset_depth, jump_block, cb); + gen_direct_jump(jit, &reset_depth, jump_block, asm); + Some(()) } // Compile a sequence of bytecode instructions for a given basic block version. @@ -687,42 +1134,72 @@ pub fn gen_single_block( ocb: &mut OutlinedCb, ) -> Result<BlockRef, ()> { // Limit the number of specialized versions for this block - let mut ctx = limit_block_versions(blockid, start_ctx); + let ctx = limit_block_versions(blockid, start_ctx); verify_blockid(blockid); assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0)); + // Save machine code placement of the block. `cb` might page switch when we + // generate code in `ocb`. 
+ let block_start_addr = cb.get_write_ptr(); + // Instruction sequence to compile let iseq = blockid.iseq; let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; - let mut insn_idx: c_uint = blockid.idx; - let starting_insn_idx = insn_idx; - - // Allocate the new block - let blockref = Block::new(blockid, &ctx); + let iseq_size: IseqIdx = if let Ok(size) = iseq_size.try_into() { + size + } else { + // ISeq too large to compile + return Err(()); + }; + let mut insn_idx: IseqIdx = blockid.idx; // Initialize a JIT state object - let mut jit = JITState::new(&blockref); + let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec); jit.iseq = blockid.iseq; - jit.ec = Some(ec); - // Mark the start position of the block - blockref.borrow_mut().set_start_addr(cb.get_write_ptr()); + // Create a backend assembler instance + let mut asm = Assembler::new(); + asm.ctx = ctx; + + #[cfg(feature = "disasm")] + if get_option_ref!(dump_disasm).is_some() { + let blockid_idx = blockid.idx; + let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; + asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth); + asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8()); + } + + // Mark the start of an ISEQ for --yjit-perf + jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ); + + if asm.ctx.is_return_landing() { + // Continuation of the end of gen_leave(). + // Reload REG_SP for the current frame and transfer the return value + // to the stack top. 
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + + asm.ctx.clear_return_landing(); + } // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { // Get the current pc and opcode - let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() .unwrap(); - // opt_getinlinecache wants to be in a block all on its own. Cut the block short - // if we run into it. See gen_opt_getinlinecache() for details. - if opcode == OP_OPT_GETINLINECACHE && insn_idx > starting_insn_idx { - jump_to_next_insn(&mut jit, &ctx, cb, ocb); + // We need opt_getconstant_path to be in a block all on its own. Cut the block short + // if we run into it. This is necessary because we want to invalidate based on the + // instruction's index. + if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > jit.starting_insn_idx { + jump_to_next_insn(&mut jit, &mut asm, ocb); break; } @@ -730,56 +1207,70 @@ pub fn gen_single_block( jit.insn_idx = insn_idx; jit.opcode = opcode; jit.pc = pc; - jit.side_exit_for_pc = None; + jit.stack_size_for_pc = asm.ctx.get_stack_size(); + asm.set_side_exit_context(pc, asm.ctx.get_stack_size()); + + // stack_pop doesn't immediately deallocate a register for stack temps, + // but it's safe to do so at this instruction boundary. 
+ for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS { + asm.ctx.dealloc_temp_reg(stack_idx); + } // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { // Generate an exit to this instruction and record it - let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap()); - record_global_inval_patch(cb, exit_pos); + let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, ocb).ok_or(())?; + record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } // In debug mode, verify our existing assumption - if cfg!(debug_assertions) && get_option!(verify_ctx) && jit_at_current_insn(&jit) { - verify_ctx(&jit, &ctx); + if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() { + verify_ctx(&jit, &asm.ctx); } + // :count-placement: + // Count bytecode instructions that execute in generated code. + // Note that the increment happens even when the output takes side exit. + gen_counter_incr(&mut asm, Counter::yjit_insns_count); + // Lookup the codegen function for this instruction - let mut status = CantCompile; + let mut status = None; if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) { - // :count-placement: - // Count bytecode instructions that execute in generated code. - // Note that the increment happens even when the output takes side exit. 
- gen_counter_incr!(cb, exec_instruction); - // Add a comment for the name of the YARV instruction - add_comment(cb, &insn_name(opcode)); + asm_comment!(asm, "Insn: {:04} {} (stack_size: {})", insn_idx, insn_name(opcode), asm.ctx.get_stack_size()); // If requested, dump instructions for debugging if get_option!(dump_insns) { println!("compiling {}", insn_name(opcode)); - print_str(cb, &format!("executing {}", insn_name(opcode))); + print_str(&mut asm, &format!("executing {}", insn_name(opcode))); } // Call the code generation function - status = gen_fn(&mut jit, &mut ctx, cb, ocb); + jit_perf_symbol_push!(jit, &mut asm, &insn_name(opcode), PerfMap::Codegen); + status = gen_fn(&mut jit, &mut asm, ocb); + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::Codegen); + + #[cfg(debug_assertions)] + assert!(!asm.get_leaf_ccall(), "ccall() wasn't used after leaf_ccall was set in {}", insn_name(opcode)); } // If we can't compile this instruction // exit to the interpreter and stop compiling - if status == CantCompile { - let mut block = jit.block.borrow_mut(); - - // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE, - // the exit this generates would be wrong. We could save a copy of the entry context - // and assert that ctx is the same here. - let exit = gen_exit(jit.pc, &ctx, cb); - - // If this is the first instruction in the block, then we can use - // the exit for block->entry_exit. - if insn_idx == block.get_blockid().idx { - block.entry_exit = Some(exit); + if status == None { + if get_option!(dump_insns) { + println!("can't compile {}", insn_name(opcode)); + } + + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. 
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + gen_exit(jit.pc, &mut asm); + + // If this is the first instruction in the block, then + // the entry address is the address for block_entry_exit + if insn_idx == jit.starting_insn_idx { + jit.block_entry_exit = Some(jit.output_ptr); } break; @@ -787,661 +1278,955 @@ pub fn gen_single_block( // For now, reset the chain depth after each instruction as only the // first instruction in the block can concern itself with the depth. - ctx.reset_chain_depth(); + asm.ctx.reset_chain_depth_and_defer(); // Move to the next instruction to compile - insn_idx += insn_len(opcode); + insn_idx += insn_len(opcode) as u16; + + // Move past next instruction when instructed + if status == Some(SkipNextInsn) { + let next_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + let next_opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, next_pc) }.try_into().unwrap(); + insn_idx += insn_len(next_opcode) as u16; + } // If the instruction terminates this block - if status == EndBlock { + if status == Some(EndBlock) { break; } } + let end_insn_idx = insn_idx; - // Finish filling out the block - { - let mut block = jit.block.borrow_mut(); - - // Mark the end position of the block - block.set_end_addr(cb.get_write_ptr()); + // We currently can't handle cases where the request is for a block that + // doesn't go to the next instruction in the same iseq. + assert!(!jit.record_boundary_patch_point); - // Store the index of the last instruction in the block - block.set_end_idx(insn_idx); + // Pad the block if it has the potential to be invalidated + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); } - // We currently can't handle cases where the request is for a block that - // doesn't go to the next instruction. 
- //assert!(!jit.record_boundary_patch_point); + // Mark the end of an ISEQ for --yjit-perf + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::ISEQ); + + // Compile code into the code block + let (_, gc_offsets) = asm.compile(cb, Some(ocb)).ok_or(())?; + let end_addr = cb.get_write_ptr(); + + // Flush perf symbols after asm.compile() writes addresses + if get_option!(perf_map).is_some() { + jit.flush_perf_symbols(cb); + } // If code for the block doesn't fit, fail if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { return Err(()); } - // TODO: we may want a feature for this called dump_insns? Can leave commented for now - /* - if (YJIT_DUMP_MODE >= 2) { - // Dump list of compiled instrutions - fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq); - for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) { - int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx)); - fprintf(stderr, " %04d %s\n", idx, insn_name(opcode)); - idx += insn_len(opcode); - } - } - */ - // Block compiled successfully - Ok(blockref) + Ok(jit.into_block(end_insn_idx, block_start_addr, end_addr, gc_offsets)) } fn gen_nop( _jit: &mut JITState, - _ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Do nothing - KeepCompiling + Some(KeepCompiling) } fn gen_pop( _jit: &mut JITState, - ctx: &mut Context, - _cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Decrement SP - ctx.stack_pop(1); - KeepCompiling + asm.stack_pop(1); + Some(KeepCompiling) } fn gen_dup( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let dup_val = ctx.stack_pop(0); - let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); +) -> Option<CodegenStatus> { + let dup_val = asm.stack_opnd(0); + let mapping = 
asm.ctx.get_opnd_mapping(dup_val.into()); - let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); - mov(cb, REG0, dup_val); - mov(cb, loc0, REG0); + let loc0 = asm.stack_push_mapping(mapping); + asm.mov(loc0, dup_val); - KeepCompiling + Some(KeepCompiling) } // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); // In practice, seems to be only used for n==2 if n != 2 { - return CantCompile; + return None; } - let opnd1: X86Opnd = ctx.stack_opnd(1); - let opnd0: X86Opnd = ctx.stack_opnd(0); + let opnd1: Opnd = asm.stack_opnd(1); + let opnd0: Opnd = asm.stack_opnd(0); - let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); - let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); + let mapping1 = asm.ctx.get_opnd_mapping(opnd1.into()); + let mapping0 = asm.ctx.get_opnd_mapping(opnd0.into()); - let dst1: X86Opnd = ctx.stack_push_mapping(mapping1); - mov(cb, REG0, opnd1); - mov(cb, dst1, REG0); + let dst1: Opnd = asm.stack_push_mapping(mapping1); + asm.mov(dst1, opnd1); - let dst0: X86Opnd = ctx.stack_push_mapping(mapping0); - mov(cb, REG0, opnd0); - mov(cb, dst0, REG0); + let dst0: Opnd = asm.stack_push_mapping(mapping0); + asm.mov(dst0, opnd0); - KeepCompiling + Some(KeepCompiling) } // Swap top 2 stack entries fn gen_swap( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - stack_swap(ctx, cb, 0, 1, REG0, REG1); - KeepCompiling +) -> Option<CodegenStatus> { + stack_swap(asm, 0, 1); + Some(KeepCompiling) } fn stack_swap( - ctx: &mut Context, - cb: &mut CodeBlock, - offset0: u16, - offset1: u16, - _reg0: X86Opnd, - _reg1: X86Opnd, + asm: &mut Assembler, + offset0: i32, + offset1: i32, ) { - let opnd0 = ctx.stack_opnd(offset0 as i32); - let 
opnd1 = ctx.stack_opnd(offset1 as i32); + let stack0_mem = asm.stack_opnd(offset0); + let stack1_mem = asm.stack_opnd(offset1); - let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0)); - let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1)); + let mapping0 = asm.ctx.get_opnd_mapping(stack0_mem.into()); + let mapping1 = asm.ctx.get_opnd_mapping(stack1_mem.into()); - mov(cb, REG0, opnd0); - mov(cb, REG1, opnd1); - mov(cb, opnd0, REG1); - mov(cb, opnd1, REG0); + let stack0_reg = asm.load(stack0_mem); + let stack1_reg = asm.load(stack1_mem); + asm.mov(stack0_mem, stack1_reg); + asm.mov(stack1_mem, stack0_reg); - ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); - ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); + asm.ctx.set_opnd_mapping(stack0_mem.into(), mapping1); + asm.ctx.set_opnd_mapping(stack1_mem.into(), mapping0); } fn gen_putnil( - jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + _jit: &mut JITState, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - jit_putobject(jit, ctx, cb, Qnil); - KeepCompiling +) -> Option<CodegenStatus> { + jit_putobject(asm, Qnil); + Some(KeepCompiling) } -fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) { +fn jit_putobject(asm: &mut Assembler, arg: VALUE) { let val_type: Type = Type::from(arg); - let stack_top = ctx.stack_push(val_type); - - if arg.special_const_p() { - // Immediates will not move and do not need to be tracked for GC - // Thanks to this we can mov directly to memory when possible. 
- let imm = imm_opnd(arg.as_i64()); - - // 64-bit immediates can't be directly written to memory - if imm.num_bits() <= 32 { - mov(cb, stack_top, imm); - } else { - mov(cb, REG0, imm); - mov(cb, stack_top, REG0); - } - } else { - // Load the value to push into REG0 - // Note that this value may get moved by the GC - jit_mov_gc_ptr(jit, cb, REG0, arg); - - // Write argument at SP - mov(cb, stack_top, REG0); - } + let stack_top = asm.stack_push(val_type); + asm.mov(stack_top, arg.into()); } fn gen_putobject_int2fix( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { let opcode = jit.opcode; - let cst_val: usize = if opcode == OP_PUTOBJECT_INT2FIX_0_ { + let cst_val: usize = if opcode == YARVINSN_putobject_INT2FIX_0_.as_usize() { 0 } else { 1 }; + let cst_val = VALUE::fixnum_from_usize(cst_val); - jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val)); - KeepCompiling + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, cst_val, ocb) { + return Some(result); + } + + jit_putobject(asm, cst_val); + Some(KeepCompiling) } fn gen_putobject( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let arg: VALUE = jit_get_arg(jit, 0); + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let arg: VALUE = jit.get_arg(0); - jit_putobject(jit, ctx, cb, arg); - KeepCompiling + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, arg, ocb) { + return Some(result); + } + + jit_putobject(asm, arg); + Some(KeepCompiling) +} + +/// Combine `putobject` and `opt_ltlt` together if profitable, for example when +/// left shifting an integer by a constant amount. 
+fn fuse_putobject_opt_ltlt( + jit: &mut JITState, + asm: &mut Assembler, + constant_object: VALUE, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let next_opcode = unsafe { rb_vm_insn_addr2opcode(jit.pc.add(insn_len(jit.opcode).as_usize()).read().as_ptr()) }; + if next_opcode == YARVINSN_opt_ltlt as i32 && constant_object.fixnum_p() { + // Untag the fixnum shift amount + let shift_amt = constant_object.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return None; + } + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let lhs = jit.peek_at_stack(&asm.ctx, 0); + if !lhs.fixnum_p() { + return None; + } + + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LTLT) { + return None; + } + + asm_comment!(asm, "integer left shift with rhs={shift_amt}"); + let lhs = asm.stack_opnd(0); + + // Guard that lhs is a fixnum if necessary + let lhs_type = asm.ctx.get_opnd_type(lhs.into()); + if lhs_type != Type::Fixnum { + asm_comment!(asm, "guard arg0 fixnum"); + asm.test(lhs, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnums, + ); + } + + asm.stack_pop(1); + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + return Some(SkipNextInsn); + } + return None; } fn gen_putself( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Load self from CFP - let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF); - mov(cb, REG0, cf_opnd); +) -> Option<CodegenStatus> { // Write it on the stack - let stack_top: X86Opnd = ctx.stack_push_self(); - mov(cb, stack_top, REG0); + let stack_top = asm.stack_push_self(); + asm.mov( + stack_top, + Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF) + ); - KeepCompiling + Some(KeepCompiling) } fn gen_putspecialobject( jit: &mut JITState, - ctx: &mut Context, - cb: &mut 
CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let object_type = jit_get_arg(jit, 0); - - if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE) { - let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap); - jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore }); - mov(cb, stack_top, REG0); - KeepCompiling +) -> Option<CodegenStatus> { + let object_type = jit.get_arg(0).as_usize(); + + if object_type == VM_SPECIAL_OBJECT_VMCORE.as_usize() { + let stack_top = asm.stack_push(Type::UnknownHeap); + let frozen_core = unsafe { rb_mRubyVMFrozenCore }; + asm.mov(stack_top, frozen_core.into()); + Some(KeepCompiling) } else { // TODO: implement for VM_SPECIAL_OBJECT_CBASE and // VM_SPECIAL_OBJECT_CONST_BASE - CantCompile + None } } // set Nth stack entry to stack top fn gen_setn( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - let top_val: X86Opnd = ctx.stack_pop(0); - let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap()); - mov(cb, REG0, top_val); - mov(cb, dst_opnd, REG0); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + + let top_val = asm.stack_opnd(0); + let dst_opnd = asm.stack_opnd(n.try_into().unwrap()); + asm.mov( + dst_opnd, + top_val + ); - let mapping = ctx.get_opnd_mapping(StackOpnd(0)); - ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping); + let mapping = asm.ctx.get_opnd_mapping(top_val.into()); + asm.ctx.set_opnd_mapping(dst_opnd.into(), mapping); - KeepCompiling + Some(KeepCompiling) } // get nth stack value, then push it fn gen_topn( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - let top_n_val = ctx.stack_opnd(n.try_into().unwrap()); - let mapping = 
ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap())); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); - let loc0 = ctx.stack_push_mapping(mapping); - mov(cb, REG0, top_n_val); - mov(cb, loc0, REG0); + let top_n_val = asm.stack_opnd(n.try_into().unwrap()); + let mapping = asm.ctx.get_opnd_mapping(top_n_val.into()); + let loc0 = asm.stack_push_mapping(mapping); + asm.mov(loc0, top_n_val); - KeepCompiling + Some(KeepCompiling) } // Pop n values off the stack fn gen_adjuststack( jit: &mut JITState, - ctx: &mut Context, - _cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - ctx.stack_pop(n); - KeepCompiling +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + asm.stack_pop(n); + Some(KeepCompiling) } fn gen_opt_plus( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(jit, asm, ocb); - // Get the 
operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Add arg0 + arg1 and test for overflow - mov(cb, REG0, arg0); - sub(cb, REG0, imm_opnd(1)); - add(cb, REG0, arg1); - jo_ptr(cb, side_exit); + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(Target::side_exit(Counter::opt_plus_overflow)); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); - KeepCompiling + Some(KeepCompiling) } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } // new array initialized from top N values fn gen_newarray( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_u32(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_u32(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); - let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize)); + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); + asm.lea(values_opnd) + }; // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8); + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + 
values_ptr + ] + ); - ctx.stack_pop(n.as_usize()); - let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.stack_pop(n.as_usize()); + let stack_ret = asm.stack_push(Type::CArray); + asm.mov(stack_ret, new_ary); - KeepCompiling + Some(KeepCompiling) } // dup array fn gen_duparray( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let ary = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let ary = jit.get_arg(0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); // call rb_ary_resurrect(VALUE ary); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary); - call_ptr(cb, REG0, rb_ary_resurrect as *const u8); + let new_ary = asm.ccall( + rb_ary_resurrect as *const u8, + vec![ary.into()], + ); - let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::CArray); + asm.mov(stack_ret, new_ary); - KeepCompiling + Some(KeepCompiling) } // dup hash fn gen_duphash( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let hash = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let hash = jit.get_arg(0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); // call rb_hash_resurrect(VALUE hash); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash); - call_ptr(cb, REG0, rb_hash_resurrect as *const u8); + let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]); - let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, hash); - KeepCompiling + Some(KeepCompiling) } // call to_a on the array on the stack fn gen_splatarray( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut 
Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let flag = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); - // Save the PC and SP because the callee may allocate + // Save the PC and SP because the callee may call #to_a // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); // Get the operands from the stack - let ary_opnd = ctx.stack_pop(1); + let ary_opnd = asm.stack_opnd(0); // Call rb_vm_splat_array(flag, ary) - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag); - mov(cb, C_ARG_REGS[1], ary_opnd); - call_ptr(cb, REG1, rb_vm_splat_array as *const u8); + let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC - let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); - KeepCompiling + Some(KeepCompiling) +} + +// call to_hash on hash to keyword splat before converting block +// e.g. foo(**object, &block) +fn gen_splatkw( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime hash operand + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let comptime_hash = jit.peek_at_stack(&asm.ctx, 1); + if comptime_hash.hash_p() { + // If a compile-time hash operand is T_HASH, just guard that it's T_HASH. 
+ let hash_opnd = asm.stack_opnd(1); + guard_object_is_hash(asm, hash_opnd, hash_opnd.into(), Counter::splatkw_not_hash); + } else if comptime_hash.nil_p() { + // Speculate we'll see nil if compile-time hash operand is nil + let hash_opnd = asm.stack_opnd(1); + let hash_opnd_type = asm.ctx.get_opnd_type(hash_opnd.into()); + + if hash_opnd_type != Type::Nil { + asm.cmp(hash_opnd, Qnil.into()); + asm.jne(Target::side_exit(Counter::splatkw_not_nil)); + + if Type::Nil.diff(hash_opnd_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(hash_opnd.into(), Type::Nil); + } + } + } else { + // Otherwise, call #to_hash on the operand if it's not nil. + + // Save the PC and SP because the callee may call #to_hash + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let block_opnd = asm.stack_opnd(0); + let block_type = asm.ctx.get_opnd_type(block_opnd.into()); + let hash_opnd = asm.stack_opnd(1); + + c_callable! { + fn to_hash_if_not_nil(mut obj: VALUE) -> VALUE { + if obj != Qnil { + obj = unsafe { rb_to_hash_type(obj) }; + } + obj + } + } + + let hash = asm.ccall(to_hash_if_not_nil as _, vec![hash_opnd]); + asm.stack_pop(2); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, hash); + asm.stack_push(block_type); + // Leave block_opnd spilled by ccall as is + asm.ctx.dealloc_temp_reg(asm.ctx.get_stack_size() - 1); + } + + Some(KeepCompiling) +} + +// concat two arrays +fn gen_concatarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary2st_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); + + // Call rb_vm_concat_array(ary1, ary2st) + let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, 
ary2st_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// concat second array to first array. +// first argument must already be an array. +// attempts to convert second object to array using to_a. +fn gen_concattoarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary2_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); + + let ary = asm.ccall(rb_vm_concat_to_array as *const u8, vec![ary1_opnd, ary2_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// push given number of objects to array directly before. +fn gen_pushtoarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u64(); + + // Save the PC and SP because the callee may allocate + jit_prepare_call_with_gc(jit, asm); + + // Get the operands from the stack + let ary_opnd = asm.stack_opnd(num as i32); + let objp_opnd = asm.lea(asm.ctx.sp_opnd(-(num as i32))); + + let ary = asm.ccall(rb_ary_cat as *const u8, vec![ary_opnd, objp_opnd, num.into()]); + asm.stack_pop(num as usize + 1); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) } // new range initialized from top 2 values fn gen_newrange( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let flag = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); // rb_range_new() allocates and can raise - 
jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); // val = rb_range_new(low, high, (int)flag); - mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1)); - mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0)); - mov(cb, C_ARG_REGS[2], uimm_opnd(flag.into())); - call_ptr(cb, REG0, rb_range_new as *const u8); + let range_opnd = asm.ccall( + rb_range_new as *const u8, + vec![ + asm.stack_opnd(1), + asm.stack_opnd(0), + flag.into() + ] + ); - ctx.stack_pop(2); - let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, RAX); + asm.stack_pop(2); + let stack_ret = asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, range_opnd); - KeepCompiling + Some(KeepCompiling) } fn guard_object_is_heap( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - _ctx: &mut Context, - side_exit: CodePtr, + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, ) { - add_comment(cb, "guard object is heap"); + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_heap() { + return; + } + + asm_comment!(asm, "guard object is heap"); // Test that the object is not an immediate - test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); + asm.test(object, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(Target::side_exit(counter)); + + // Test that the object is not false + asm.cmp(object, Qfalse.into()); + asm.je(Target::side_exit(counter)); - // Test that the object is not false or nil - cmp(cb, object_opnd, uimm_opnd(Qnil.into())); - jbe_ptr(cb, side_exit); + if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::UnknownHeap); + } } fn guard_object_is_array( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - flags_opnd: X86Opnd, - _ctx: &mut Context, - side_exit: CodePtr, + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, ) { - add_comment(cb, "guard object is array"); + let object_type = 
asm.ctx.get_opnd_type(object_opnd); + if object_type.is_array() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. } => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is array"); // Pull out the type mask - mov( - cb, - flags_opnd, - mem_opnd( - 8 * SIZEOF_VALUE as u8, - object_opnd, - RUBY_OFFSET_RBASIC_FLAGS, - ), - ); - and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64)); + let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); // Compare the result with T_ARRAY - cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64)); - jne_ptr(cb, side_exit); + asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into()); + asm.jne(Target::side_exit(counter)); + + if Type::TArray.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::TArray); + } +} + +fn guard_object_is_hash( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_hash() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. 
} => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is hash"); + + // Pull out the type mask + let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); + + // Compare the result with T_HASH + asm.cmp(flags_opnd, (RUBY_T_HASH as u64).into()); + asm.jne(Target::side_exit(counter)); + + if Type::THash.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::THash); + } +} + +fn guard_object_is_string( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_string() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. } => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is string"); + + // Pull out the type mask + let flags_reg = asm.load(Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS)); + let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64)); + + // Compare the result with T_STRING + asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64)); + asm.jne(Target::side_exit(counter)); + + if Type::TString.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::TString); + } } -// push enough nils onto the stack to fill out an array +/// This guards that a special flag is not set on a hash. +/// By passing a hash with this flag set as the last argument +/// in a splat call, you can change the way keywords are handled +/// to behave like ruby 2. We don't currently support this. 
+fn guard_object_is_not_ruby2_keyword_hash( + asm: &mut Assembler, + object_opnd: Opnd, + counter: Counter, +) { + asm_comment!(asm, "guard object is not ruby2 keyword hash"); + + let not_ruby2_keyword = asm.new_label("not_ruby2_keyword"); + asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(not_ruby2_keyword); + + asm.cmp(object_opnd, Qfalse.into()); + asm.je(not_ruby2_keyword); + + let flags_opnd = asm.load(Opnd::mem( + VALUE_BITS, + object_opnd, + RUBY_OFFSET_RBASIC_FLAGS, + )); + let type_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); + + asm.cmp(type_opnd, (RUBY_T_HASH as u64).into()); + asm.jne(not_ruby2_keyword); + + asm.test(flags_opnd, (RHASH_PASS_AS_KEYWORDS as u64).into()); + asm.jnz(Target::side_exit(counter)); + + asm.write_label(not_ruby2_keyword); +} + +/// This instruction pops a single value off the stack, converts it to an +/// arrayif it isn’t already one using the #to_ary method, and then pushes +/// the values from the array back onto the stack. fn gen_expandarray( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let flag = jit_get_arg(jit, 1); - let VALUE(flag_value) = flag; +) -> Option<CodegenStatus> { + // Both arguments are rb_num_t which is unsigned + let num = jit.get_arg(0).as_u32(); + let flag = jit.get_arg(1).as_usize(); // If this instruction has the splat flag, then bail out. - if flag_value & 0x01 != 0 { - incr_counter!(expandarray_splat); - return CantCompile; + if flag & 0x01 != 0 { + gen_counter_incr(asm, Counter::expandarray_splat); + return None; } // If this instruction has the postarg flag, then bail out. 
- if flag_value & 0x02 != 0 { - incr_counter!(expandarray_postarg); - return CantCompile; + if flag & 0x02 != 0 { + gen_counter_incr(asm, Counter::expandarray_postarg); + return None; + } + + let array_opnd = asm.stack_opnd(0); + + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - let side_exit = get_side_exit(jit, ocb, ctx); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); - // num is the number of requested values. If there aren't enough in the - // array then we're going to push on nils. - let num = jit_get_arg(jit, 0); - let array_type = ctx.get_opnd_type(StackOpnd(0)); - let array_opnd = ctx.stack_pop(1); + // If the comptime receiver is not an array + if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } { + // at compile time, ensure to_ary is not defined + let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) }; + let cme_def_type = unsafe { get_cme_def_type(target_cme) }; - if matches!(array_type, Type::Nil) { - // special case for a, b = nil pattern - // push N nils onto the stack - for _i in 0..(num.into()) { - let push_opnd = ctx.stack_push(Type::Nil); - mov(cb, push_opnd, uimm_opnd(Qnil.into())); + // if to_ary is defined, return can't compile so to_ary can be called + if cme_def_type != VM_METHOD_TYPE_UNDEF { + gen_counter_incr(asm, Counter::expandarray_to_ary); + return None; } - return KeepCompiling; + + // invalidate compile block if to_ary is later defined + jit.assume_method_lookup_stable(asm, ocb, target_cme); + + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_recv.class_of(), + array_opnd, + array_opnd.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::expandarray_not_array, + ); + + let opnd = asm.stack_pop(1); // pop after using the type info + + // If we don't actually want any values, then just keep going + if num == 0 { + return Some(KeepCompiling); + } + + // load opnd 
to avoid a race because we are also pushing onto the stack + let opnd = asm.load(opnd); + + for _ in 1..num { + let push_opnd = asm.stack_push(Type::Nil); + asm.mov(push_opnd, Qnil.into()); + } + + let push_opnd = asm.stack_push(Type::Unknown); + asm.mov(push_opnd, opnd); + + return Some(KeepCompiling); } - // Move the array from the stack into REG0 and check that it's an array. - mov(cb, REG0, array_opnd); - guard_object_is_heap( - cb, - REG0, - ctx, - counted_exit!(ocb, side_exit, expandarray_not_array), - ); + // Get the compile-time array length + let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 }; + + // Move the array from the stack and check that it's an array. guard_object_is_array( - cb, - REG0, - REG1, - ctx, - counted_exit!(ocb, side_exit, expandarray_not_array), + asm, + array_opnd, + array_opnd.into(), + Counter::expandarray_not_array, ); // If we don't actually want any values, then just return. - if num == VALUE(0) { - return KeepCompiling; + if num == 0 { + asm.stack_pop(1); // pop the array + return Some(KeepCompiling); } - // Pull out the embed flag to check if it's an embedded array. - let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS); - mov(cb, REG1, flags_opnd); - - // Move the length of the embedded array into REG1. - and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64)); - shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64)); - - // Conditionally move the length of the heap array into REG1. - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let array_len_opnd = mem_opnd( - (8 * size_of::<std::os::raw::c_long>()) as u8, - REG0, - RUBY_OFFSET_RARRAY_AS_HEAP_LEN, - ); - cmovz(cb, REG1, array_len_opnd); + let array_opnd = asm.stack_opnd(0); + let array_reg = asm.load(array_opnd); + let array_len_opnd = get_array_len(asm, array_reg); - // Only handle the case where the number of values in the array is greater - // than or equal to the number of values requested. 
- cmp(cb, REG1, uimm_opnd(num.into())); - jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small)); - - // Load the address of the embedded array into REG1. - // (struct RArray *)(obj)->as.ary - let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY); - lea(cb, REG1, ary_opnd); - - // Conditionally load the address of the heap array into REG1. - // (struct RArray *)(obj)->as.heap.ptr - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let heap_ptr_opnd = mem_opnd( - (8 * size_of::<usize>()) as u8, - REG0, - RUBY_OFFSET_RARRAY_AS_HEAP_PTR, - ); - cmovz(cb, REG1, heap_ptr_opnd); + // Guard on the comptime/expected array length + if comptime_len >= num { + asm_comment!(asm, "guard array length >= {}", num); + asm.cmp(array_len_opnd, num.into()); + jit_chain_guard( + JCC_JB, + jit, + asm, + ocb, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); - // Loop backward through the array and push each element onto the stack. 
- for i in (0..(num.as_i32())).rev() { - let top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32))); - mov(cb, top, REG0); + } else { + asm_comment!(asm, "guard array length == {}", comptime_len); + asm.cmp(array_len_opnd, comptime_len.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); } - KeepCompiling -} - -fn gen_getlocal_wc0( - jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Compute the offset from BP to the local - let slot_idx = jit_get_arg(jit, 0).as_i32(); - let offs: i32 = -(SIZEOF_VALUE as i32) * slot_idx; - let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx); + let array_opnd = asm.stack_pop(1); // pop after using the type info - // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); + // Load the pointer to the embedded or heap array + let ary_opnd = if comptime_len > 0 { + let array_reg = asm.load(array_opnd); + Some(get_array_ptr(asm, array_reg)) + } else { + None + }; - // Load the local from the EP - mov(cb, REG0, mem_opnd(64, REG0, offs)); + // Loop backward through the array and push each element onto the stack. 
+ for i in (0..num).rev() { + let top = asm.stack_push(if i < comptime_len { Type::Unknown } else { Type::Nil }); + let offset = i32::try_from(i * (SIZEOF_VALUE as u32)).unwrap(); - // Write the local at SP - let stack_top = ctx.stack_push_local(local_idx.as_usize()); - mov(cb, stack_top, REG0); + // Missing elements are Qnil + asm_comment!(asm, "load array[{}]", i); + let elem_opnd = if i < comptime_len { Opnd::mem(64, ary_opnd.unwrap(), offset) } else { Qnil.into() }; + asm.mov(top, elem_opnd); + } - KeepCompiling + Some(KeepCompiling) } // Compute the index of a local variable from its slot index -fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { +fn ep_offset_to_local_idx(iseq: IseqPtr, ep_offset: u32) -> u32 { // Layout illustration // This is an array of VALUE // | VM_ENV_DATA_SIZE | @@ -1452,7 +2237,7 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { // ^ ^ ^ ^ // +-------+---local_table_size----+ cfp->ep--+ // | | - // +------------------slot_idx----------------+ + // +------------------ep_offset---------------+ // // See usages of local_var_name() from iseq.c for similar calculation. 
@@ -1460,419 +2245,449 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) } .try_into() .unwrap(); - let op = slot_idx - (VM_ENV_DATA_SIZE as i32); + let op = (ep_offset - VM_ENV_DATA_SIZE) as i32; let local_idx = local_table_size - op - 1; assert!(local_idx >= 0 && local_idx < local_table_size); local_idx.try_into().unwrap() } // Get EP at level from CFP -fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) { - // Load environment pointer EP from CFP - let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP); - mov(cb, reg, ep_opnd); +fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { + // Load environment pointer EP from CFP into a register + let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); + let mut ep_opnd = asm.load(ep_opnd); for _ in (0..level).rev() { // Get the previous EP from the current EP // See GET_PREV_EP(ep) macro // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) - let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32); - mov(cb, reg, mem_opnd(64, reg, offs)); - and(cb, reg, imm_opnd(!0x03)); + let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL; + ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offs)); + ep_opnd = asm.and(ep_opnd, Opnd::Imm(!0x03)); + } + + ep_opnd +} + +// Gets the EP of the ISeq of the containing method, or "local level". +// Equivalent of GET_LEP() macro. 
+fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { + // Equivalent of get_lvar_level() in compile.c + fn get_lvar_level(iseq: IseqPtr) -> u32 { + if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } { + 0 + } else { + 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) }) + } } + + let level = get_lvar_level(jit.get_iseq()); + gen_get_ep(asm, level) } fn gen_getlocal_generic( - ctx: &mut Context, - cb: &mut CodeBlock, - local_idx: u32, + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ep_offset: u32, level: u32, -) -> CodegenStatus { - gen_get_ep(cb, REG0, level); +) -> Option<CodegenStatus> { + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load the local using SP register + asm.ctx.ep_opnd(-(ep_offset as i32)) + } else { + // Load environment pointer EP at the given level from CFP + let ep_opnd = gen_get_ep(asm, level); - // Load the local from the block - // val = *(vm_get_ep(GET_EP(), level) - idx); - let offs = -(SIZEOF_VALUE as i32 * local_idx as i32); - mov(cb, REG0, mem_opnd(64, REG0, offs)); + // Load the local from the block + // val = *(vm_get_ep(GET_EP(), level) - idx); + let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); + Opnd::mem(64, ep_opnd, offs) + }; // Write the local at SP - let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, REG0); + let stack_top = if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset); + asm.stack_push_local(local_idx.as_usize()) + } else { + asm.stack_push(Type::Unknown) + }; - KeepCompiling + asm.mov(stack_top, local_opnd); + + Some(KeepCompiling) } fn gen_getlocal( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0); - let level = jit_get_arg(jit, 1); - gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32()) + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32();
+ let level = jit.get_arg(1).as_u32(); + gen_getlocal_generic(jit, asm, ocb, idx, level) } -fn gen_getlocal_wc1( +fn gen_getlocal_wc0( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0); - gen_getlocal_generic(ctx, cb, idx.as_u32(), 1) + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_getlocal_generic(jit, asm, ocb, idx, 0) } -fn gen_setlocal_wc0( +fn gen_getlocal_wc1( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - /* - vm_env_write(const VALUE *ep, int index, VALUE v) - { - VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS]; - if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) { - VM_STACK_ENV_WRITE(ep, index, v); - } - else { - vm_env_write_slowpath(ep, index, v); - } - } - */ - - let slot_idx = jit_get_arg(jit, 0).as_i32(); - let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx).as_usize(); - - // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); - - // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( - 64, - REG0, - SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, - ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64)); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); - - // Set the type of the local variable in the context - let temp_type = ctx.get_opnd_type(StackOpnd(0)); - ctx.set_local_type(local_idx, temp_type); - - // Pop the value to write from the stack - let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); - - // Write the value at the environment pointer - let offs: i32 = -8 * slot_idx; - mov(cb, mem_opnd(64, REG0, offs), REG1); - - KeepCompiling +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + 
gen_getlocal_generic(jit, asm, ocb, idx, 1) } fn gen_setlocal_generic( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, - local_idx: i32, + ep_offset: u32, level: u32, -) -> CodegenStatus { - // Load environment pointer EP at level - gen_get_ep(cb, REG0, level); +) -> Option<CodegenStatus> { + let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + // Fallback because of write barrier + if asm.ctx.get_chain_depth() > 0 { + // Load environment pointer EP at level + let ep_opnd = gen_get_ep(asm, level); + + // This function should not yield to the GC. + // void rb_vm_env_write(const VALUE *ep, int index, VALUE v) + let index = -(ep_offset as i64); + let value_opnd = asm.stack_opnd(0); + asm.ccall( + rb_vm_env_write as *const u8, + vec![ + ep_opnd, + index.into(), + value_opnd, + ] + ); + asm.stack_pop(1); - // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( - 64, - REG0, - SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, - ); - test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + return Some(KeepCompiling); + } - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load flags and the local using SP register + let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32)); + let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); + (flags_opnd, local_opnd) + } else { + // Load flags and the local for the level + let ep_opnd = gen_get_ep(asm, level); + let flags_opnd = Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, + ); + (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32)) + }; - // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); + // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers + // only affect heap objects being 
written. If we know an immediate value is being written we + // can skip this check. + if !value_type.is_imm() { + // flags & VM_ENV_FLAG_WB_REQUIRED + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); + + // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 + assert!(asm.ctx.get_chain_depth() == 0); + jit_chain_guard( + JCC_JNZ, + jit, + asm, + ocb, + 1, + Counter::setlocal_wb_required, + ); + } + + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); + } // Pop the value to write from the stack - let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); + let stack_top = asm.stack_pop(1); // Write the value at the environment pointer - let offs = -(SIZEOF_VALUE as i32 * local_idx); - mov(cb, mem_opnd(64, REG0, offs), REG1); + asm.mov(local_opnd, stack_top); - KeepCompiling + Some(KeepCompiling) } fn gen_setlocal( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + let level = jit.get_arg(1).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, level) +} + +fn gen_setlocal_wc0( + jit: &mut JITState, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0).as_i32(); - let level = jit_get_arg(jit, 1).as_u32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, level) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, 0) } fn gen_setlocal_wc1( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0).as_i32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, 1) } // new hash initialized from top N values fn gen_newhash( jit: &mut JITState, - ctx: 
&mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let num: i64 = jit_get_arg(jit, 0).as_i64(); +) -> Option<CodegenStatus> { + let num: u64 = jit.get_arg(0).as_u64(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); if num != 0 { // val = rb_hash_new_with_size(num / 2); - mov(cb, C_ARG_REGS[0], imm_opnd(num / 2)); - call_ptr(cb, REG0, rb_hash_new_with_size as *const u8); + let new_hash = asm.ccall( + rb_hash_new_with_size as *const u8, + vec![Opnd::UImm(num / 2)] + ); - // save the allocated hash as we want to push it after insertion - push(cb, RAX); - push(cb, RAX); // alignment + // Save the allocated hash as we want to push it after insertion + asm.cpush(new_hash); + asm.cpush(new_hash); // x86 alignment + + // Get a pointer to the values to insert into the hash + let stack_addr_from_top = asm.lea(asm.stack_opnd((num - 1) as i32)); // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); - mov(cb, C_ARG_REGS[0], imm_opnd(num)); - lea( - cb, - C_ARG_REGS[1], - ctx.stack_opnd((num - 1).try_into().unwrap()), + asm.ccall( + rb_hash_bulk_insert as *const u8, + vec![ + Opnd::UImm(num), + stack_addr_from_top, + new_hash + ] ); - mov(cb, C_ARG_REGS[2], RAX); - call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8); - pop(cb, RAX); // alignment - pop(cb, RAX); + let new_hash = asm.cpop(); + asm.cpop_into(new_hash); // x86 alignment - ctx.stack_pop(num.try_into().unwrap()); - let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.stack_pop(num.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, new_hash); } else { // val = rb_hash_new(); - call_ptr(cb, REG0, rb_hash_new as *const u8); - - let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]); + let stack_ret = asm.stack_push(Type::CHash); + 
asm.mov(stack_ret, new_hash); } - KeepCompiling + Some(KeepCompiling) } fn gen_putstring( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let put_val = jit.get_arg(0); + + // Save the PC and SP because the callee will allocate + jit_prepare_call_with_gc(jit, asm); + + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into(), 0.into()] + ); + + let stack_top = asm.stack_push(Type::CString); + asm.mov(stack_top, str_opnd); + + Some(KeepCompiling) +} + +fn gen_putchilledstring( + jit: &mut JITState, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let put_val = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let put_val = jit.get_arg(0); // Save the PC and SP because the callee will allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); + + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into(), 1.into()] + ); + + let stack_top = asm.stack_push(Type::CString); + asm.mov(stack_top, str_opnd); + + Some(KeepCompiling) +} + +fn gen_checkmatch( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_u32(); + + // rb_vm_check_match is not leaf unless flag is VM_CHECKMATCH_TYPE_WHEN. 
+ // See also: leafness_of_checkmatch() and check_match() + if flag != VM_CHECKMATCH_TYPE_WHEN { + jit_prepare_non_leaf_call(jit, asm); + } - mov(cb, C_ARG_REGS[0], REG_EC); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val); - call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8); + let pattern = asm.stack_opnd(0); + let target = asm.stack_opnd(1); - let stack_top = ctx.stack_push(Type::String); - mov(cb, stack_top, RAX); + extern "C" { + fn rb_vm_check_match(ec: EcPtr, target: VALUE, pattern: VALUE, num: u32) -> VALUE; + } + let result = asm.ccall(rb_vm_check_match as *const u8, vec![EC, target, pattern, flag.into()]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, result); - KeepCompiling + Some(KeepCompiling) } // Push Qtrue or Qfalse depending on whether the given keyword was supplied by // the caller fn gen_checkkeyword( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // When a keyword is unspecified past index 32, a hash will be used // instead. This can only happen in iseqs taking more than 32 keywords. 
if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } { - return CantCompile; + return None; } // The EP offset to the undefined bits local - let bits_offset = jit_get_arg(jit, 0).as_i32(); + let bits_offset = jit.get_arg(0).as_i32(); // The index of the keyword we want to check - let index: i64 = jit_get_arg(jit, 1).as_i64(); + let index: i64 = jit.get_arg(1).as_i64(); // Load environment pointer EP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // VALUE kw_bits = *(ep - bits); - let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset); + let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset); // unsigned int b = (unsigned int)FIX2ULONG(kw_bits); // if ((b & (0x01 << idx))) { // // We can skip the FIX2ULONG conversion by shifting the bit we test let bit_test: i64 = 0x01 << (index + 1); - test(cb, bits_opnd, imm_opnd(bit_test)); - mov(cb, REG0, uimm_opnd(Qfalse.into())); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmovz(cb, REG0, REG1); + asm.test(bits_opnd, Opnd::Imm(bit_test)); + let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into()); - let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); - KeepCompiling -} - -fn gen_jnz_to_target0( - cb: &mut CodeBlock, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jnz_ptr(cb, target0), - } -} - -fn gen_jz_to_target0( - cb: &mut CodeBlock, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jz_ptr(cb, target0), - } -} - -fn gen_jbe_to_target0( - cb: &mut CodeBlock, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => 
unreachable!(), - BranchShape::Default => jbe_ptr(cb, target0), - } + Some(KeepCompiling) } // Generate a jump to a stub that recompiles the current YARV instruction on failure. -// When depth_limitk is exceeded, generate a jump to a side exit. +// When depth_limit is exceeded, generate a jump to a side exit. fn jit_chain_guard( jcc: JCCKinds, - jit: &JITState, - ctx: &Context, - cb: &mut CodeBlock, + jit: &mut JITState, + asm: &mut Assembler, ocb: &mut OutlinedCb, - depth_limit: i32, - side_exit: CodePtr, + depth_limit: u8, + counter: Counter, ) { let target0_gen_fn = match jcc { - JCC_JNE | JCC_JNZ => gen_jnz_to_target0, - JCC_JZ | JCC_JE => gen_jz_to_target0, - JCC_JBE | JCC_JNA => gen_jbe_to_target0, + JCC_JNE | JCC_JNZ => BranchGenFn::JNZToTarget0, + JCC_JZ | JCC_JE => BranchGenFn::JZToTarget0, + JCC_JBE | JCC_JNA => BranchGenFn::JBEToTarget0, + JCC_JB | JCC_JNAE => BranchGenFn::JBToTarget0, + JCC_JO_MUL => BranchGenFn::JOMulToTarget0, }; - if (ctx.get_chain_depth() as i32) < depth_limit { - let mut deeper = *ctx; + if asm.ctx.get_chain_depth() < depth_limit { + // Rewind Context to use the stack_size at the beginning of this instruction. + let mut deeper = asm.ctx.with_stack_size(jit.stack_size_for_pc); deeper.increment_chain_depth(); let bid = BlockId { iseq: jit.iseq, idx: jit.insn_idx, }; - gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn); + gen_branch(jit, asm, ocb, bid, &deeper, None, None, target0_gen_fn); } else { - target0_gen_fn(cb, side_exit, None, BranchShape::Default); + target0_gen_fn.call(asm, Target::side_exit(counter), None); } } -// up to 5 different classes, and embedded or not for each -pub const GET_IVAR_MAX_DEPTH: i32 = 10; - -// hashes and arrays -pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; - -// up to 5 different classes -pub const SEND_MAX_DEPTH: i32 = 5; +// up to 8 different shapes for each +pub const GET_IVAR_MAX_DEPTH: u8 = 8; -// Codegen for setting an instance variable. 
-// Preconditions: -// - receiver is in REG0 -// - receiver has the same class as CLASS_OF(comptime_receiver) -// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled -fn gen_set_ivar( - jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - recv: VALUE, - ivar_name: ID, -) -> CodegenStatus { - // Save the PC and SP because the callee may allocate - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); +// up to 8 different shapes for each +pub const SET_IVAR_MAX_DEPTH: u8 = 8; - // Get the operands from the stack - let val_opnd = ctx.stack_pop(1); - let recv_opnd = ctx.stack_pop(1); +// hashes and arrays +pub const OPT_AREF_MAX_CHAIN_DEPTH: u8 = 2; - let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) }; +// expandarray +pub const EXPANDARRAY_MAX_CHAIN_DEPTH: u8 = 4; - // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value - mov(cb, C_ARG_REGS[0], recv_opnd); - mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into())); - mov(cb, C_ARG_REGS[2], val_opnd); - call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8); +// up to 5 different methods for send +pub const SEND_MAX_DEPTH: u8 = 5; - let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); +// up to 20 different offsets for case-when +pub const CASE_WHEN_MAX_DEPTH: u8 = 20; - KeepCompiling -} +pub const MAX_SPLAT_LENGTH: i32 = 127; // Codegen for getting an instance variable. 
// Preconditions: -// - receiver is in REG0 // - receiver has the same class as CLASS_OF(comptime_receiver) // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled fn gen_get_ivar( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, - max_chain_depth: i32, + max_chain_depth: u8, comptime_receiver: VALUE, ivar_name: ID, - reg0_opnd: InsnOpnd, - side_exit: CodePtr, -) -> CodegenStatus { + recv: Opnd, + recv_opnd: YARVOpnd, +) -> Option<CodegenStatus> { let comptime_val_klass = comptime_receiver.class_of(); - let starting_context = *ctx; // make a copy for use with jit_chain_guard + + // If recv isn't already a register, load it. + let recv = match recv { + Opnd::InsnOut { .. } => recv, + _ => asm.load(recv), + }; // Check if the comptime class uses a custom allocator let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; @@ -1886,385 +2701,687 @@ fn gen_get_ivar( // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= max_chain_depth; + if megamorphic { + gen_counter_incr(asm, Counter::num_getivar_megamorphic); + } // If the class uses the default allocator, instances should all be T_OBJECT // NOTE: This assumes nobody changes the allocator of the class after allocation. // Eventually, we can encode whether an object is T_OBJECT or not // inside object shapes. - if !receiver_t_object || uses_custom_allocator { + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic { // General case. Call rb_ivar_get(). 
// VALUE rb_ivar_get(VALUE obj, ID id) - add_comment(cb, "call rb_ivar_get()"); + asm_comment!(asm, "call rb_ivar_get()"); - // The function could raise exceptions. - jit_prepare_routine_call(jit, ctx, cb, REG1); + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name)); - call_ptr(cb, REG1, rb_ivar_get as *const u8); + let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]); - if reg0_opnd != SelfOpnd { - ctx.stack_pop(1); + if recv_opnd != SelfOpnd { + asm.stack_pop(1); } + // Push the ivar on the stack - let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); - return EndBlock; - } - - /* - // FIXME: - // This check was added because of a failure in a test involving the - // Nokogiri Document class where we see a T_DATA that still has the default - // allocator. - // Aaron Patterson argues that this is a bug in the C extension, because - // people could call .allocate() on the class and still get a T_OBJECT - // For now I added an extra dynamic check that the receiver is T_OBJECT - // so we can safely pass all the tests in Shopify Core. - // - // Guard that the receiver is T_OBJECT - // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK) - add_comment(cb, "guard receiver is T_OBJECT"); - mov(cb, REG1, member_opnd(REG0, struct RBasic, flags)); - and(cb, REG1, imm_opnd(RUBY_T_MASK)); - cmp(cb, REG1, imm_opnd(T_OBJECT)); - jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit); - */ - - // FIXME: Mapping the index could fail when there is too many ivar names. 
If we're - // compiling for a branch stub that can cause the exception to be thrown from the - // wrong PC. - let ivar_index = - unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize(); - - // Pop receiver if it's on the temp stack - if reg0_opnd != SelfOpnd { - ctx.stack_pop(1); + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); } - // Compile time self is embedded and the ivar index lands within the object - let test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED)) != VALUE(0) }; - if test_result && ivar_index < ROBJECT_EMBED_LEN_MAX { - // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - - // Guard that self is embedded - // TODO: BT and JC is shorter - add_comment(cb, "guard embedded getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); - let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); - jit_chain_guard( - JCC_JZ, - jit, - &starting_context, - cb, - ocb, - max_chain_depth, - side_exit, - ); + let ivar_index = unsafe { + let shape_id = comptime_receiver.shape_id_of(); + let shape = rb_shape_get_shape_by_id(shape_id); + let mut ivar_index: u32 = 0; + if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { + Some(ivar_index as usize) + } else { + None + } + }; - // Load the variable - let offs = RUBY_OFFSET_ROBJECT_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = mem_opnd(64, REG0, offs); - mov(cb, REG1, ivar_opnd); + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); - // Guard that the variable is not Qundef - cmp(cb, REG1, uimm_opnd(Qundef.into())); - mov(cb, REG0, uimm_opnd(Qnil.into())); - cmove(cb, REG1, REG0); + // Compile time self is embedded and the ivar index lands within the object + let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) 
!= VALUE(0) }; - // Push the ivar on the stack - let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, REG1); - } else { - // Compile time value is *not* embedded. + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); - // Guard that value is *not* embedded - // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - add_comment(cb, "guard extended getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); - let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); - jit_chain_guard( - JCC_JNZ, - jit, - &starting_context, - cb, - ocb, - max_chain_depth, - side_exit, - ); + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + max_chain_depth, + Counter::getivar_megamorphic, + ); - // Check that the extended table is big enough - if ivar_index > ROBJECT_EMBED_LEN_MAX { - // Check that the slot is inside the extended table (num_slots > index) - let num_slots = mem_opnd(32, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV); + // Pop receiver if it's on the temp stack + if recv_opnd != SelfOpnd { + asm.stack_pop(1); + } - cmp(cb, num_slots, uimm_opnd(ivar_index as u64)); - jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range)); + match ivar_index { + // If there is no IVAR index, then the ivar was undefined + // when we entered the compiler. 
That means we can just return + // nil for this shape + iv name + None => { + let out_opnd = asm.stack_push(Type::Nil); + asm.mov(out_opnd, Qnil.into()); } + Some(ivar_index) => { + if embed_test_result { + // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - // Get a pointer to the extended table - let tbl_opnd = mem_opnd(64, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR); - mov(cb, REG0, tbl_opnd); + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + let ivar_opnd = Opnd::mem(64, recv, offs); - // Read the ivar from the extended table - let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32); - mov(cb, REG0, ivar_opnd); + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); + } else { + // Compile time value is *not* embedded. - // Check that the ivar is not Qundef - cmp(cb, REG0, uimm_opnd(Qundef.into())); - mov(cb, REG1, uimm_opnd(Qnil.into())); - cmove(cb, REG0, REG1); + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); - // Push the ivar on the stack - let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, REG0); + // Read the ivar from the extended table + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); + } + } } // Jump to next instruction. This allows guard chains to share the same successor. 
- jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } fn gen_getinstancevariable( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - let ivar_name = jit_get_arg(jit, 0).as_u64(); + let ivar_name = jit.get_arg(0).as_u64(); - let comptime_val = jit_peek_at_self(jit); - let comptime_val_klass = comptime_val.class_of(); - - // Generate a side exit - let side_exit = get_side_exit(jit, ocb, ctx); + let comptime_val = jit.peek_at_self(); // Guard that the receiver has the same class as the one from compile time. - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - - jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - comptime_val_klass, - SelfOpnd, - comptime_val, - GET_IVAR_MAX_DEPTH, - side_exit, - ); + let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF); gen_get_ivar( jit, - ctx, - cb, + asm, ocb, GET_IVAR_MAX_DEPTH, comptime_val, ivar_name, + self_asm_opnd, SelfOpnd, - side_exit, ) } +// Generate an IV write. +// This function doesn't deal with writing the shape, or expanding an object +// to use an IV buffer if necessary. 
That is the caller's responsibility +fn gen_write_iv( + asm: &mut Assembler, + comptime_receiver: VALUE, + recv: Opnd, + ivar_index: usize, + set_value: Opnd, + extension_needed: bool) +{ + // Compile time self is embedded and the ivar index lands within the object + let embed_test_result = comptime_receiver.embedded_p() && !extension_needed; + + if embed_test_result { + // Find the IV offset + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + let ivar_opnd = Opnd::mem(64, recv, offs); + + // Write the IV + asm_comment!(asm, "write IV"); + asm.mov(ivar_opnd, set_value); + } else { + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); + + // Write the ivar into the extended table + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + + asm_comment!(asm, "write IV"); + asm.mov(ivar_opnd, set_value); + } +} + fn gen_setinstancevariable( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let id = jit_get_arg(jit, 0); - let ic = jit_get_arg(jit, 1).as_u64(); // type IVC + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } - // Save the PC and SP because the callee may allocate - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + let ivar_name = jit.get_arg(0).as_u64(); + let ic = jit.get_arg(1).as_ptr(); + let comptime_receiver = jit.peek_at_self(); + gen_set_ivar( + jit, + asm, + ocb, + comptime_receiver, + ivar_name, + SelfOpnd, + Some(ic), + ) +} - // Get the operands from the stack - let val_opnd = ctx.stack_pop(1); +/// Set an instance variable on setinstancevariable or attr_writer.
+/// It switches the behavior based on what recv_opnd is given. +/// * SelfOpnd: setinstancevariable, which doesn't push a result onto the stack. +/// * StackOpnd: attr_writer, which pushes a result onto the stack. +fn gen_set_ivar( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + comptime_receiver: VALUE, + ivar_name: ID, + recv_opnd: YARVOpnd, + ic: Option<*const iseq_inline_iv_cache_entry>, +) -> Option<CodegenStatus> { + let comptime_val_klass = comptime_receiver.class_of(); - // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); - mov(cb, C_ARG_REGS[3], val_opnd); - mov(cb, C_ARG_REGS[2], uimm_opnd(id.into())); - mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8)); - let iseq = VALUE(jit.iseq as usize); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq); - call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8); + // If the comptime receiver is frozen, writing an IV will raise an exception + // and we don't want to JIT code to deal with that situation. 
+ if comptime_receiver.is_frozen() { + gen_counter_incr(asm, Counter::setivar_frozen); + return None; + } + + let stack_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + // Check if the comptime class uses a custom allocator + let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; + let uses_custom_allocator = match custom_allocator { + Some(alloc_fun) => { + let allocate_instance = rb_class_allocate_instance as *const u8; + alloc_fun as *const u8 != allocate_instance + } + None => false, + }; + + // Check if the comptime receiver is a T_OBJECT + let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= SET_IVAR_MAX_DEPTH; + if megamorphic { + gen_counter_incr(asm, Counter::num_setivar_megamorphic); + } + + // Get the iv index + let shape_too_complex = comptime_receiver.shape_too_complex(); + let ivar_index = if !shape_too_complex { + let shape_id = comptime_receiver.shape_id_of(); + let shape = unsafe { rb_shape_get_shape_by_id(shape_id) }; + let mut ivar_index: u32 = 0; + if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } { + Some(ivar_index as usize) + } else { + None + } + } else { + None + }; + + // The current shape doesn't contain this iv, we need to transition to another shape. + let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() { + let current_shape = comptime_receiver.shape_of(); + let next_shape = unsafe { rb_shape_get_next(current_shape, comptime_receiver, ivar_name) }; + let next_shape_id = unsafe { rb_shape_id(next_shape) }; + + // If the VM ran out of shapes, or this class generated too many leaf, + // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). 
+ if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { + Some((next_shape_id, None, 0_usize)) + } else { + let current_capacity = unsafe { (*current_shape).capacity }; - KeepCompiling + // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to + // reallocate it. + let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity }; + + // We can write to the object, but we need to transition the shape + let ivar_index = unsafe { (*current_shape).next_iv_index } as usize; + + let needs_extension = if needs_extension { + Some((current_capacity, unsafe { (*next_shape).capacity })) + } else { + None + }; + Some((next_shape_id, needs_extension, ivar_index)) + } + } else { + None + }; + let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _))); + + // If the receiver isn't a T_OBJECT, or uses a custom allocator, + // then just write out the IV write as a function call. + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic { + // The function could raise FrozenError. 
+ // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let val_opnd = asm.stack_opnd(0); + + if let StackOpnd(index) = recv_opnd { // attr_writer + let recv = asm.stack_opnd(index as i32); + asm_comment!(asm, "call rb_vm_set_ivar_id()"); + asm.ccall( + rb_vm_set_ivar_id as *const u8, + vec![ + recv, + Opnd::UImm(ivar_name), + val_opnd, + ], + ); + } else { // setinstancevariable + asm_comment!(asm, "call rb_vm_setinstancevariable()"); + asm.ccall( + rb_vm_setinstancevariable as *const u8, + vec![ + Opnd::const_ptr(jit.iseq as *const u8), + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + ivar_name.into(), + val_opnd, + Opnd::const_ptr(ic.unwrap() as *const u8), + ], + ); + } + } else { + // Get the receiver + let mut recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); + + // Upgrade type + guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap); + + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SET_IVAR_MAX_DEPTH, + Counter::setivar_megamorphic, + ); + + let write_val; + + match ivar_index { + // If we don't have an instance variable index, then we need to + // transition out of the current shape. + None => { + let (new_shape_id, needs_extension, ivar_index) = new_shape.unwrap(); + if let Some((current_capacity, new_capacity)) = needs_extension { + // Generate the C call so that runtime code will increase + // the capacity and set the buffer. 
+ asm_comment!(asm, "call rb_ensure_iv_list_size"); + + // It allocates so can trigger GC, which takes the VM lock + // so could yield to a different ractor. + jit_prepare_call_with_gc(jit, asm); + asm.ccall(rb_ensure_iv_list_size as *const u8, + vec![ + recv, + Opnd::UImm(current_capacity.into()), + Opnd::UImm(new_capacity.into()) + ] + ); + + // Load the receiver again after the function call + recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); + } + + write_val = asm.stack_opnd(0); + gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension.is_some()); + + asm_comment!(asm, "write shape"); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + // Store the new shape + asm.store(shape_opnd, Opnd::UImm(new_shape_id as u64)); + }, + + Some(ivar_index) => { + // If the iv index already exists, then we don't need to + // transition to a new shape. The reason is because we find + // the iv index by searching up the shape tree. If we've + // made the transition already, then there's no reason to + // update the shape on the object. Just set the IV. + write_val = asm.stack_opnd(0); + gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false); + }, + } + + // If we know the stack value is an immediate, there's no need to + // generate WB code. 
+ if !stack_type.is_imm() { + asm.spill_temps(); // for ccall (unconditionally spill them for RegTemps consistency) + let skip_wb = asm.new_label("skip_wb"); + // If the value we're writing is an immediate, we don't need to WB + asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(skip_wb); + + // If the value we're writing is nil or false, we don't need to WB + asm.cmp(write_val, Qnil.into()); + asm.jbe(skip_wb); + + asm_comment!(asm, "write barrier"); + asm.ccall( + rb_gc_writebarrier as *const u8, + vec![ + recv, + write_val, + ] + ); + + asm.write_label(skip_wb); + } + } + let write_val = asm.stack_pop(1); // Keep write_val on stack during ccall for GC + + // If it's attr_writer, i.e. recv_opnd is StackOpnd, we need to pop + // the receiver and push the written value onto the stack. + if let StackOpnd(_) = recv_opnd { + asm.stack_pop(1); // Pop receiver + + let out_opnd = asm.stack_push(Type::Unknown); // Push a return value + asm.mov(out_opnd, write_val); + } + + Some(KeepCompiling) } fn gen_defined( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let op_type = jit_get_arg(jit, 0); - let obj = jit_get_arg(jit, 1); - let pushval = jit_get_arg(jit, 2); +) -> Option<CodegenStatus> { + let op_type = jit.get_arg(0).as_u64(); + let obj = jit.get_arg(1); + let pushval = jit.get_arg(2); - // Save the PC and SP because the callee may allocate - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + match op_type as u32 { + DEFINED_YIELD => { + asm.stack_pop(1); // v operand is not used + let out_opnd = asm.stack_push(Type::Unknown); // nil or "yield" - // Get the operands from the stack - let v_opnd = ctx.stack_pop(1); - - // Call vm_defined(ec, reg_cfp, op_type, obj, v) - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into())); - jit_mov_gc_ptr(jit, 
cb, C_ARG_REGS[3], obj); - mov(cb, C_ARG_REGS[4], v_opnd); - call_ptr(cb, REG0, rb_vm_defined as *const u8); - - // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { - // val = pushval; - // } - jit_mov_gc_ptr(jit, cb, REG1, pushval); - cmp(cb, AL, imm_opnd(0)); - mov(cb, RAX, uimm_opnd(Qnil.into())); - cmovnz(cb, RAX, REG1); + gen_block_given(jit, asm, out_opnd, pushval.into(), Qnil.into()); + } + _ => { + // Save the PC and SP because the callee may allocate or call #respond_to? + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); - // Push the return value onto the stack - let out_type = if pushval.special_const_p() { - Type::UnknownImm - } else { - Type::Unknown + // Get the operands from the stack + let v_opnd = asm.stack_opnd(0); + + // Call vm_defined(ec, reg_cfp, op_type, obj, v) + let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC + + // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { + Type::UnknownImm + } else { + Type::Unknown + }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + } + } + + Some(KeepCompiling) +} + +fn gen_definedivar( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize base on a runtime receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let ivar_name = jit.get_arg(0).as_u64(); + // Value that will be pushed on the stack if the ivar is defined. In practice this is always the + // string "instance-variable". If the ivar is not defined, nil will be pushed instead. 
+ let pushval = jit.get_arg(2); + + // Get the receiver + let recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + + // Specialize base on compile time values + let comptime_receiver = jit.peek_at_self(); + + if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { + // Fall back to calling rb_ivar_defined + + // Save the PC and SP because the callee may allocate + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_call_with_gc(jit, asm); + + // Call rb_ivar_defined(recv, ivar_name) + let def_result = asm.ccall(rb_ivar_defined as *const u8, vec![recv, ivar_name.into()]); + + // if (rb_ivar_defined(recv, ivar_name)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { Type::UnknownImm } else { Type::Unknown }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + + return Some(KeepCompiling) + } + + let shape_id = comptime_receiver.shape_id_of(); + let ivar_exists = unsafe { + let shape = rb_shape_get_shape_by_id(shape_id); + let mut ivar_index: u32 = 0; + rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) }; - let stack_ret = ctx.stack_push(out_type); - mov(cb, stack_ret, RAX); - KeepCompiling + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, SelfOpnd, Counter::definedivar_not_heap); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(shape_id as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + GET_IVAR_MAX_DEPTH, + Counter::definedivar_megamorphic, + ); + + let result = if ivar_exists { pushval } else { Qnil }; + jit_putobject(asm, result); + + // Jump to next instruction. 
This allows guard chains to share the same successor. + jump_to_next_insn(jit, asm, ocb); + + return Some(EndBlock); } fn gen_checktype( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let type_val = jit_get_arg(jit, 0).as_u32(); +) -> Option<CodegenStatus> { + let type_val = jit.get_arg(0).as_u32(); // Only three types are emitted by compile.c at the moment if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val { - let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val = ctx.stack_pop(1); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val = asm.stack_pop(1); // Check if we know from type information - match (type_val, val_type) { - (RUBY_T_STRING, Type::String) - | (RUBY_T_ARRAY, Type::Array) - | (RUBY_T_HASH, Type::Hash) => { - // guaranteed type match - let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64())); - return KeepCompiling; - } - _ if val_type.is_imm() || val_type.is_specific() => { - // guaranteed not to match T_STRING/T_ARRAY/T_HASH - let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64())); - return KeepCompiling; - } + match val_type.known_value_type() { + Some(value_type) => { + if value_type == type_val { + jit_putobject(asm, Qtrue); + return Some(KeepCompiling); + } else { + jit_putobject(asm, Qfalse); + return Some(KeepCompiling); + } + }, _ => (), } - mov(cb, REG0, val); - mov(cb, REG1, uimm_opnd(Qfalse.as_u64())); - - let ret = cb.new_label("ret".to_string()); + let ret = asm.new_label("ret"); + let val = asm.load(val); if !val_type.is_heap() { // if (SPECIAL_CONST_P(val)) { // Return Qfalse via REG1 if not on heap - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_label(cb, ret); - cmp(cb, REG0, uimm_opnd(Qnil.as_u64())); - jbe_label(cb, ret); + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(ret); + asm.cmp(val, Qfalse.into()); + 
asm.je(ret); } // Check type on object - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64)); - cmp(cb, REG0, uimm_opnd(type_val as u64)); - mov(cb, REG0, uimm_opnd(Qtrue.as_u64())); - // REG1 contains Qfalse from above - cmove(cb, REG1, REG0); - - cb.write_label(ret); - let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG1); - cb.link_labels(); - - KeepCompiling + let object_type = asm.and( + Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::UImm(RUBY_T_MASK.into())); + asm.cmp(object_type, Opnd::UImm(type_val.into())); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + asm.write_label(ret); + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + + Some(KeepCompiling) } else { - CantCompile + None } } fn gen_concatstrings( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); - // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + // rb_str_concat_literals may raise Encoding::CompatibilityError + jit_prepare_non_leaf_call(jit, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize())); + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(n as i32))); - // call rb_str_concat_literals(long n, const VALUE *strings); - mov(cb, C_ARG_REGS[0], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[1], values_ptr); - call_ptr(cb, REG0, rb_str_concat_literals as *const u8); + // call rb_str_concat_literals(size_t n, const VALUE *strings); + let return_value = asm.ccall( + rb_str_concat_literals as *const u8, + vec![n.into(), values_ptr] + ); - ctx.stack_pop(n.as_usize()); - let stack_ret = ctx.stack_push(Type::String); - mov(cb, stack_ret, RAX); + asm.stack_pop(n); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, 
return_value); - KeepCompiling + Some(KeepCompiling) } -fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) { +fn guard_two_fixnums( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) { + let counter = Counter::guard_send_not_fixnums; + + // Get stack operands without popping them + let arg1 = asm.stack_opnd(0); + let arg0 = asm.stack_opnd(1); + // Get the stack operand types - let arg1_type = ctx.get_opnd_type(StackOpnd(0)); - let arg0_type = ctx.get_opnd_type(StackOpnd(1)); + let arg1_type = asm.ctx.get_opnd_type(arg1.into()); + let arg0_type = asm.ctx.get_opnd_type(arg0.into()); if arg0_type.is_heap() || arg1_type.is_heap() { - add_comment(cb, "arg is heap object"); - jmp_ptr(cb, side_exit); + asm_comment!(asm, "arg is heap object"); + asm.jmp(Target::side_exit(counter)); return; } if arg0_type != Type::Fixnum && arg0_type.is_specific() { - add_comment(cb, "arg0 not fixnum"); - jmp_ptr(cb, side_exit); + asm_comment!(asm, "arg0 not fixnum"); + asm.jmp(Target::side_exit(counter)); return; } if arg1_type != Type::Fixnum && arg1_type.is_specific() { - add_comment(cb, "arg1 not fixnum"); - jmp_ptr(cb, side_exit); + asm_comment!(asm, "arg1 not fixnum"); + asm.jmp(Target::side_exit(counter)); return; } @@ -2273,816 +3390,961 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown()); assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown()); - // Get stack operands without popping them - let arg1 = ctx.stack_opnd(0); - let arg0 = ctx.stack_opnd(1); - - // If not fixnums, fall back + // If not fixnums at run-time, fall back if arg0_type != Type::Fixnum { - add_comment(cb, "guard arg0 fixnum"); - test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm_comment!(asm, "guard arg0 fixnum"); + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, 
+ counter, + ); } if arg1_type != Type::Fixnum { - add_comment(cb, "guard arg1 fixnum"); - test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm_comment!(asm, "guard arg1 fixnum"); + asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + counter, + ); } // Set stack types in context - ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum); - ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum); + asm.ctx.upgrade_opnd_type(arg1.into(), Type::Fixnum); + asm.ctx.upgrade_opnd_type(arg0.into(), Type::Fixnum); } // Conditional move operation used by comparison operators -type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> (); +type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd; fn gen_fixnum_cmp( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cmov_op: CmovFn, -) -> CodegenStatus { - // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); + bop: ruby_basic_operators, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize based on a runtime receiver + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LT) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, bop) { + 
return None; } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(jit, asm, ocb); // Get the operands from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Compare the arguments - xor(cb, REG0_32, REG0_32); // REG0 = Qfalse - mov(cb, REG1, arg0); - cmp(cb, REG1, arg1); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmov_op(cb, REG0, REG1); + asm.cmp(arg0, arg1); + let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into()); // Push the output on the stack - let dst = ctx.stack_push(Type::Unknown); - mov(cb, dst, REG0); + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, bool_opnd); - KeepCompiling + Some(KeepCompiling) } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_lt( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_l, BOP_LT) } fn gen_opt_le( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_le, BOP_LE) } fn gen_opt_ge( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_ge, BOP_GE) } fn gen_opt_gt( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_g, BOP_GT) } // 
Implements specialized equality for either two fixnum or two strings -// Returns true if code was generated, otherwise false +// Returns None if enough type information isn't available, Some(true) +// if code was generated, otherwise Some(false). fn gen_equality_specialized( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, - side_exit: CodePtr, -) -> bool { - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - let a_opnd = ctx.stack_opnd(1); - let b_opnd = ctx.stack_opnd(0); + gen_eq: bool, +) -> Option<bool> { + let a_opnd = asm.stack_opnd(1); + let b_opnd = asm.stack_opnd(0); + + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => return None, + }; - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) { + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) { // if overridden, emit the generic version - return false; + return Some(false); } - guard_two_fixnums(ctx, cb, side_exit); - - mov(cb, REG0, a_opnd); - cmp(cb, REG0, b_opnd); + guard_two_fixnums(jit, asm, ocb); - mov(cb, REG0, imm_opnd(Qfalse.into())); - mov(cb, REG1, imm_opnd(Qtrue.into())); - cmove(cb, REG0, REG1); + asm.cmp(a_opnd, b_opnd); + let val = if gen_eq { + asm.csel_e(Qtrue.into(), Qfalse.into()) + } else { + asm.csel_ne(Qtrue.into(), Qfalse.into()) + }; // Push the output on the stack - ctx.stack_pop(2); - let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, REG0); + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, val); - true - } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } - { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) { + return Some(true); + } + + if !jit.at_current_insn() { + return None; + } 
+ let comptime_a = jit.peek_at_stack(&asm.ctx, 1); + let comptime_b = jit.peek_at_stack(&asm.ctx, 0); + + if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } { + if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) { // if overridden, emit the generic version - return false; + return Some(false); } - // Load a and b in preparation for call later - mov(cb, C_ARG_REGS[0], a_opnd); - mov(cb, C_ARG_REGS[1], b_opnd); - // Guard that a is a String - mov(cb, REG0, C_ARG_REGS[0]); - unsafe { - // Use of rb_cString here requires an unsafe block - jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - rb_cString, - StackOpnd(1), - comptime_a, - SEND_MAX_DEPTH, - side_exit, - ); - } + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cString }, + a_opnd, + a_opnd.into(), + comptime_a, + SEND_MAX_DEPTH, + Counter::guard_send_not_string, + ); + + let equal = asm.new_label("equal"); + let ret = asm.new_label("ret"); - let ret = cb.new_label("ret".to_string()); + // Spill for ccall. For safety, unconditionally spill temps before branching. 
+ asm.spill_temps(); // If they are equal by identity, return true - cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]); - mov(cb, RAX, imm_opnd(Qtrue.into())); - je_label(cb, ret); + asm.cmp(a_opnd, b_opnd); + asm.je(equal); // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) - if ctx.get_opnd_type(StackOpnd(0)) != Type::String { - mov(cb, REG0, C_ARG_REGS[1]); + let btype = asm.ctx.get_opnd_type(b_opnd.into()); + if btype.known_value_type() != Some(RUBY_T_STRING) { // Note: any T_STRING is valid here, but we check for a ::String for simplicity // To pass a mutable static variable (rb_cString) requires an unsafe block - unsafe { - jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - rb_cString, - StackOpnd(0), - comptime_b, - SEND_MAX_DEPTH, - side_exit, - ); - } + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cString }, + b_opnd, + b_opnd.into(), + comptime_b, + SEND_MAX_DEPTH, + Counter::guard_send_not_string, + ); } // Call rb_str_eql_internal(a, b) - call_ptr(cb, REG0, rb_str_eql_internal as *const u8); + let val = asm.ccall( + if gen_eq { rb_str_eql_internal } else { rb_str_neq_internal } as *const u8, + vec![a_opnd, b_opnd], + ); // Push the output on the stack - cb.write_label(ret); - ctx.stack_pop(2); - let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, RAX); - cb.link_labels(); + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, val); + asm.jmp(ret); - true + asm.write_label(equal); + asm.mov(dst, if gen_eq { Qtrue } else { Qfalse }.into()); + + asm.write_label(ret); + + Some(true) } else { - false + Some(false) } } fn gen_opt_eq( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } - - // Create a side-exit to fall back to the interpreter - let 
side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let specialized = match gen_equality_specialized(jit, asm, ocb, true) { + Some(specialized) => specialized, + None => { + // Defer compilation so we can specialize base on a runtime receiver + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) { - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + if specialized { + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_neq( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // opt_neq is passed two rb_call_data as arguments: // first for ==, second for != - let cd = jit_get_arg(jit, 1).as_ptr(); - return gen_send_general(jit, ctx, cb, ocb, cd, None); + let cd = jit.get_arg(1).as_ptr(); + perf_call! 
{ gen_send_general(jit, asm, ocb, cd, None) } } fn gen_opt_aref( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let cd: *const rb_call_data = jit.get_arg(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; // Only JIT one arg calls like `ary[6]` if argc != 1 { - gen_counter_incr!(cb, oaref_argc_not_one); - return CantCompile; + gen_counter_incr(asm, Counter::opt_aref_argc_not_one); + return None; } // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - // Remember the context on entry for adding guard chains - let starting_context = *ctx; - // Specialize base on compile time values - let comptime_idx = jit_peek_at_stack(jit, ctx, 0); - let comptime_recv = jit_peek_at_stack(jit, ctx, 1); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 1); if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() { - if !assume_bop_not_redefined(jit, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) { - return CantCompile; + if !assume_bop_not_redefined(jit, asm, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) { + return None; } - // Pop the stack operands - let idx_opnd = ctx.stack_pop(1); - let recv_opnd = ctx.stack_pop(1); - mov(cb, REG0, recv_opnd); - - // if (SPECIAL_CONST_P(recv)) { - // Bail if receiver is not a heap object - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qfalse.into())); - je_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qnil.into())); - je_ptr(cb, side_exit); + 
// Get the stack operands + let idx_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); - // Bail if recv has a class other than ::Array. + // Guard that the receiver is an ::Array // BOP_AREF check above is only good for ::Array. - mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS)); - mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into())); - cmp(cb, REG0, REG1); - jit_chain_guard( - JCC_JNE, + jit_guard_known_klass( jit, - &starting_context, - cb, + asm, ocb, + unsafe { rb_cArray }, + recv_opnd, + recv_opnd.into(), + comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, - side_exit, + Counter::opt_aref_not_array, ); // Bail if idx is not a FIXNUM - mov(cb, REG1, idx_opnd); - test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum)); + let idx_reg = asm.load(idx_opnd); + asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into()); + asm.jz(Target::side_exit(Counter::opt_aref_arg_not_fixnum)); // Call VALUE rb_ary_entry_internal(VALUE ary, long offset). // It never raises or allocates, so we don't need to write to cfp->pc. { - mov(cb, RDI, recv_opnd); - sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int - mov(cb, RSI, REG1); - call_ptr(cb, REG0, rb_ary_entry_internal as *const u8); + // Pop the argument and the receiver + asm.stack_pop(2); + + let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int + let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); } // Jump to next instruction. This allows guard chains to share the same successor. 
- jump_to_next_insn(jit, ctx, cb, ocb); - return EndBlock; + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); } else if comptime_recv.class_of() == unsafe { rb_cHash } { - if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { - return CantCompile; + if !assume_bop_not_redefined(jit, asm, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { + return None; } - let key_opnd = ctx.stack_opnd(0); - let recv_opnd = ctx.stack_opnd(1); + let recv_opnd = asm.stack_opnd(1); // Guard that the receiver is a hash - mov(cb, REG0, recv_opnd); jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, unsafe { rb_cHash }, - StackOpnd(1), + recv_opnd, + recv_opnd.into(), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, - side_exit, + Counter::opt_aref_not_hash, ); - // Setup arguments for rb_hash_aref(). - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], key_opnd); - // Prepare to call rb_hash_aref(). It might call #hash on the key. - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); - call_ptr(cb, REG0, rb_hash_aref as *const u8); + // Call rb_hash_aref + let key_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); + let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]); // Pop the key and the receiver - ctx.stack_pop(2); + asm.stack_pop(2); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } else { // General case. Call the [] method. 
- gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_aset( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - let comptime_recv = jit_peek_at_stack(jit, ctx, 2); - let comptime_key = jit_peek_at_stack(jit, ctx, 1); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 2); + let comptime_key = jit.peek_at_stack(&asm.ctx, 1); // Get the operands from the stack - let recv = ctx.stack_opnd(2); - let key = ctx.stack_opnd(1); - let val = ctx.stack_opnd(0); + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let _val = asm.stack_opnd(0); if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() { - let side_exit = get_side_exit(jit, ocb, ctx); - // Guard receiver is an Array - mov(cb, REG0, recv); jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, unsafe { rb_cArray }, - StackOpnd(2), + recv, + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_array, ); // Guard key is a fixnum - mov(cb, REG0, key); jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, unsafe { rb_cInteger }, - StackOpnd(1), + key, + key.into(), comptime_key, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_fixnum, ); - // Call rb_ary_store - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key) - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); - call_ptr(cb, REG0, rb_ary_store as *const u8); + // Call rb_ary_store + let recv = asm.stack_opnd(2); + let key = 
asm.load(asm.stack_opnd(1)); + let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key) + let val = asm.stack_opnd(0); + asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]); // rb_ary_store returns void // stored value should still be on stack - mov(cb, REG0, ctx.stack_opnd(0)); + let val = asm.load(asm.stack_opnd(0)); // Push the return value onto the stack - ctx.stack_pop(3); - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, REG0); + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); - return EndBlock; + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); } else if comptime_recv.class_of() == unsafe { rb_cHash } { - let side_exit = get_side_exit(jit, ocb, ctx); - // Guard receiver is a Hash - mov(cb, REG0, recv); jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, unsafe { rb_cHash }, - StackOpnd(2), + recv, + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_hash, ); - // Call rb_hash_aset - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); - call_ptr(cb, REG0, rb_hash_aset as *const u8); + // Call rb_hash_aset + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let val = asm.stack_opnd(0); + let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]); // Push the return value onto the stack - ctx.stack_pop(3); - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } -fn gen_opt_and( +fn gen_opt_aref_with( jit: &mut 
JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus>{ + // We might allocate or raise + jit_prepare_non_leaf_call(jit, asm); + + let key_opnd = Opnd::Value(jit.get_arg(0)); + let recv_opnd = asm.stack_opnd(0); + + extern "C" { + fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE; } - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); + let val_opnd = asm.ccall( + rb_vm_opt_aref_with as *const u8, + vec![ + recv_opnd, + key_opnd + ], + ); + asm.stack_pop(1); // Keep it on stack during GC + + asm.cmp(val_opnd, Qundef.into()); + asm.je(Target::side_exit(Counter::opt_aref_with_qundef)); - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) { - return CantCompile; + return Some(KeepCompiling); +} + +fn gen_opt_and( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(jit, asm, ocb); // Get the operands and destination 
from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Do the bitwise and arg0 & arg1 - mov(cb, REG0, arg0); - and(cb, REG0, arg1); + let val = asm.and(arg0, arg1); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_or( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(jit, asm, ocb); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = 
asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Do the bitwise or arg0 | arg1 - mov(cb, REG0, arg0); - or(cb, REG0, arg1); + let val = asm.or(arg0, arg1); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_minus( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(jit, asm, ocb); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Subtract arg0 - arg1 and test for overflow - 
mov(cb, REG0, arg0); - sub(cb, REG0, arg1); - jo_ptr(cb, side_exit); - add(cb, REG0, imm_opnd(1)); + let val_untag = asm.sub(arg0, arg1); + asm.jo(Target::side_exit(Counter::opt_minus_overflow)); + let val = asm.add(val_untag, Opnd::Imm(1)); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } } fn gen_opt_mult( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + // Fallback to a method call if it overflows + if two_fixnums && asm.ctx.get_chain_depth() == 0 { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Do some bitwise gymnastics to handle tag bits + // x * y is translated to (x >> 1) * (y - 1) + 1 + let arg0_untag = asm.rshift(arg0, Opnd::UImm(1)); + let arg1_untag = asm.sub(arg1, Opnd::UImm(1)); + let out_val = asm.mul(arg0_untag, arg1_untag); + jit_chain_guard(JCC_JO_MUL, jit, asm, ocb, 1, Counter::opt_mult_overflow); + let out_val = asm.add(out_val, Opnd::UImm(1)); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + Some(KeepCompiling) + } else { + gen_opt_send_without_block(jit, asm, ocb) + 
} } fn gen_opt_div( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } fn gen_opt_mod( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Save the PC and SP because the callee may allocate bignums - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; - let side_exit = get_side_exit(jit, ocb, ctx); + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) { + return None; + } - // Get the operands from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); - // Call rb_vm_opt_mod(VALUE recv, VALUE obj) - mov(cb, C_ARG_REGS[0], arg0); - mov(cb, C_ARG_REGS[1], arg1); - call_ptr(cb, REG0, rb_vm_opt_mod as *const u8); + // Get the operands and destination from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); - // If val == Qundef, bail to do a method call - cmp(cb, RAX, imm_opnd(Qundef.as_i64())); - je_ptr(cb, side_exit); + // Check for arg0 % 0 + asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64())); + asm.je(Target::side_exit(Counter::opt_mod_zero)); - // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + // Call rb_fix_mod_fix(VALUE recv, VALUE obj) + let ret = 
asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]); + + // Push the return value onto the stack + // When the two arguments are fixnums, the modulo output is always a fixnum + let stack_ret = asm.stack_push(Type::Fixnum); + asm.mov(stack_ret, ret); - KeepCompiling + Some(KeepCompiling) + } else { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) + } } fn gen_opt_ltlt( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } fn gen_opt_nil_p( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } fn gen_opt_empty_p( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } fn gen_opt_succ( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, asm, ocb) } fn gen_opt_str_freeze( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { - return CantCompile; +) -> Option<CodegenStatus> { + if 
!assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { + return None; } - let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); + let str = jit.get_arg(0); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::String); - mov(cb, stack_ret, REG0); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, str.into()); - KeepCompiling + Some(KeepCompiling) } fn gen_opt_str_uminus( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { - return CantCompile; +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { + return None; } - let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); + let str = jit.get_arg(0); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::String); - mov(cb, stack_ret, REG0); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, str.into()); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_max( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #max + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_max(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_max as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); - KeepCompiling + Some(KeepCompiling) +} + +fn gen_opt_newarray_send( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> 
Option<CodegenStatus> { + let method = jit.get_arg(1).as_u64(); + + if method == ID!(min) { + gen_opt_newarray_min(jit, asm, _ocb) + } else if method == ID!(max) { + gen_opt_newarray_max(jit, asm, _ocb) + } else if method == ID!(hash) { + gen_opt_newarray_hash(jit, asm, _ocb) + } else { + None + } +} + +fn gen_opt_newarray_hash( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #hash + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_hash(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_hash as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_min( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #min + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_min(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_min as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) } fn gen_opt_not( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); +) -> Option<CodegenStatus> { + return 
gen_opt_send_without_block(jit, asm, ocb); } fn gen_opt_size( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); } fn gen_opt_length( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); } fn gen_opt_regexpmatch2( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); } fn gen_opt_case_dispatch( - _jit: &mut JITState, - ctx: &mut Context, - _cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { // Normally this instruction would lookup the key in a hash and jump to an // offset based on that. // Instead we can take the fallback case and continue with the next @@ -3090,55 +4352,96 @@ fn gen_opt_case_dispatch( // We'd hope that our jitted code will be sufficiently fast without the // hash lookup, at least for small hashes, but it's worth revisiting this // assumption in the future. + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } - ctx.stack_pop(1); + let case_hash = jit.get_arg(0); + let else_offset = jit.get_arg(1).as_u32(); - KeepCompiling // continue with the next instruction -} + // Try to reorder case/else branches so that ones that are actually used come first. + // Supporting only Fixnum for now so that the implementation can be an equality check. 
+ let key_opnd = asm.stack_opnd(0); + let comptime_key = jit.peek_at_stack(&asm.ctx, 0); -fn gen_branchif_branch( - cb: &mut CodeBlock, - target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - assert!(target1 != None); - match shape { - BranchShape::Next0 => { - jz_ptr(cb, target1.unwrap()); - } - BranchShape::Next1 => { - jnz_ptr(cb, target0); + // Check that all cases are fixnums to avoid having to register BOP assumptions on + // all the types that case hashes support. This spends compile time to save memory. + fn case_hash_all_fixnum_p(hash: VALUE) -> bool { + let mut all_fixnum = true; + unsafe { + unsafe extern "C" fn per_case(key: st_data_t, _value: st_data_t, data: st_data_t) -> c_int { + (if VALUE(key as usize).fixnum_p() { + ST_CONTINUE + } else { + (data as *mut bool).write(false); + ST_STOP + }) as c_int + } + rb_hash_stlike_foreach(hash, Some(per_case), (&mut all_fixnum) as *mut _ as st_data_t); } - BranchShape::Default => { - jnz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + + all_fixnum + } + + // If megamorphic, fallback to compiling branch instructions after opt_case_dispatch + let megamorphic = asm.ctx.get_chain_depth() >= CASE_WHEN_MAX_DEPTH; + if megamorphic { + gen_counter_incr(asm, Counter::num_opt_case_dispatch_megamorphic); + } + + if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) && !megamorphic { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) { + return None; } + + // Check if the key is the same value + asm.cmp(key_opnd, comptime_key.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + CASE_WHEN_MAX_DEPTH, + Counter::opt_case_dispatch_megamorphic, + ); + asm.stack_pop(1); // Pop key_opnd + + // Get the offset for the compile-time key + let mut offset = 0; + unsafe { rb_hash_stlike_lookup(case_hash, comptime_key.0 as _, &mut offset) }; + let jump_offset = if offset == 0 { + // NOTE: If we hit the else branch with 
various values, it could negatively impact the performance. + else_offset + } else { + (offset as u32) >> 1 // FIX2LONG + }; + + // Jump to the offset of case or else + let jump_idx = jit.next_insn_idx() as u32 + jump_offset; + let jump_block = BlockId { iseq: jit.iseq, idx: jump_idx.try_into().unwrap() }; + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); + Some(EndBlock) + } else { + asm.stack_pop(1); // Pop key_opnd + Some(KeepCompiling) // continue with === branches } } fn gen_branchif( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, Counter::branchif_interrupted); } - // Test if any bit (outside of the Qnil bit) is on - // RUBY_Qfalse /* ...0000 0000 */ - // RUBY_Qnil /* ...0000 1000 */ - let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); - // Get the branch target instruction offsets - let next_idx = jit_next_insn_idx(jit); + let next_idx = jit.next_insn_idx(); let jump_idx = (next_idx as i32) + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3146,63 +4449,54 @@ fn gen_branchif( }; let jump_block = BlockId { iseq: jit.iseq, - idx: jump_idx as u32, + idx: jump_idx.try_into().unwrap(), }; - // Generate the branch instructions - gen_branch( - jit, - ctx, - cb, - ocb, - jump_block, - ctx, - Some(next_block), - Some(ctx), - gen_branchif_branch, - ); + // Test if any bit (outside of the Qnil bit) is on + // See RB_TEST() + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); - EndBlock -} + incr_counter!(branch_insn_count); -fn gen_branchunless_branch( - cb: &mut CodeBlock, - 
target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()), - BranchShape::Next1 => jz_ptr(cb, target0), - BranchShape::Default => { - jz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); - } + if let Some(result) = val_type.known_truthy() { + let target = if result { jump_block } else { next_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64())); + + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchIf(Cell::new(BranchShape::Default)), + ); } + + Some(EndBlock) } fn gen_branchunless( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, Counter::branchunless_interrupted); } - // Test if any bit (outside of the Qnil bit) is on - // RUBY_Qfalse /* ...0000 0000 */ - // RUBY_Qnil /* ...0000 1000 */ - let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); - // Get the branch target instruction offsets - let next_idx = jit_next_insn_idx(jit) as i32; + let next_idx = jit.next_insn_idx() as i32; let jump_idx = next_idx + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3213,59 +4507,52 @@ fn gen_branchunless( idx: jump_idx.try_into().unwrap(), }; - // Generate the branch instructions - gen_branch( - jit, - ctx, - cb, - ocb, - jump_block, - ctx, - Some(next_block), - Some(ctx), - gen_branchunless_branch, - ); + let val_type = 
asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); - EndBlock -} + incr_counter!(branch_insn_count); -fn gen_branchnil_branch( - cb: &mut CodeBlock, - target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 => jne_ptr(cb, target1.unwrap()), - BranchShape::Next1 => je_ptr(cb, target0), - BranchShape::Default => { - je_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); - } + if let Some(result) = val_type.known_truthy() { + let target = if result { next_block } else { jump_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + // Test if any bit (outside of the Qnil bit) is on + // See RB_TEST() + let not_qnil = !Qnil.as_i64(); + asm.test(val_opnd, not_qnil.into()); + + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchUnless(Cell::new(BranchShape::Default)), + ); } + + Some(EndBlock) } fn gen_branchnil( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, Counter::branchnil_interrupted); } - // Test if the value is Qnil - // RUBY_Qnil /* ...0000 1000 */ - let val_opnd = ctx.stack_pop(1); - cmp(cb, val_opnd, uimm_opnd(Qnil.into())); - // Get the branch target instruction offsets - let next_idx = jit_next_insn_idx(jit) as i32; + let next_idx = jit.next_insn_idx() as i32; let jump_idx = next_idx + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3276,47 +4563,101 @@ fn gen_branchnil( idx: 
jump_idx.try_into().unwrap(), }; - // Generate the branch instructions - gen_branch( - jit, - ctx, - cb, - ocb, - jump_block, - ctx, - Some(next_block), - Some(ctx), - gen_branchnil_branch, - ); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); + + if let Some(result) = val_type.known_nil() { + let target = if result { jump_block } else { next_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + // Test if the value is Qnil + asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchNil(Cell::new(BranchShape::Default)), + ); + } - EndBlock + Some(EndBlock) +} + +fn gen_throw( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let throw_state = jit.get_arg(0).as_u64(); + let throwobj = asm.stack_pop(1); + let throwobj = asm.load(throwobj); + + // Gather some statistics about throw + gen_counter_incr(asm, Counter::num_throw); + match (throw_state & VM_THROW_STATE_MASK as u64) as u32 { + RUBY_TAG_BREAK => gen_counter_incr(asm, Counter::num_throw_break), + RUBY_TAG_RETRY => gen_counter_incr(asm, Counter::num_throw_retry), + RUBY_TAG_RETURN => gen_counter_incr(asm, Counter::num_throw_return), + _ => {}, + } + + // THROW_DATA_NEW allocates. Save SP for GC and PC for allocation tracing as + // well as handling the catch table. However, not using jit_prepare_call_with_gc + // since we don't need a patch point for this implementation. + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // rb_vm_throw verifies it's a valid throw, sets ec->tag->state, and returns throw + // data, which is throwobj or a vm_throw_data wrapping it. When ec->tag->state is + // set, JIT code callers will handle the throw with vm_exec_handle_exception. 
+ extern "C" { + fn rb_vm_throw(ec: EcPtr, reg_cfp: CfpPtr, throw_state: u32, throwobj: VALUE) -> VALUE; + } + let val = asm.ccall(rb_vm_throw as *mut u8, vec![EC, CFP, throw_state.into(), throwobj]); + + asm_comment!(asm, "exit from throw"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(val); + Some(EndBlock) } fn gen_jump( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, Counter::jump_interrupted); } // Get the branch target instruction offsets - let jump_idx = (jit_next_insn_idx(jit) as i32) + jump_offset; + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; let jump_block = BlockId { iseq: jit.iseq, - idx: jump_idx as u32, + idx: jump_idx.try_into().unwrap(), }; // Generate the jump instruction - gen_direct_jump(jit, ctx, jump_block, cb); + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); - EndBlock + Some(EndBlock) } /// Guard that self or a stack operand has the same class as `known_klass`, using @@ -3325,66 +4666,71 @@ fn gen_jump( /// the guard generated for one will fail for the other. /// /// Recompile as contingency if possible, or take side exit a last resort. 
- fn jit_guard_known_klass( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, known_klass: VALUE, - insn_opnd: InsnOpnd, + obj_opnd: Opnd, + insn_opnd: YARVOpnd, sample_instance: VALUE, - max_chain_depth: i32, - side_exit: CodePtr, -) -> bool { - let val_type = ctx.get_opnd_type(insn_opnd); + max_chain_depth: u8, + counter: Counter, +) { + let val_type = asm.ctx.get_opnd_type(insn_opnd); + + if val_type.known_class() == Some(known_klass) { + // Unless frozen, Array, Hash, and String objects may change their RBASIC_CLASS + // when they get a singleton class. Those types need invalidations. + if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&known_klass) } { + if jit.assume_no_singleton_class(asm, ocb, known_klass) { + // Speculate that this object will not have a singleton class, + // and invalidate the block in case it does. + return; + } + } else { + // We already know from type information that this is a match + return; + } + } if unsafe { known_klass == rb_cNilClass } { assert!(!val_type.is_heap()); - if val_type != Type::Nil { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is nil"); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm_comment!(asm, "guard object is nil"); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::Nil); - } + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Nil); } else if unsafe { known_klass == rb_cTrueClass } { assert!(!val_type.is_heap()); - if val_type != Type::True { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is true"); - cmp(cb, REG0, imm_opnd(Qtrue.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm_comment!(asm, "guard object is true"); + asm.cmp(obj_opnd, 
Qtrue.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::True); - } + asm.ctx.upgrade_opnd_type(insn_opnd, Type::True); } else if unsafe { known_klass == rb_cFalseClass } { assert!(!val_type.is_heap()); - if val_type != Type::False { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is false"); - assert!(Qfalse.as_i32() == 0); - test(cb, REG0, REG0); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm_comment!(asm, "guard object is false"); + assert!(Qfalse.as_i32() == 0); + asm.test(obj_opnd, obj_opnd); + jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::False); - } + asm.ctx.upgrade_opnd_type(insn_opnd, Type::False); } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { - assert!(!val_type.is_heap()); // We will guard fixnum and bignum as though they were separate classes // BIGNUM can be handled by the general else case below - if val_type != Type::Fixnum || !val_type.is_imm() { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is fixnum"); - test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64)); - jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); - } + asm_comment!(asm, "guard object is fixnum"); + asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + jit_chain_guard(JCC_JZ, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { assert!(!val_type.is_heap()); // We will guard STATIC vs DYNAMIC as though they were separate classes @@ -3392,11 +4738,11 @@ fn jit_guard_known_klass( if val_type != Type::ImmSymbol || !val_type.is_imm() { assert!(val_type.is_unknown()); - add_comment(cb, "guard 
object is static symbol"); + asm_comment!(asm, "guard object is static symbol"); assert!(RUBY_SPECIAL_SHIFT == 8); - cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); + asm.cmp(obj_opnd.with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); } } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { assert!(!val_type.is_heap()); @@ -3404,16 +4750,16 @@ fn jit_guard_known_klass( assert!(val_type.is_unknown()); // We will guard flonum vs heap float as though they were separate classes - add_comment(cb, "guard object is flonum"); - mov(cb, REG1, REG0); - and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64)); - cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); + asm_comment!(asm, "guard object is flonum"); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); } } else if unsafe { - FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON)) != VALUE(0) - && sample_instance == rb_attr_get(known_klass, id__attached__ as ID) + FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0) + && sample_instance == rb_class_attached_object(known_klass) + && !rb_obj_is_kind_of(sample_instance, rb_cIO).test() } { // Singleton classes are attached to one specific object, so we can // avoid one memory access (and potentially the is_heap check) by @@ -3425,66 +4771,74 @@ fn jit_guard_known_klass( // that its singleton class is empty, so we can't avoid the memory // access. 
As an example, `Object.new.singleton_class` is an object in // this situation. - add_comment(cb, "guard known object with singleton class"); - // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object. - jit_mov_gc_ptr(jit, cb, REG1, sample_instance); - cmp(cb, REG0, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + // Also, guarding by identity is incorrect for IO objects because + // IO#reopen can be used to change the class and singleton class of IO objects! + asm_comment!(asm, "guard known object with singleton class"); + asm.cmp(obj_opnd, sample_instance.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + } else if val_type == Type::CString && unsafe { known_klass == rb_cString } { + // guard elided because the context says we've already checked + unsafe { + assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") + }; } else { assert!(!val_type.is_imm()); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. if !val_type.is_heap() { - add_comment(cb, "guard not immediate"); - assert!(Qfalse.as_i32() < Qnil.as_i32()); - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm_comment!(asm, "guard not immediate"); + asm.test(obj_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); + jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter); + asm.cmp(obj_opnd, Qfalse.into()); + jit_chain_guard(JCC_JE, jit, asm, ocb, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } - let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS); + // If obj_opnd isn't already a register, load it. 
+ let obj_opnd = match obj_opnd { + Opnd::InsnOut { .. } => obj_opnd, + _ => asm.load(obj_opnd), + }; + let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS); // Bail if receiver class is different from known_klass // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class. - add_comment(cb, "guard known class"); - jit_mov_gc_ptr(jit, cb, REG1, known_klass); - cmp(cb, klass_opnd, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm_comment!(asm, "guard known class"); + asm.cmp(klass_opnd, known_klass.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + + if known_klass == unsafe { rb_cString } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CString); + } else if known_klass == unsafe { rb_cArray } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CArray); + } else if known_klass == unsafe { rb_cHash } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CHash); + } } - - true } // Generate ancestry guard for protected callee. // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). fn jit_protected_callee_ancestry_guard( - jit: &mut JITState, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, + asm: &mut Assembler, cme: *const rb_callable_method_entry_t, - side_exit: CodePtr, ) { // See vm_call_method(). - mov( - cb, - C_ARG_REGS[0], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); let def_class = unsafe { (*cme).defined_class }; - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class); // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise. 
// VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); - call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8); - test(cb, RAX, RAX); - jz_ptr( - cb, - counted_exit!(ocb, side_exit, send_se_protected_check_failed), + let val = asm.ccall( + rb_obj_is_kind_of as *mut u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + def_class.into(), + ], ); + asm.test(val, val); + asm.jz(Target::side_exit(Counter::guard_send_se_protected_check_failed)) } // Codegen for rb_obj_not(). @@ -3492,122 +4846,951 @@ fn jit_protected_callee_ancestry_guard( // arity guards. fn jit_rb_obj_not( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - let recv_opnd = ctx.get_opnd_type(StackOpnd(0)); - - if recv_opnd == Type::Nil || recv_opnd == Type::False { - add_comment(cb, "rb_obj_not(nil_or_false)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::True); - mov(cb, out_opnd, uimm_opnd(Qtrue.into())); - } else if recv_opnd.is_heap() || recv_opnd.is_specific() { - // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. - add_comment(cb, "rb_obj_not(truthy)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::False); - mov(cb, out_opnd, uimm_opnd(Qfalse.into())); - } else { - // jit_guard_known_klass() already ran on the receiver which should - // have deduced deduced the type of the receiver. This case should be - // rare if not unreachable. - return false; + let recv_opnd = asm.ctx.get_opnd_type(StackOpnd(0)); + + match recv_opnd.known_truthy() { + Some(false) => { + asm_comment!(asm, "rb_obj_not(nil_or_false)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::True); + asm.mov(out_opnd, Qtrue.into()); + }, + Some(true) => { + // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. 
+ asm_comment!(asm, "rb_obj_not(truthy)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::False); + asm.mov(out_opnd, Qfalse.into()); + }, + _ => { + return false; + }, } + true } // Codegen for rb_true() fn jit_rb_true( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - add_comment(cb, "nil? == true"); - ctx.stack_pop(1); - let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.into())); + asm_comment!(asm, "nil? == true"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); true } // Codegen for rb_false() fn jit_rb_false( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - add_comment(cb, "nil? == false"); - ctx.stack_pop(1); - let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.into())); + asm_comment!(asm, "nil? == false"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); true } +/// Codegen for Kernel#is_a? 
+fn jit_rb_kernel_is_a( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - In general, for any two Class instances A, B, `A < B` does not change at runtime. + // Class#superclass is stable. + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // We are not allowing module here because the module hierarchy can change at runtime. + if !unsafe { RB_TYPE_P(sample_rhs, RUBY_T_CLASS) } { + return false; + } + let sample_is_a = unsafe { rb_obj_is_kind_of(sample_lhs, sample_rhs) == Qtrue }; + + asm_comment!(asm, "Kernel#is_a?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + asm.jne(Target::side_exit(Counter::guard_send_is_a_class_mismatch)); + + asm.stack_pop(2); + + if sample_is_a { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +/// Codegen for Kernel#instance_of? 
+fn jit_rb_kernel_instance_of( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - For a particular `CLASS_OF(lhs)`, `rb_obj_class(lhs)` does not change. + // (because for any singleton class `s`, `s.superclass.equal?(s.attached_object.class)`) + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // Filters out cases where the C implementation raises + if unsafe { !(RB_TYPE_P(sample_rhs, RUBY_T_CLASS) || RB_TYPE_P(sample_rhs, RUBY_T_MODULE)) } { + return false; + } + + // We need to grab the class here to deal with singleton classes. + // Instance of grabs the "real class" of the object rather than the + // singleton class. 
+ let sample_lhs_real_class = unsafe { rb_obj_class(sample_lhs) }; + + let sample_instance_of = sample_lhs_real_class == sample_rhs; + + asm_comment!(asm, "Kernel#instance_of?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_instance_of_class_mismatch, + ); + + asm.stack_pop(2); + + if sample_instance_of { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +fn jit_rb_mod_eqq( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + asm_comment!(asm, "Module#==="); + // By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can + // only live on these objects. With that, we can call rb_obj_is_kind_of() without + // jit_prepare_non_leaf_call() or a control frame push because it can't raise, allocate, or call + // Ruby methods with these inputs. + // Note the difference in approach from Kernel#is_a? because we don't get a free guard for the + // right hand side. 
+ let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); // the module + let ret = asm.ccall(rb_obj_is_kind_of as *const u8, vec![rhs, lhs]); + + // Return the result + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret); + + return true; +} + // Codegen for rb_obj_equal() // object identity comparison fn jit_rb_obj_equal( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "equal?"); + let obj1 = asm.stack_pop(1); + let obj2 = asm.stack_pop(1); + + asm.cmp(obj1, obj2); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + true +} + +// Codegen for rb_obj_not_equal() +// object identity comparison +fn jit_rb_obj_not_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + gen_equality_specialized(jit, asm, ocb, false) == Some(true) +} + +// Codegen for rb_int_equal() +fn jit_rb_int_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Compare the arguments + asm_comment!(asm, "rb_int_equal"); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + asm.cmp(arg0, arg1); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + true +} + +fn jit_rb_int_succ( + _jit: 
&mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard the receiver is fixnum + let recv_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let recv = asm.stack_pop(1); + if recv_type != Type::Fixnum { + asm_comment!(asm, "guard object is fixnum"); + asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + asm.jz(Target::side_exit(Counter::opt_succ_not_fixnum)); + } + + asm_comment!(asm, "Integer#succ"); + let out_val = asm.add(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1 + asm.jo(Target::side_exit(Counter::opt_succ_overflow)); + + // Push the output onto the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + true +} + +fn jit_rb_int_div( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + // rb_fix_div_fix may GC-allocate for Bignum + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Integer#/"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + // Check for arg0 % 0 + asm.cmp(obj, VALUE::fixnum_from_usize(0).as_i64().into()); + asm.je(Target::side_exit(Counter::opt_div_zero)); + + let ret = asm.ccall(rb_fix_div_fix as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep them during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_int_lshift( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if 
asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + if !comptime_shift.fixnum_p() { + return false; + } + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } + + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + 1, + Counter::lshift_amount_changed, + ); + + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + true +} + +fn fixnum_left_shift_body(asm: &mut Assembler, lhs: Opnd, shift_amt: u64) { + let in_val = asm.sub(lhs, 1.into()); + let shift_opnd = Opnd::UImm(shift_amt); + let out_val = asm.lshift(in_val, shift_opnd); + let unshifted = asm.rshift(out_val, shift_opnd); + + // Guard that we did not overflow + asm.cmp(unshifted, in_val); + asm.jne(Target::side_exit(Counter::lshift_overflow)); + + // Re-tag the output value + let out_val = asm.add(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); +} + +fn jit_rb_int_rshift( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } 
+ + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + 1, + Counter::rshift_amount_changed, + ); + + let shift_opnd = Opnd::UImm(shift_amt as u64); + let out_val = asm.rshift(lhs, shift_opnd); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_xor( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // XOR and then re-tag the resulting fixnum + let out_val = asm.xor(lhs, rhs); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_aref( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + asm_comment!(asm, "Integer#[]"); + let obj = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + let ret = asm.ccall(rb_fix_aref as *const u8, vec![recv, obj]); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, ret); + true +} + +fn 
jit_rb_float_plus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#+"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_plus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_minus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#-"); + let obj = asm.stack_opnd(0); + let recv = 
asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_minus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_mul( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#*"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_mul as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_div( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + 
Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#/"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_div as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it. +fn jit_rb_str_uplus( + jit: &mut JITState, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool +{ + if argc != 0 { + return false; + } + + // We allocate when we dup the string + jit_prepare_call_with_gc(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + + asm_comment!(asm, "Unary plus on string"); + let recv_opnd = asm.stack_pop(1); + let recv_opnd = asm.load(recv_opnd); + let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64)); + + let ret_label = asm.new_label("stack_ret"); + + // String#+@ can only exist on T_STRING + let stack_ret = asm.stack_push(Type::TString); + + // If the string isn't frozen, we just return it. 
+ asm.mov(stack_ret, recv_opnd); + asm.jz(ret_label); + + // Str is frozen - duplicate it + asm.spill_temps(); // for ccall + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + asm.mov(stack_ret, ret_opnd); + + asm.write_label(ret_label); + + true +} + +fn jit_rb_str_length( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - add_comment(cb, "equal?"); - let obj1 = ctx.stack_pop(1); - let obj2 = ctx.stack_pop(1); - - mov(cb, REG0, obj1); - cmp(cb, REG0, obj2); - mov(cb, REG0, uimm_opnd(Qtrue.into())); - mov(cb, REG1, uimm_opnd(Qfalse.into())); - cmovne(cb, REG0, REG1); - - let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + asm_comment!(asm, "String#length"); + extern "C" { + fn rb_str_length(str: VALUE) -> VALUE; + } + + // This function cannot allocate or raise an exceptions + let recv = asm.stack_opnd(0); + let ret_opnd = asm.ccall(rb_str_length as *const u8, vec![recv]); + asm.stack_pop(1); // Keep recv on stack during ccall for GC + + // Should be guaranteed to be a fixnum on 64-bit systems + let out_opnd = asm.stack_push(Type::Fixnum); + asm.mov(out_opnd, ret_opnd); + true } fn jit_rb_str_bytesize( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - add_comment(cb, "String#bytesize"); + asm_comment!(asm, "String#bytesize"); - let recv = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], recv); - call_ptr(cb, REG0, rb_str_bytesize as *const u8); + let recv = asm.stack_pop(1); - let out_opnd = ctx.stack_push(Type::Fixnum); - mov(cb, 
out_opnd, RAX); + asm_comment!(asm, "get string length"); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + let len = asm.load(str_len_opnd); + let shifted_val = asm.lshift(len, Opnd::UImm(1)); + let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + let out_opnd = asm.stack_push(Type::Fixnum); + + asm.mov(out_opnd, out_val); + + true +} + +fn jit_rb_str_byteslice( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 2 { + return false + } + + // rb_str_byte_substr should be leaf if indexes are fixnums + match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) { + (Type::Fixnum, Type::Fixnum) => {}, + // Raises when non-integers are passed in, which requires the method frame + // to be pushed for the backtrace + _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + } + asm_comment!(asm, "String#byteslice"); + + // rb_str_byte_substr allocates a substring + jit_prepare_call_with_gc(jit, asm); + + // Get stack operands after potential SP change + let len = asm.stack_opnd(0); + let beg = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]); + asm.stack_pop(3); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ret_opnd); + + true +} + +fn jit_rb_str_getbyte( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#getbyte"); + + // Don't pop since we may bail + let idx = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); 
+ + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + if comptime_idx.fixnum_p(){ + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_idx.class_of(), + idx, + idx.into(), + comptime_idx, + SEND_MAX_DEPTH, + Counter::getbyte_idx_not_fixnum, + ); + } else { + return false; + } + + // Untag the index + let idx = asm.rshift(idx, Opnd::UImm(1)); + + // If index is negative, exit + asm.cmp(idx, Opnd::UImm(0)); + asm.jl(Target::side_exit(Counter::getbyte_idx_negative)); + + asm_comment!(asm, "get string length"); + let recv = asm.load(recv); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + // Exit if the indes is out of bounds + asm.cmp(idx, str_len_opnd); + asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds)); + + let str_ptr = get_string_ptr(asm, recv); + // FIXME: could use SIB indexing here with proper support in backend + let str_ptr = asm.add(str_ptr, idx); + let byte = asm.load(Opnd::mem(8, str_ptr, 0)); + + // Zero-extend the byte to 64 bits + let byte = byte.with_num_bits(64).unwrap(); + let byte = asm.and(byte, 0xFF.into()); + + // Tag the byte + let byte = asm.lshift(byte, Opnd::UImm(1)); + let byte = asm.or(byte, Opnd::UImm(1)); + + asm.stack_pop(2); // Keep them on stack during ccall for GC + let out_opnd = asm.stack_push(Type::Fixnum); + asm.mov(out_opnd, byte); + + true +} + +fn jit_rb_str_setbyte( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Raises when index is out of range. Lazily push a frame in that case. 
+ if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + asm_comment!(asm, "String#setbyte"); + + let value = asm.stack_opnd(0); + let index = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]); + asm.stack_pop(3); // Keep them on stack during ccall for GC + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(out_opnd, ret_opnd); true } @@ -3618,17 +5801,16 @@ fn jit_rb_str_bytesize( // this situation happens a lot in some workloads. fn jit_rb_str_to_s( _jit: &mut JITState, - _ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool { - if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } { - add_comment(cb, "to_s on plain string"); + if unsafe { known_recv_class == Some(rb_cString) } { + asm_comment!(asm, "to_s on plain string"); // The method returns the receiver, which is already on the stack. // No stack movement. 
return true; @@ -3636,130 +5818,448 @@ fn jit_rb_str_to_s( false } -// Codegen for rb_str_concat() +// Codegen for rb_str_empty_p() +fn jit_rb_str_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let recv_opnd = asm.stack_pop(1); + + asm_comment!(asm, "get string length"); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv_opnd), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + asm.cmp(str_len_opnd, Opnd::UImm(0)); + let string_empty = asm.csel_e(Qtrue.into(), Qfalse.into()); + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(out_opnd, string_empty); + + return true; +} + +// Codegen for rb_str_concat() -- *not* String#concat // Frequently strings are concatenated using "out_str << next_str". // This is common in Erb and similar templating languages. fn jit_rb_str_concat( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - let comptime_arg = jit_peek_at_stack(jit, ctx, 0); - let comptime_arg_type = ctx.get_opnd_type(StackOpnd(0)); - - // String#<< can take an integer codepoint as an argument, but we don't optimise that. - // Also, a non-string argument would have to call .to_str on itself before being treated - // as a string, and that would require saving pc/sp, which we don't do here. - if comptime_arg_type != Type::String { + // The << operator can accept integer codepoints for characters + // as the argument. We only specially optimise string arguments. 
+ // If the peeked-at compile time argument is something other than + // a string, assume it won't be a string later either. + let comptime_arg = jit.peek_at_stack(&asm.ctx, 0); + if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } { return false; } - // Generate a side exit - let side_exit = get_side_exit(jit, ocb, ctx); + // Guard that the concat argument is a string + guard_object_is_string(asm, asm.stack_opnd(0), StackOpnd(0), Counter::guard_send_not_string); - // Guard that the argument is of class String at runtime. - let arg_opnd = ctx.stack_opnd(0); - mov(cb, REG0, arg_opnd); - if !jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - unsafe { rb_cString }, - StackOpnd(0), - comptime_arg, - SEND_MAX_DEPTH, - side_exit, - ) { - return false; - } + // Guard buffers from GC since rb_str_buf_append may allocate. + // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised + // backtraces on this method since the interpreter does the same thing on opt_ltlt. + jit_prepare_non_leaf_call(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. - let concat_arg = ctx.stack_pop(1); - let recv = ctx.stack_pop(1); + let concat_arg = asm.stack_pop(1); + let recv = asm.stack_pop(1); // Test if string encodings differ. If different, use rb_str_append. If the same, // use rb_yjit_str_simple_append, which calls rb_str_cat. - add_comment(cb, "<< on strings"); - - // Both rb_str_append and rb_yjit_str_simple_append take identical args - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], concat_arg); + asm_comment!(asm, "<< on strings"); // Take receiver's object flags XOR arg's flags. If any // string-encoding flags are different between the two, // the encodings don't match. 
- mov(cb, REG0, recv); - mov(cb, REG1, concat_arg); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - xor(cb, REG0, mem_opnd(64, REG1, RUBY_OFFSET_RBASIC_FLAGS)); - test(cb, REG0, uimm_opnd(RUBY_ENCODING_MASK as u64)); + let recv_reg = asm.load(recv); + let concat_arg_reg = asm.load(concat_arg); + let flags_xor = asm.xor( + Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::mem(64, concat_arg_reg, RUBY_OFFSET_RBASIC_FLAGS) + ); + asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); - let enc_mismatch = cb.new_label("enc_mismatch".to_string()); - jne_label(cb, enc_mismatch); + let enc_mismatch = asm.new_label("enc_mismatch"); + asm.jnz(enc_mismatch); // If encodings match, call the simple append function and jump to return - call_ptr(cb, REG0, rb_yjit_str_simple_append as *const u8); - let ret_label: usize = cb.new_label("stack_return".to_string()); - jmp_label(cb, ret_label); + let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]); + let ret_label = asm.new_label("func_return"); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, ret_opnd); + asm.stack_pop(1); // forget stack_ret to re-push after ccall + asm.jmp(ret_label); // If encodings are different, use a slower encoding-aware concatenate - cb.write_label(enc_mismatch); - call_ptr(cb, REG0, rb_str_append as *const u8); + asm.write_label(enc_mismatch); + asm.spill_temps(); // Ignore the register for the other local branch + let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, ret_opnd); // Drop through to return - cb.write_label(ret_label); - let stack_ret = ctx.stack_push(Type::String); - mov(cb, stack_ret, RAX); + asm.write_label(ret_label); + + true +} + +// Codegen for rb_ary_empty_p() +fn jit_rb_ary_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const 
rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + asm.test(len_opnd, len_opnd); + let bool_val = asm.csel_z(Qtrue.into(), Qfalse.into()); + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.store(out_opnd, bool_val); + + return true; +} + +// Codegen for rb_ary_length() +fn jit_rb_ary_length( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + // Convert the length to a fixnum + let shifted_val = asm.lshift(len_opnd, Opnd::UImm(1)); + let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + let out_opnd = asm.stack_push(Type::Fixnum); + asm.store(out_opnd, out_val); + + return true; +} + +fn jit_rb_ary_push( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Array#<<"); + + // rb_ary_push allocates memory for buffer extension and can raise FrozenError + // Not using a lazy frame here since the interpreter also has a truncated + // stack trace from opt_ltlt. 
+ jit_prepare_non_leaf_call(jit, asm); + + let item_opnd = asm.stack_opnd(0); + let ary_opnd = asm.stack_opnd(1); + let ret = asm.ccall(rb_ary_push as *const u8, vec![ary_opnd, item_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let ret_opnd = asm.stack_push(Type::TArray); + asm.mov(ret_opnd, ret); + true +} + +// Just a leaf method, but not using `Primitive.attr! :leaf` since BOP methods can't use it. +fn jit_rb_hash_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Hash#empty?"); + + let hash_opnd = asm.stack_pop(1); + let ret = asm.ccall(rb_hash_empty_p as *const u8, vec![hash_opnd]); + + let ret_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(ret_opnd, ret); + true +} + +fn jit_obj_respond_to( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + // respond_to(:sym) or respond_to(:sym, true) + if argc != 1 && argc != 2 { + return false; + } + + let recv_class = match known_recv_class { + Some(class) => class, + None => return false, + }; + + // Get the method_id from compile time. We will later add a guard against it. 
+ let mid_sym = jit.peek_at_stack(&asm.ctx, (argc - 1) as isize); + if !mid_sym.static_sym_p() { + return false + } + let mid = unsafe { rb_sym2id(mid_sym) }; + + // Option<bool> representing the value of the "include_all" argument and whether it's known + let allow_priv = if argc == 1 { + // Default is false + Some(false) + } else { + // Get value from type information (may or may not be known) + asm.ctx.get_opnd_type(StackOpnd(0)).known_truthy() + }; + + let target_cme = unsafe { rb_callable_method_entry_or_negative(recv_class, mid) }; + + // Should never be null, as in that case we will be returned a "negative CME" + assert!(!target_cme.is_null()); + + let cme_def_type = unsafe { get_cme_def_type(target_cme) }; + + if cme_def_type == VM_METHOD_TYPE_REFINED { + return false; + } + + let visibility = if cme_def_type == VM_METHOD_TYPE_UNDEF { + METHOD_VISI_UNDEF + } else { + unsafe { METHOD_ENTRY_VISI(target_cme) } + }; + + let result = match (visibility, allow_priv) { + (METHOD_VISI_UNDEF, _) => { + // No method, we can return false given respond_to_missing? hasn't been overridden. + // In the future, we might want to jit the call to respond_to_missing? + if !assume_method_basic_definition(jit, asm, ocb, recv_class, ID!(respond_to_missing)) { + return false; + } + Qfalse + } + (METHOD_VISI_PUBLIC, _) | // Public method => fine regardless of include_all + (_, Some(true)) => { // include_all => all visibility are acceptable + // Method exists and has acceptable visibility + if cme_def_type == VM_METHOD_TYPE_NOTIMPLEMENTED { + // C method with rb_f_notimplement(). `respond_to?` returns false + // without consulting `respond_to_missing?`. See also: rb_add_method_cfunc() + Qfalse + } else { + Qtrue + } + } + (_, _) => return false // not public and include_all not known, can't compile + }; + + // Invalidate this block if method lookup changes for the method being queried. 
This works + // both for the case where a method does or does not exist, as for the latter we asked for a + // "negative CME" earlier. + jit.assume_method_lookup_stable(asm, ocb, target_cme); + + if argc == 2 { + // pop include_all argument (we only use its type info) + asm.stack_pop(1); + } + + let sym_opnd = asm.stack_pop(1); + let _recv_opnd = asm.stack_pop(1); + + // This is necessary because we have no guarantee that sym_opnd is a constant + asm_comment!(asm, "guard known mid"); + asm.cmp(sym_opnd, mid_sym.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_respond_to_mid_mismatch, + ); + + jit_putobject(asm, result); + + true +} + +fn jit_rb_f_block_given_p( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::UnknownImm); + + gen_block_given(jit, asm, out_opnd, Qtrue.into(), Qfalse.into()); + + true +} + +fn gen_block_given( + jit: &mut JITState, + asm: &mut Assembler, + out_opnd: Opnd, + true_opnd: Opnd, + false_opnd: Opnd, +) { + asm_comment!(asm, "block_given?"); + + // Same as rb_vm_frame_block_handler + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + // Return `block_handler != VM_BLOCK_HANDLER_NONE` + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + let block_given = asm.csel_ne(true_opnd, false_opnd); + asm.mov(out_opnd, block_given); +} + +// Codegen for rb_class_superclass() +fn jit_rb_class_superclass( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<crate::codegen::BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + extern "C" 
{ + fn rb_class_superclass(klass: VALUE) -> VALUE; + } + + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) { + return false; + } + + asm_comment!(asm, "Class#superclass"); + let recv_opnd = asm.stack_opnd(0); + let ret = asm.ccall(rb_class_superclass as *const u8, vec![recv_opnd]); + + asm.stack_pop(1); + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + + true +} + +fn jit_rb_case_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if !jit.assume_expected_cfunc( asm, ocb, known_recv_class.unwrap(), ID!(eq), rb_obj_equal as _) { + return false; + } + + asm_comment!(asm, "case_equal: {}#===", get_class_name(known_recv_class)); + + // Compare the arguments + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + asm.cmp(arg0, arg1); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); - cb.link_labels(); true } fn jit_thread_s_current( _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - add_comment(cb, "Thread.current"); - ctx.stack_pop(1); + asm_comment!(asm, "Thread.current"); + asm.stack_pop(1); // ec->thread_ptr - let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR); - mov(cb, REG0, ec_thread_ptr); + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); // thread->self - let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF); - mov(cb, REG0, thread_self); + let thread_self = Opnd::mem(64, ec_thread_opnd, 
RUBY_OFFSET_THREAD_SELF); - let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, REG0); + let stack_ret = asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, thread_self); true } // Check if we know how to codegen for a particular cfunc method fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> { let method_serial = unsafe { get_def_method_serial(def) }; + let table = unsafe { METHOD_CODEGEN_TABLE.as_ref().unwrap() }; - CodegenGlobals::look_up_codegen_method(method_serial) + let option_ref = table.get(&method_serial); + match option_ref { + None => None, + Some(&mgf) => Some(mgf), // Deref + } } // Is anyone listening for :c_call and :c_return event currently? fn c_method_tracing_currently_enabled(jit: &JITState) -> bool { // Defer to C implementation in yjit.c unsafe { - rb_c_method_tracing_currently_enabled(jit.ec.unwrap() as *mut rb_execution_context_struct) + rb_c_method_tracing_currently_enabled(jit.ec) } } @@ -3780,26 +6280,188 @@ unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> V hash } +// SpecVal is a single value in an iseq invocation's environment on the stack, +// at sp[-2]. Depending on the frame type, it can serve different purposes, +// which are covered here by enum variants. +enum SpecVal { + BlockHandler(Option<BlockHandler>), + PrevEP(*const VALUE), + PrevEPOpnd(Opnd), +} + +// Each variant represents a branch in vm_caller_setup_arg_block. +#[derive(Clone, Copy)] +pub enum BlockHandler { + // send, invokesuper: blockiseq operand + BlockISeq(IseqPtr), + // invokesuper: GET_BLOCK_HANDLER() (GET_LEP()[VM_ENV_DATA_INDEX_SPECVAL]) + LEPSpecVal, + // part of the allocate-free block forwarding scheme + BlockParamProxy, + // To avoid holding the block arg (e.g. 
proc and symbol) across C calls, + // we might need to set the block handler early in the call sequence + AlreadySet, +} + +struct ControlFrame { + recv: Opnd, + sp: Opnd, + iseq: Option<IseqPtr>, + pc: Option<u64>, + frame_type: u32, + specval: SpecVal, + cme: *const rb_callable_method_entry_t, +} + +// Codegen performing a similar (but not identical) function to vm_push_frame +// +// This will generate the code to: +// * initialize locals to Qnil +// * push the environment (cme, block handler, frame type) +// * push a new CFP +// * save the new CFP to ec->cfp +// +// Notes: +// * Provided sp should point to the new frame's sp, immediately following locals and the environment +// * At entry, CFP points to the caller (not callee) frame +// * At exit, ec->cfp is updated to the pushed CFP +// * SP register is updated only if frame.iseq is set +// * Stack overflow is not checked (should be done by the caller) +// * Interrupts are not checked (should be done by the caller) +fn gen_push_frame( + jit: &mut JITState, + asm: &mut Assembler, + frame: ControlFrame, +) { + let sp = frame.sp; + + asm_comment!(asm, "push cme, specval, frame type"); + + // Write method entry at sp[-3] + // sp[-3] = me; + // Use compile time cme. It's assumed to be valid because we are notified when + // any cme we depend on become outdated. See yjit_method_lookup_change(). + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -3), VALUE::from(frame.cme).into()); + + // Write special value at sp[-2]. It's either a block handler or a pointer to + // the outer environment depending on the frame type. + // sp[-2] = specval; + let specval: Opnd = match frame.specval { + SpecVal::BlockHandler(None) => VM_BLOCK_HANDLER_NONE.into(), + SpecVal::BlockHandler(Some(block_handler)) => { + match block_handler { + BlockHandler::BlockISeq(block_iseq) => { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). 
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + + let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self, Opnd::Imm(1)) + } + BlockHandler::LEPSpecVal => { + let lep_opnd = gen_get_lep(jit, asm); + asm.load(Opnd::mem(64, lep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)) + } + BlockHandler::BlockParamProxy => { + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + block_handler + } + BlockHandler::AlreadySet => 0.into(), // unused + } + } + SpecVal::PrevEP(prev_ep) => { + let tagged_prev_ep = (prev_ep as usize) | 1; + VALUE(tagged_prev_ep).into() + } + SpecVal::PrevEPOpnd(ep_opnd) => { + asm.or(ep_opnd, 1.into()) + } + }; + if let SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) = frame.specval { + asm_comment!(asm, "specval should have been set"); + } else { + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), specval); + } + + // Write env flags at sp[-1] + // sp[-1] = frame_type; + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), frame.frame_type.into()); + + // Allocate a new CFP (ec->cfp--) + fn cfp_opnd(offset: i32) -> Opnd { + Opnd::mem(64, CFP, offset - (RUBY_SIZEOF_CONTROL_FRAME as i32)) + } + + // Setup the new frame + // *cfp = (const struct rb_control_frame_struct) { + // .pc = <unset for iseq, 0 for cfunc>, + // .sp = sp, + // .iseq = <iseq for iseq, 0 for cfunc>, + // .self = recv, + // .ep = <sp - 1>, + // .block_code = 0, + // }; + asm_comment!(asm, "push callee control frame"); + + // For an iseq call PC may be None, in which case we will not set PC and will allow jitted code + // to set it as necessary. 
+ if let Some(pc) = frame.pc { + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into()); + }; + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp); + let iseq: Opnd = if let Some(iseq) = frame.iseq { + VALUE::from(iseq).into() + } else { + 0.into() + }; + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), iseq); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into()); + + if frame.iseq.is_some() { + // Spill stack temps to let the callee use them (must be done before changing the SP register) + asm.spill_temps(); + + // Saving SP before calculating ep avoids a dependency on a register + // However this must be done after referencing frame.recv, which may be SP-relative + asm.mov(SP, sp); + } + let ep = asm.sub(sp, SIZEOF_VALUE.into()); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep); +} + fn gen_send_cfunc( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, + recv_known_class: Option<VALUE>, + flags: u32, argc: i32, - recv_known_klass: *const VALUE, -) -> CodegenStatus { +) -> Option<CodegenStatus> { let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let mut argc = argc; - // If the function expects a Ruby array of arguments - if cfunc_argc < 0 && cfunc_argc != -1 { - gen_counter_incr!(cb, send_cfunc_ruby_array_varg); - return CantCompile; + // Splat call to a C method that takes `VALUE *` and `len` + let variable_splat = flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1; + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; + + // If it's a splat and the method expects a Ruby array of arguments + if cfunc_argc == -2 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_neg2); + return None; } + exit_if_kwsplat_non_nil(asm, flags, Counter::send_cfunc_kw_splat_non_nil)?; + let 
kw_splat = flags & VM_CALL_KW_SPLAT != 0; + let kw_arg = unsafe { vm_ci_kwarg(ci) }; let kw_arg_num = if kw_arg.is_null() { 0 @@ -3807,320 +6469,613 @@ fn gen_send_cfunc( unsafe { get_cikw_keyword_len(kw_arg) } }; - // Number of args which will be passed through to the callee - // This is adjusted by the kwargs being combined into a hash. - let passed_argc = if kw_arg.is_null() { - argc - } else { - argc - kw_arg_num + 1 - }; - - // If the argument count doesn't match - if cfunc_argc >= 0 && cfunc_argc != passed_argc { - gen_counter_incr!(cb, send_cfunc_argc_mismatch); - return CantCompile; - } - - // Don't JIT functions that need C stack arguments for now - if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) { - gen_counter_incr!(cb, send_cfunc_toomany_args); - return CantCompile; + if kw_arg_num != 0 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_with_kw); + return None; } if c_method_tracing_currently_enabled(jit) { // Don't JIT if tracing c_call or c_return - gen_counter_incr!(cb, send_cfunc_tracing); - return CantCompile; + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; } + // Increment total cfunc send count + gen_counter_incr(asm, Counter::num_send_cfunc); + // Delegate to codegen for C methods if we have it. - if kw_arg.is_null() { - let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def }); - if let Some(known_cfunc_codegen) = codegen_p { - if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) { + if kw_arg.is_null() && + !kw_splat && + flags & VM_CALL_OPT_SEND == 0 && + flags & VM_CALL_ARGS_SPLAT == 0 && + (cfunc_argc == -1 || argc == cfunc_argc) { + let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc; + if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) { + // We don't push a frame for specialized cfunc codegen, so the generated code must be leaf. 
+ // However, the interpreter doesn't push a frame on opt_* instruction either, so we allow + // non-sendish instructions to break this rule as an exception. + let cfunc_codegen = if jit.is_sendish() { + asm.with_leaf_ccall(|asm| + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class)) + ) + } else { + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class)) + }; + + if cfunc_codegen { + assert_eq!(expected_stack_after, asm.ctx.get_stack_size() as i32); + gen_counter_incr(asm, Counter::num_send_cfunc_inline); // cfunc codegen generated code. Terminate the block so // there isn't multiple calls in the same block. - jump_to_next_insn(jit, ctx, cb, ocb); - return EndBlock; + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); } } } - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - // Check for interrupts - gen_check_ints(cb, side_exit); + gen_check_ints(asm, Counter::guard_send_interrupted); // Stack overflow check // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t) - add_comment(cb, "stack overflow check"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize), - ); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); + let stack_limit = asm.lea(asm.ctx.sp_opnd((4 + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE)) as i32)); + asm.cmp(CFP, stack_limit); + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + // Guard for variable length splat call before any modifications to the stack + if variable_splat { + let splat_array_idx = i32::from(kw_splat) + 
i32::from(block_arg); + let comptime_splat_array = jit.peek_at_stack(&asm.ctx, splat_array_idx as isize); + if unsafe { rb_yjit_ruby2_keywords_splat_p(comptime_splat_array) } != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_varg_ruby2_keywords); + return None; + } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); + let splat_array = asm.stack_opnd(splat_array_idx); + guard_object_is_array(asm, splat_array, splat_array.into(), Counter::guard_send_splat_not_array); - // Store incremented PC into current control frame in case callee raises. - jit_save_pc(jit, cb, REG0); + asm_comment!(asm, "guard variable length splat call servicable"); + let sp = asm.ctx.sp_opnd(0); + let proceed = asm.ccall(rb_yjit_splat_varg_checks as _, vec![sp, splat_array, CFP]); + asm.cmp(proceed, Qfalse.into()); + asm.je(Target::side_exit(Counter::guard_send_cfunc_bad_splat_vargs)); + } - if let Some(block_iseq) = block { - // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). - // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases - // with cfp->block_code. - jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize)); - let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE); - mov(cb, block_code_opnd, REG0); + // Number of args which will be passed through to the callee + // This is adjusted by the kwargs being combined into a hash. 
+ let mut passed_argc = if kw_arg.is_null() { + argc + } else { + argc - kw_arg_num + 1 + }; + + // Exclude the kw_splat hash from arity check + if kw_splat { + passed_argc -= 1; } - // Increment the stack pointer by 3 (in the callee) - // sp += 3 - lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); + // If the argument count doesn't match + if cfunc_argc >= 0 && cfunc_argc != passed_argc && flags & VM_CALL_ARGS_SPLAT == 0 { + gen_counter_incr(asm, Counter::send_cfunc_argc_mismatch); + return None; + } - // Write method entry at sp[-3] - // sp[-3] = me; - // Put compile time cme into REG1. It's assumed to be valid because we are notified when - // any cme we depend on become outdated. See yjit_method_lookup_change(). - jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); - - // Write block handler at sp[-2] - // sp[-2] = block_handler; - if let Some(_block_iseq) = block { - // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF); - lea(cb, REG1, cfp_self); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); + // Don't JIT functions that need C stack arguments for now + if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { + gen_counter_incr(asm, Counter::send_cfunc_toomany_args); + return None; + } + + let block_arg_type = if block_arg { + Some(asm.ctx.get_opnd_type(StackOpnd(0))) } else { - let dst_opnd = mem_opnd(64, REG0, 8 * -2); - mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); + None + }; + + match block_arg_type { + Some(Type::Nil | Type::BlockParamProxy) => { + // We'll handle this later + } + None => { + // Nothing to do + } + _ => { + gen_counter_incr(asm, Counter::send_cfunc_block_arg); + return None; + } } - // Write env flags at sp[-1] - // sp[-1] = frame_type; + match block_arg_type { + Some(Type::Nil) => { + // We have a nil block arg, so let's pop it off the args + 
asm.stack_pop(1); + } + Some(Type::BlockParamProxy) => { + // We don't need the actual stack value + asm.stack_pop(1); + } + None => { + // Nothing to do + } + _ => { + assert!(false); + } + } + + // Pop the empty kw_splat hash + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; + } + + // Splat handling when C method takes a static number of arguments. + // push_splat_args() does stack manipulation so we can no longer side exit + if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc >= 0 { + let required_args : u32 = (cfunc_argc as u32).saturating_sub(argc as u32 - 1); + // + 1 because we pass self + if required_args + 1 >= C_ARG_OPNDS.len() as u32 { + gen_counter_incr(asm, Counter::send_cfunc_toomany_args); + return None; + } + + // We are going to assume that the splat fills + // all the remaining arguments. So the number of args + // should just equal the number of args the cfunc takes. + // In the generated code we test if this is true + // and if not side exit. + argc = cfunc_argc; + passed_argc = argc; + push_splat_args(required_args, asm) + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // Push a dynamic number of items from the splat array to the stack when calling a vargs method + let dynamic_splat_size = if variable_splat { + asm_comment!(asm, "variable length splat"); + let stack_splat_array = asm.lea(asm.stack_opnd(0)); + Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array])) + } else { + None + }; + + // Points to the receiver operand on the stack + let recv = asm.stack_opnd(argc); + + // Store incremented PC into current control frame in case callee raises. + jit_save_pc(jit, asm); + + // Find callee's SP with space for metadata. + // Usually sp+3. 
+ let sp = if let Some(splat_size) = dynamic_splat_size { + // Compute the callee's SP at runtime in case we accept a variable size for the splat array + const _: () = assert!(SIZEOF_VALUE == 8, "opting for a shift since mul on A64 takes no immediates"); + let splat_size_bytes = asm.lshift(splat_size, 3usize.into()); + // 3 items for method metadata, minus one to remove the splat array + let static_stack_top = asm.lea(asm.ctx.sp_opnd(2)); + asm.add(static_stack_top, splat_size_bytes) + } else { + asm.lea(asm.ctx.sp_opnd(3)) + }; + + let specval = if block_arg_type == Some(Type::BlockParamProxy) { + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) + } else { + SpecVal::BlockHandler(block) + }; + let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL; if !kw_arg.is_null() { frame_type |= VM_FRAME_FLAG_CFRAME_KW } - mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into())); - - // Allocate a new CFP (ec->cfp--) - let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - // Setup the new frame - // *cfp = (const struct rb_control_frame_struct) { - // .pc = 0, - // .sp = sp, - // .iseq = 0, - // .self = recv, - // .ep = sp - 1, - // .block_code = 0, - // .__bp__ = sp, - // }; + perf_call!("gen_send_cfunc: ", gen_push_frame(jit, asm, ControlFrame { + frame_type, + specval, + cme, + recv, + sp, + pc: if cfg!(debug_assertions) { + Some(!0) // Poison value. Helps to fail fast. + } else { + None // Leave PC uninitialized as cfuncs shouldn't read it + }, + iseq: None, + })); - // Can we re-use ec_cfp_opnd from above? 
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - mov(cb, REG1, ec_cfp_opnd); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0)); - - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0)); - mov( - cb, - mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0); - mov(cb, REG0, recv); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0); - - /* - // Verify that we are calling the right function - if (YJIT_CHECK_MODE > 0) { // TODO: will we have a YJIT_CHECK_MODE? - // Call check_cfunc_dispatch - mov(cb, C_ARG_REGS[0], recv); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci); - mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func)); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme); - call_ptr(cb, REG0, (void *)&check_cfunc_dispatch); - } - */ + asm_comment!(asm, "set ec->cfp"); + let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32))); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp); if !kw_arg.is_null() { // Build a hash from all kwargs passed - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize)); - lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0)); - call_ptr(cb, REG0, build_kwhash as *const u8); + asm_comment!(asm, "build_kwhash"); + let imemo_ci = VALUE(ci as usize); + assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) }, + "we assume all callinfos with kwargs are on the GC heap"); + let sp = asm.lea(asm.ctx.sp_opnd(0)); + let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]); // Replace the stack location at the start of kwargs with the new hash - let stack_opnd = ctx.stack_opnd(argc - passed_argc); - mov(cb, stack_opnd, RAX); + let stack_opnd = asm.stack_opnd(argc - passed_argc); + asm.mov(stack_opnd, kwargs); } - // 
Copy SP into RAX because REG_SP will get overwritten - lea(cb, RAX, ctx.sp_opnd(0)); - - // Pop the C function arguments from the stack (in the caller) - ctx.stack_pop((argc + 1).try_into().unwrap()); - // Write interpreter SP into CFP. - // Needed in case the callee yields to the block. - gen_save_sp(cb, ctx); + // We don't pop arguments yet to use registers for passing them, but we + // have to set cfp->sp below them for full_cfunc_return() invalidation. + gen_save_sp_with_offset(asm, -(argc + 1) as i8); // Non-variadic method - if cfunc_argc >= 0 { + let args = if cfunc_argc >= 0 { // Copy the arguments from the stack to the C argument registers // self is the 0th argument and is at index argc from the stack top - for i in 0..=passed_argc as usize { - let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32); - let c_arg_reg = C_ARG_REGS[i]; - mov(cb, c_arg_reg, stack_opnd); - } + (0..=passed_argc).map(|i| + asm.stack_opnd(argc - i) + ).collect() } - // Variadic method - if cfunc_argc == -1 { + else if cfunc_argc == -1 { // The method gets a pointer to the first argument // rb_f_puts(int argc, VALUE *argv, VALUE recv) - mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into())); - lea( - cb, - C_ARG_REGS[1], - mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32), - ); - mov( - cb, - C_ARG_REGS[2], - mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32), + + let passed_argc_opnd = if let Some(splat_size) = dynamic_splat_size { + // The final argc is the size of the splat, minus one for the splat array itself + asm.add(splat_size, (passed_argc - 1).into()) + } else { + // Without a splat, passed_argc is static + Opnd::Imm(passed_argc.into()) + }; + + vec![ + passed_argc_opnd, + asm.lea(asm.ctx.sp_opnd(-argc)), + asm.stack_opnd(argc), + ] + } + // Variadic method taking a Ruby array + else if cfunc_argc == -2 { + // Slurp up all the arguments into an array + let stack_args = asm.lea(asm.ctx.sp_opnd(-argc)); + let args_array = asm.ccall( + 
rb_ec_ary_new_from_values as _, + vec![EC, passed_argc.into(), stack_args] ); - } + + // Example signature: + // VALUE neg2_method(VALUE self, VALUE argv) + vec![asm.stack_opnd(argc), args_array] + } else { + panic!("unexpected cfunc_args: {}", cfunc_argc) + }; // Call the C function // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]); // cfunc comes from compile-time cme->def, which we assume to be stable. // Invalidation logic is in yjit_method_lookup_change() - add_comment(cb, "call C function"); - call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) }); + asm_comment!(asm, "call C function"); + let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args); + asm.stack_pop((argc + 1).try_into().unwrap()); // Pop arguments after ccall to use registers for passing them. // Record code position for TracePoint patching. See full_cfunc_return(). - record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos()); + record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); // Push the return value on the Ruby stack - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs. + // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the method name string + let mid = unsafe { vm_ci_mid(ci) }; + let name_str = get_method_name(recv_known_class, mid); + + // Get an index for this cfunc name + let cfunc_idx = get_cfunc_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]); + } // Pop the stack frame (ec->cfp++) - // Can we reuse ec_cfp_opnd from above? 
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); + // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved + // register + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + asm.store(ec_cfp_opnd, CFP); // cfunc calls may corrupt types - ctx.clear_local_types(); + asm.clear_local_types(); // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1 // which allows for sharing the same successor. // Jump (fall through) to the call continuation block // We do this to end the current block after the call - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } -fn gen_return_branch( - cb: &mut CodeBlock, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => { - mov(cb, REG0, code_ptr_opnd(target0)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); +// Generate RARRAY_LEN. For array_opnd, use Opnd::Reg to reduce memory access, +// and use Opnd::Mem to save registers. +fn get_array_len(asm: &mut Assembler, array_opnd: Opnd) -> Opnd { + asm_comment!(asm, "get array length for embedded or heap"); + + // Pull out the embed flag to check if it's an embedded array. + let array_reg = match array_opnd { + Opnd::InsnOut { .. 
} => array_opnd, + _ => asm.load(array_opnd), + }; + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + + // Get the length of the array + let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); + let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); + + // Conditionally move the length of the heap array + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + + let array_reg = match array_opnd { + Opnd::InsnOut { .. } => array_opnd, + _ => asm.load(array_opnd), + }; + let array_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + array_reg, + RUBY_OFFSET_RARRAY_AS_HEAP_LEN, + ); + + // Select the array length value + asm.csel_nz(emb_len_opnd, array_len_opnd) +} + +// Generate RARRAY_CONST_PTR (part of RARRAY_AREF) +fn get_array_ptr(asm: &mut Assembler, array_reg: Opnd) -> Opnd { + asm_comment!(asm, "get array pointer for embedded or heap"); + + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + let heap_ptr_opnd = Opnd::mem( + usize::BITS as u8, + array_reg, + RUBY_OFFSET_RARRAY_AS_HEAP_PTR, + ); + + // Load the address of the embedded array + // (struct RArray *)(obj)->as.ary + let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); + asm.csel_nz(ary_opnd, heap_ptr_opnd) +} + +// Generate RSTRING_PTR +fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd { + asm_comment!(asm, "get string pointer for embedded or heap"); + + let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into()); + let heap_ptr_opnd = asm.load(Opnd::mem( + usize::BITS as u8, + string_reg, + RUBY_OFFSET_RSTRING_AS_HEAP_PTR, + )); + + // Load the address of the embedded array + // (struct RString *)(obj)->as.ary + let ary_opnd = 
asm.lea(Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RSTRING_AS_ARY)); + asm.csel_nz(heap_ptr_opnd, ary_opnd) +} + +/// Pushes arguments from an array to the stack. Differs from push splat because +/// the array can have items left over. Array is assumed to be T_ARRAY without guards. +fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "copy_splat_args_for_rest_callee"); + + // Unused operands cause the backend to panic + if num_args == 0 { + return; + } + + asm_comment!(asm, "Push arguments from array"); + + let array_reg = asm.load(array); + let ary_opnd = get_array_ptr(asm, array_reg); + for i in 0..num_args { + let top = asm.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); + } +} + +/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args) +/// It optimistically compiles to a static size that is the exact number of arguments +/// needed for the function. 
+fn push_splat_args(required_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "push_splat_args"); + + let array_opnd = asm.stack_opnd(0); + guard_object_is_array( + asm, + array_opnd, + array_opnd.into(), + Counter::guard_send_splat_not_array, + ); + + let array_len_opnd = get_array_len(asm, array_opnd); + + asm_comment!(asm, "Guard for expected splat length"); + asm.cmp(array_len_opnd, required_args.into()); + asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal)); + + asm_comment!(asm, "Check last argument is not ruby2keyword hash"); + + // Need to repeat this here to deal with register allocation + let array_reg = asm.load(asm.stack_opnd(0)); + + let ary_opnd = get_array_ptr(asm, array_reg); + + let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32))); + + guard_object_is_not_ruby2_keyword_hash( + asm, + last_array_value, + Counter::guard_send_splatarray_last_ruby2_keywords, + ); + + asm_comment!(asm, "Push arguments from array"); + let array_opnd = asm.stack_pop(1); + + if required_args > 0 { + let array_reg = asm.load(array_opnd); + let ary_opnd = get_array_ptr(asm, array_reg); + + for i in 0..required_args { + let top = asm.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); } + + asm_comment!(asm, "end push_each"); } } -fn gen_send_iseq( +fn gen_send_bmethod( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, + flags: u32, argc: i32, -) -> CodegenStatus { - let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; - let mut argc = argc; +) -> Option<CodegenStatus> { + let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; - // When you have keyword arguments, there is an extra object that gets - // placed on the stack the represents a bitmap of the keywords that 
were not - // specified at the call site. We need to keep track of the fact that this - // value is present on the stack in order to properly set up the callee's - // stack pointer. - let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) }; - let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0; + let proc = unsafe { rb_yjit_get_proc_ptr(procv) }; + let proc_block = unsafe { &(*proc).block }; - if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 { - // We can't handle tailcalls - gen_counter_incr!(cb, send_iseq_tailcall); - return CantCompile; + if proc_block.type_ != block_type_iseq { + return None; } - // No support for callees with these parameters yet as they require allocation - // or complex handling. - if unsafe { - get_iseq_flags_has_rest(iseq) - || get_iseq_flags_has_post(iseq) - || get_iseq_flags_has_kwrest(iseq) - } { - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; + let capture = unsafe { proc_block.as_.captured.as_ref() }; + let iseq = unsafe { *capture.code.iseq.as_ref() }; + + // Optimize for single ractor mode and avoid runtime check for + // "defined with an un-shareable Proc in a different Ractor" + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::send_bmethod_ractor); + return None; } - // If we have keyword arguments being passed to a callee that only takes - // positionals, then we need to allocate a hash. For now we're going to - // call that too complex and bail. - if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; + // Passing a block to a block needs logic different from passing + // a block to a method and sometimes requires allocation. Bail for now. + if block.is_some() { + gen_counter_incr(asm, Counter::send_bmethod_block_arg); + return None; } - // If we have a method accepting no kwargs (**nil), exit if we have passed - // it any kwargs. 
- if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; + let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA; + perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) } +} + +/// The kind of a value an ISEQ returns +enum IseqReturn { + Value(VALUE), + LocalVariable(u32), + Receiver, +} + +extern { + fn rb_simple_iseq_p(iseq: IseqPtr) -> bool; +} + +/// Return the ISEQ's return value if it consists of one simple instruction and leave. +fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, ci_flags: u32) -> Option<IseqReturn> { + // Expect only two instructions and one possible operand + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + if !(2..=3).contains(&iseq_size) { + return None; } - // For computing number of locals to set up for the callee - let mut num_params = unsafe { get_iseq_body_param_size(iseq) }; + // Get the first two instructions + let first_insn = iseq_opcode_at_idx(iseq, 0); + let second_insn = iseq_opcode_at_idx(iseq, insn_len(first_insn as usize)); - // Block parameter handling. This mirrors setup_parameters_complex(). - if unsafe { get_iseq_flags_has_block(iseq) } { - if unsafe { get_iseq_body_local_iseq(iseq) == iseq } { - num_params -= 1; - } else { - // In this case (param.flags.has_block && local_iseq != iseq), - // the block argument is setup as a local variable and requires - // materialization (allocation). Bail. - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; + // Extract the return value if known + if second_insn != YARVINSN_leave { + return None; + } + match first_insn { + YARVINSN_getlocal_WC_0 => { + // Only accept simple positional only cases for both the caller and the callee. + // Reject block ISEQs to avoid autosplat and other block parameter complications. 
+ if captured_opnd.is_none() && unsafe { rb_simple_iseq_p(iseq) } && ci_flags & VM_CALL_ARGS_SIMPLE != 0 { + let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); + let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + Some(IseqReturn::LocalVariable(local_idx)) + } else { + None + } } + YARVINSN_putnil => Some(IseqReturn::Value(Qnil)), + YARVINSN_putobject => Some(IseqReturn::Value(unsafe { *rb_iseq_pc_at_idx(iseq, 1) })), + YARVINSN_putobject_INT2FIX_0_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(0))), + YARVINSN_putobject_INT2FIX_1_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(1))), + // We don't support invokeblock for now. Such ISEQs are likely not used by blocks anyway. + YARVINSN_putself if captured_opnd.is_none() => Some(IseqReturn::Receiver), + _ => None, } +} + +fn gen_send_iseq( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + iseq: *const rb_iseq_t, + ci: *const rb_callinfo, + frame_type: u32, + prev_ep: Option<*const VALUE>, + cme: *const rb_callable_method_entry_t, + block: Option<BlockHandler>, + flags: u32, + argc: i32, + captured_opnd: Option<Opnd>, +) -> Option<CodegenStatus> { + // Argument count. We will change this as we gather values from + // sources to satisfy the callee's parameters. To help make sense + // of changes, note that: + // - Parameters syntactically on the left have lower addresses. + // For example, all the lead (required) and optional parameters + // have lower addresses than the rest parameter array. + // - The larger the index one passes to Assembler::stack_opnd(), + // the *lower* the address. + let mut argc = argc; - let mut start_pc_offset = 0; + // Iseqs with keyword parameters have a hidden, unnamed parameter local + // that the callee could use to know which keywords are unspecified + // (see the `checkkeyword` instruction and check `ruby --dump=insn -e 'def foo(k:itself)=k'`). + // We always need to set up this local if the call goes through. 
+ let has_kwrest = unsafe { get_iseq_flags_has_kwrest(iseq) }; + let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) } || has_kwrest; + let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0; + let iseq_has_rest = unsafe { get_iseq_flags_has_rest(iseq) }; + let iseq_has_block_param = unsafe { get_iseq_flags_has_block(iseq) }; + let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock) + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; + let splat_call = flags & VM_CALL_ARGS_SPLAT != 0; + + // For computing offsets to callee locals + let num_params = unsafe { get_iseq_body_param_size(iseq) as i32 }; + let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 }; + + let mut start_pc_offset: u16 = 0; let required_num = unsafe { get_iseq_body_param_lead_num(iseq) }; // This struct represents the metadata about the caller-specified @@ -4132,393 +7087,628 @@ fn gen_send_iseq( unsafe { get_cikw_keyword_len(kw_arg) } }; - // Arity handling and optional parameter setup - let opts_filled = argc - required_num - kw_arg_num; + // Arity handling and optional parameter setup for positional arguments. + // Splats are handled later. + let mut opts_filled = argc - required_num - kw_arg_num - i32::from(kw_splat) - i32::from(splat_call); let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) }; - let opts_missing: i32 = opt_num - opts_filled; + // With a rest parameter or a yield to a block, + // callers can pass more than required + optional. + // So we cap ops_filled at opt_num. 
+ if iseq_has_rest || arg_setup_block { + opts_filled = min(opts_filled, opt_num); + } + let mut opts_missing: i32 = opt_num - opts_filled; + + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; + // Stack index of the splat array + let splat_pos = i32::from(block_arg) + i32::from(kw_splat) + kw_arg_num; + + exit_if_stack_too_large(iseq)?; + exit_if_tail_call(asm, ci)?; + exit_if_has_post(asm, iseq)?; + exit_if_kwsplat_non_nil(asm, flags, Counter::send_iseq_kw_splat_non_nil)?; + exit_if_has_rest_and_captured(asm, iseq_has_rest, captured_opnd)?; + exit_if_has_kwrest_and_captured(asm, has_kwrest, captured_opnd)?; + exit_if_has_rest_and_supplying_kws(asm, iseq_has_rest, supplying_kws)?; + exit_if_supplying_kw_and_has_no_kw(asm, supplying_kws, doing_kw_call)?; + exit_if_supplying_kws_and_accept_no_kwargs(asm, supplying_kws, iseq)?; + exit_if_doing_kw_and_splat(asm, doing_kw_call, flags)?; + exit_if_wrong_number_arguments(asm, arg_setup_block, opts_filled, flags, opt_num, iseq_has_rest)?; + exit_if_doing_kw_and_opts_missing(asm, doing_kw_call, opts_missing)?; + exit_if_has_rest_and_optional_and_block(asm, iseq_has_rest, opt_num, iseq, block_arg)?; + let block_arg_type = exit_if_unsupported_block_arg_type(jit, asm, block_arg)?; + + // Bail if we can't drop extra arguments for a yield by just popping them + if supplying_kws && arg_setup_block && argc > (kw_arg_num + required_num + opt_num) { + gen_counter_incr(asm, Counter::send_iseq_complex_discard_extras); + return None; + } - if opts_filled < 0 || opts_filled > opt_num { - gen_counter_incr!(cb, send_iseq_arity_error); - return CantCompile; + // Block parameter handling. This mirrors setup_parameters_complex(). + if iseq_has_block_param { + if unsafe { get_iseq_body_local_iseq(iseq) == iseq } { + // Do nothing + } else { + // In this case (param.flags.has_block && local_iseq != iseq), + // the block argument is setup as a local variable and requires + // materialization (allocation). Bail. 
+ gen_counter_incr(asm, Counter::send_iseq_materialized_block); + return None; + } } - // If we have unfilled optional arguments and keyword arguments then we - // would need to move adjust the arguments location to account for that. - // For now we aren't handling this case. - if doing_kw_call && opts_missing > 0 { - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; + // Check that required keyword arguments are supplied and find any extras + // that should go into the keyword rest parameter (**kw_rest). + if doing_kw_call { + gen_iseq_kw_call_checks(asm, iseq, kw_arg, has_kwrest, kw_arg_num)?; } + let splat_array_length = if splat_call { + let array = jit.peek_at_stack(&asm.ctx, splat_pos as isize); + let array_length = if array == Qnil { + 0 + } else if unsafe { !RB_TYPE_P(array, RUBY_T_ARRAY) } { + gen_counter_incr(asm, Counter::send_iseq_splat_not_array); + return None; + } else { + unsafe { rb_yjit_array_len(array) as u32} + }; + + // Arity check accounting for size of the splat. When callee has rest parameters, we insert + // runtime guards later in copy_splat_args_for_rest_callee() + if !iseq_has_rest { + let supplying = argc - 1 - i32::from(kw_splat) + array_length as i32; + if (required_num..=required_num + opt_num).contains(&supplying) == false { + gen_counter_incr(asm, Counter::send_iseq_splat_arity_error); + return None; + } + } + + if iseq_has_rest && opt_num > 0 { + // If we have a rest and option arguments + // we are going to set the pc_offset for where + // to jump in the called method. + // If the number of args change, that would need to + // change and we don't change that dynmically so we side exit. 
+ // On a normal splat without rest and option args this is handled + // elsewhere depending on the case + asm_comment!(asm, "Side exit if length doesn't not equal compile time length"); + let array_len_opnd = get_array_len(asm, asm.stack_opnd(splat_pos)); + asm.cmp(array_len_opnd, array_length.into()); + asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal)); + } + + Some(array_length) + } else { + None + }; + + // Check if we need the arg0 splat handling of vm_callee_setup_block_arg() + // Also known as "autosplat" inside setup_parameters_complex(). + // Autosplat checks argc == 1 after splat and kwsplat processing, so make + // sure to amend this if we start support kw_splat. + let block_arg0_splat = arg_setup_block + && (argc == 1 || (argc == 2 && splat_array_length == Some(0))) + && !supplying_kws && !doing_kw_call + && unsafe { + (get_iseq_flags_has_lead(iseq) || opt_num > 1) + && !get_iseq_flags_ambiguous_param0(iseq) + }; + if block_arg0_splat { + // If block_arg0_splat, we still need side exits after splat, but + // the splat modifies the stack which breaks side exits. So bail out. + if splat_call { + gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_args_splat); + return None; + } + // The block_arg0_splat implementation cannot deal with optional parameters. + // This is a setup_parameters_complex() situation and interacts with the + // starting position of the callee. + if opt_num > 1 { + gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_optional); + return None; + } + } + + // Adjust `opts_filled` and `opts_missing` taking + // into account the size of the splat expansion. + if let Some(len) = splat_array_length { + assert_eq!(kw_arg_num, 0); // Due to exit_if_doing_kw_and_splat(). + // Simplifies calculation below. 
+ let num_args = argc - 1 - i32::from(kw_splat) + len as i32; + + opts_filled = if num_args >= required_num { + min(num_args - required_num, opt_num) + } else { + 0 + }; + opts_missing = opt_num - opts_filled; + } + + assert_eq!(opts_missing + opts_filled, opt_num); + assert!(opts_filled >= 0); + + // ISeq with optional parameters start at different + // locations depending on the number of optionals given. if opt_num > 0 { - num_params -= opts_missing as u32; + assert!(opts_filled >= 0); unsafe { let opt_table = get_iseq_body_param_opt_table(iseq); - start_pc_offset = (*opt_table.offset(opts_filled as isize)).as_u32(); + start_pc_offset = opt_table.offset(opts_filled as isize).read().try_into().unwrap(); } } - if doing_kw_call { - // Here we're calling a method with keyword arguments and specifying - // keyword arguments at this call site. - - // This struct represents the metadata about the callee-specified - // keyword parameters. - let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; - let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap(); - let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); - - let mut required_kwargs_filled = 0; - - if keyword_num > 30 { - // We have so many keywords that (1 << num) encoded as a FIXNUM - // (which shifts it left one more) no longer fits inside a 32-bit - // immediate. - gen_counter_incr!(cb, send_iseq_complex_callee); - return CantCompile; - } - - // Check that the kwargs being passed are valid - if supplying_kws { - // This is the list of keyword arguments that the callee specified - // in its initial declaration. - // SAFETY: see compile.c for sizing of this slice. - let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) }; - - // Here we're going to build up a list of the IDs that correspond to - // the caller-specified keyword arguments. 
If they're not in the - // same order as the order specified in the callee declaration, then - // we're going to need to generate some code to swap values around - // on the stack. - let kw_arg_keyword_len: usize = - unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap(); - let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len]; - for kwarg_idx in 0..kw_arg_keyword_len { - let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) }; - caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; - } + // Increment total ISEQ send count + gen_counter_incr(asm, Counter::num_send_iseq); - // First, we're going to be sure that the names of every - // caller-specified keyword argument correspond to a name in the - // list of callee-specified keyword parameters. - for caller_kwarg in caller_kwargs { - let search_result = callee_kwargs - .iter() - .enumerate() // inject element index - .find(|(_, &kwarg)| kwarg == caller_kwarg); - - match search_result { - None => { - // If the keyword was never found, then we know we have a - // mismatch in the names of the keyword arguments, so we need to - // bail. - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); - return CantCompile; - } - Some((callee_idx, _)) if callee_idx < keyword_required_num => { - // Keep a count to ensure all required kwargs are specified - required_kwargs_filled += 1; - } - _ => (), + // Shortcut for special `Primitive.attr! 
:leaf` builtins + let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; + let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; + let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; + let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins + if let (None, Some(builtin_info), true, false, None | Some(0)) = + (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call, splat_array_length) { + let builtin_argc = unsafe { (*builtin_info).argc }; + if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) { + // We pop the block arg without using it because: + // - the builtin is leaf, so it promises to not `yield`. + // - no leaf builtins have block param at the time of writing, and + // adding one requires interpreter changes to support. + if block_arg_type.is_some() { + if iseq_has_block_param { + gen_counter_incr(asm, Counter::send_iseq_leaf_builtin_block_arg_block_param); + return None; } + asm.stack_pop(1); } - } - assert!(required_kwargs_filled <= keyword_required_num); - if required_kwargs_filled != keyword_required_num { - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); - return CantCompile; - } - } - // Number of locals that are not parameters - let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32); + // Pop empty kw_splat hash which passes nothing (exit_if_kwsplat_non_nil()) + if kw_splat { + asm.stack_pop(1); + } - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); + // Pop empty splat array which passes nothing + if let Some(0) = splat_array_length { + asm.stack_pop(1); + } - // Check for interrupts - gen_check_ints(cb, side_exit); + asm_comment!(asm, "inlined leaf builtin"); + gen_counter_incr(asm, Counter::num_send_iseq_leaf); - let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) }; - let leaf_builtin: Option<*const rb_builtin_function> 
= if leaf_builtin_raw.is_null() { - None - } else { - Some(leaf_builtin_raw) - }; - if let (None, Some(builtin_info)) = (block, leaf_builtin) { - let builtin_argc = unsafe { (*builtin_info).argc }; - if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) { - add_comment(cb, "inlined leaf builtin"); + // The callee may allocate, e.g. Integer#abs on a Bignum. + // Save SP for GC, save PC for allocation tracing, and prepare + // for global invalidation after GC's VM lock contention. + jit_prepare_call_with_gc(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) - mov(cb, C_ARG_REGS[0], REG_EC); + let mut args = vec![EC]; // Copy self and arguments for i in 0..=builtin_argc { - let stack_opnd = ctx.stack_opnd(builtin_argc - i); - let idx: usize = (i + 1).try_into().unwrap(); - let c_arg_reg = C_ARG_REGS[idx]; - mov(cb, c_arg_reg, stack_opnd); + let stack_opnd = asm.stack_opnd(builtin_argc - i); + args.push(stack_opnd); } - ctx.stack_pop((builtin_argc + 1).try_into().unwrap()); - let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 }; - call_ptr(cb, REG0, builtin_func_ptr); + let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args); + asm.stack_pop((builtin_argc + 1).try_into().unwrap()); // Keep them on stack during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); // Note: assuming that the leaf builtin doesn't change local variables here. // Seems like a safe assumption. 
- return KeepCompiling; + // Let guard chains share the same successor + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + } + + // Inline simple ISEQs whose return value is known at compile time + if let (Some(value), None, false) = (iseq_get_return_value(iseq, captured_opnd, flags), block_arg_type, opt_send_call) { + asm_comment!(asm, "inlined simple ISEQ"); + gen_counter_incr(asm, Counter::num_send_iseq_inline); + + match value { + IseqReturn::LocalVariable(local_idx) => { + // Put the local variable at the return slot + let stack_local = asm.stack_opnd(argc - 1 - local_idx as i32); + let stack_return = asm.stack_opnd(argc); + asm.mov(stack_return, stack_local); + + // Update the mapping for the return value + let mapping = asm.ctx.get_opnd_mapping(stack_local.into()); + asm.ctx.set_opnd_mapping(stack_return.into(), mapping); + + // Pop everything but the return value + asm.stack_pop(argc as usize); + } + IseqReturn::Value(value) => { + // Pop receiver and arguments + asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 }); + + // Push the return value + let stack_ret = asm.stack_push(Type::from(value)); + asm.mov(stack_ret, value.into()); + }, + IseqReturn::Receiver => { + // Just pop arguments and leave the receiver on stack + asm.stack_pop(argc as usize); + } } + + // Let guard chains share the same successor + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); } // Stack overflow check // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2. 
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) - add_comment(cb, "stack overflow check"); + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap(); - let locals_offs = - (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32); - lea(cb, REG0, ctx.sp_opnd(locals_offs as isize)); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + let locals_offs = (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE) as i32; + let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs)); + asm.cmp(CFP, stack_limit); + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + if iseq_has_rest && splat_call { + // Insert length guard for a call to copy_splat_args_for_rest_callee() + // that will come later. We will have made changes to + // the stack by spilling or handling __send__ shifting + // by the time we get to that code, so we need the + // guard here where we can still side exit. + let non_rest_arg_count = argc - i32::from(kw_splat) - 1; + if non_rest_arg_count < required_num + opt_num { + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); + + if take_count > 0 { + asm_comment!(asm, "guard splat_array_length >= {take_count}"); + + let splat_array = asm.stack_opnd(splat_pos); + let array_len_opnd = get_array_len(asm, splat_array); + asm.cmp(array_len_opnd, take_count.into()); + asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few)); + } + } - if doing_kw_call { - // Here we're calling a method with keyword arguments and specifying - // keyword arguments at this call site. + // All splats need to guard for ruby2_keywords hash. 
Check with a function call when + // splatting into a rest param since the index for the last item in the array is dynamic. + asm_comment!(asm, "guard no ruby2_keywords hash in splat"); + let bad_splat = asm.ccall(rb_yjit_ruby2_keywords_splat_p as _, vec![asm.stack_opnd(splat_pos)]); + asm.cmp(bad_splat, 0.into()); + asm.jnz(Target::side_exit(Counter::guard_send_splatarray_last_ruby2_keywords)); + } - // Number of positional arguments the callee expects before the first - // keyword argument - let args_before_kw = required_num + opt_num; + match block_arg_type { + Some(BlockArg::Nil) => { + // We have a nil block arg, so let's pop it off the args + asm.stack_pop(1); + } + Some(BlockArg::BlockParamProxy) => { + // We don't need the actual stack value + asm.stack_pop(1); + } + Some(BlockArg::TProc) => { + // Place the proc as the block handler. We do this early because + // the block arg being at the top of the stack gets in the way of + // rest param handling later. Also, since there are C calls that + // come later, we can't hold this value in a register and place it + // near the end when we push a new control frame. + asm_comment!(asm, "guard block arg is a proc"); + // Simple predicate, no need for jit_prepare_non_leaf_call(). + let is_proc = asm.ccall(rb_obj_is_proc as _, vec![asm.stack_opnd(0)]); + asm.cmp(is_proc, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_block_arg_type, + ); - // This struct represents the metadata about the caller-specified - // keyword arguments. 
- let ci_kwarg = unsafe { vm_ci_kwarg(ci) }; - let caller_keyword_len: usize = if ci_kwarg.is_null() { - 0 - } else { - unsafe { get_cikw_keyword_len(ci_kwarg) } - .try_into() - .unwrap() - }; + let callee_ep = -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1; + let callee_specval = callee_ep + VM_ENV_DATA_INDEX_SPECVAL; + if callee_specval < 0 { + // Can't write to sp[-n] since that's where the arguments are + gen_counter_incr(asm, Counter::send_iseq_clobbering_block_arg); + return None; + } + let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg + let callee_specval = asm.ctx.sp_opnd(callee_specval); + asm.store(callee_specval, proc); + } + None => { + // Nothing to do + } + } - // This struct represents the metadata about the callee-specified - // keyword parameters. - let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; + } - add_comment(cb, "keyword args"); + // push_splat_args does stack manipulation so we can no longer side exit + if let Some(array_length) = splat_array_length { + if !iseq_has_rest { + // Speculate that future splats will be done with + // an array that has the same length. We will insert guards. + argc = argc - 1 + array_length as i32; + if argc + asm.ctx.get_stack_size() as i32 > MAX_SPLAT_LENGTH { + gen_counter_incr(asm, Counter::send_splat_too_long); + return None; + } + push_splat_args(array_length, asm); + } + } - // This is the list of keyword arguments that the callee specified - // in its initial declaration. - let callee_kwargs = unsafe { (*keyword).table }; - let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap(); + // This is a .send call and we need to adjust the stack + // TODO: This can be more efficient if we do it before + // extracting from the splat array above. 
+ if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } - // Here we're going to build up a list of the IDs that correspond to - // the caller-specified keyword arguments. If they're not in the - // same order as the order specified in the callee declaration, then - // we're going to need to generate some code to swap values around - // on the stack. - let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs]; + if iseq_has_rest { + // We are going to allocate so setting pc and sp. + jit_save_pc(jit, asm); + gen_save_sp(asm); - for kwarg_idx in 0..caller_keyword_len { - let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) }; - caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; - } - let mut kwarg_idx = caller_keyword_len; + let rest_param_array = if splat_call { + let non_rest_arg_count = argc - 1; + // We start by dupping the array because someone else might have + // a reference to it. This also normalizes to an ::Array instance. + let array = asm.stack_opnd(0); + let array = asm.ccall( + rb_ary_dup as *const u8, + vec![array], + ); + asm.stack_pop(1); // Pop array after ccall to use a register for passing it. + + // This is the end stack state of all `non_rest_arg_count` situations below + argc = required_num + opts_filled; + + if non_rest_arg_count > required_num + opt_num { + // If we have more arguments than required, we need to prepend + // the items from the stack onto the array. 
+ let diff: u32 = (non_rest_arg_count - (required_num + opt_num)) + .try_into().unwrap(); + + // diff is >0 so no need to worry about null pointer + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(diff as i32)); + let values_ptr = asm.lea(values_opnd); + + asm_comment!(asm, "prepend stack values to rest array"); + let array = asm.ccall( + rb_ary_unshift_m as *const u8, + vec![Opnd::UImm(diff as u64), values_ptr, array], + ); + asm.stack_pop(diff as usize); - let mut unspecified_bits = 0; + array + } else if non_rest_arg_count < required_num + opt_num { + // If we have fewer arguments than required, we need to take some + // from the array and move them to the stack. + asm_comment!(asm, "take items from splat array"); - let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); - for callee_idx in keyword_required_num..total_kwargs { - let mut already_passed = false; - let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) }; + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); - for caller_idx in 0..caller_keyword_len { - if caller_kwargs[caller_idx] == callee_kwarg { - already_passed = true; - break; - } - } + // Copy required arguments to the stack without modifying the array + copy_splat_args_for_rest_callee(array, take_count, asm); - if !already_passed { - // Reserve space on the stack for each default value we'll be - // filling in (which is done in the next loop). Also increments - // argc so that the callee's SP is recorded correctly. - argc += 1; - let default_arg = ctx.stack_push(Type::Unknown); - - // callee_idx - keyword->required_num is used in a couple of places below. 
- let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap(); - let callee_idx_isize: isize = callee_idx.try_into().unwrap(); - let extra_args = callee_idx_isize - req_num; - - //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num]; - let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) }; - - if default_value == Qundef { - // Qundef means that this value is not constant and must be - // recalculated at runtime, so we record it in unspecified_bits - // (Qnil is then used as a placeholder instead of Qundef). - unspecified_bits |= 0x01 << extra_args; - default_value = Qnil; - } + // We will now slice the array to give us a new array of the correct size + let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(take_count.into())]); - jit_mov_gc_ptr(jit, cb, REG0, default_value); - mov(cb, default_arg, REG0); + sliced + } else { + // The arguments are equal so we can just push to the stack + asm_comment!(asm, "same length for splat array and rest param"); + assert!(non_rest_arg_count == required_num + opt_num); - caller_kwargs[kwarg_idx] = callee_kwarg; - kwarg_idx += 1; + array } - } - - assert!(kwarg_idx == total_kwargs); + } else { + asm_comment!(asm, "rest parameter without splat"); + + assert!(argc >= required_num); + let n = (argc - required_num - opts_filled) as u32; + argc = required_num + opts_filled; + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); + asm.lea(values_opnd) + }; + + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); + asm.stack_pop(n.as_usize()); - // Next, we're going to loop through every keyword that was - // specified by the caller and make sure that it's in the correct - 
// place. If it's not we're going to swap it around with another one. - for kwarg_idx in 0..total_kwargs { - let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap(); - let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) }; + new_ary + }; - // If the argument is already in the right order, then we don't - // need to generate any code since the expected value is already - // in the right place on the stack. - if callee_kwarg == caller_kwargs[kwarg_idx] { - continue; - } + // Find where to put the rest parameter array + let rest_param = if opts_missing == 0 { + // All optionals are filled, the rest param goes at the top of the stack + argc += 1; + asm.stack_push(Type::TArray) + } else { + // The top of the stack will be a missing optional, but the rest + // parameter needs to be placed after all the missing optionals. + // Place it using a stack operand with a negative stack index. + // (Higher magnitude negative stack index have higher address.) + assert!(opts_missing > 0); + // The argument deepest in the stack will be the 0th local in the callee. + let callee_locals_base = argc - 1; + let rest_param_stack_idx = callee_locals_base - required_num - opt_num; + assert!(rest_param_stack_idx < 0); + asm.stack_opnd(rest_param_stack_idx) + }; + // Store rest param to memory to avoid register shuffle as + // we won't be reading it for the remainder of the block. + asm.ctx.dealloc_temp_reg(rest_param.stack_idx()); + asm.store(rest_param, rest_param_array); + } + + // Pop surplus positional arguments when yielding + if arg_setup_block { + let extras = argc - required_num - opt_num; + if extras > 0 { + // Checked earlier. If there are keyword args, then + // the positional arguments are not at the stack top. 
+ assert_eq!(0, kw_arg_num); + + asm.stack_pop(extras as usize); + argc = required_num + opt_num; + } + } - // In this case the argument is not in the right place, so we - // need to find its position where it _should_ be and swap with - // that location. - for swap_idx in (kwarg_idx + 1)..total_kwargs { - if callee_kwarg == caller_kwargs[swap_idx] { - // First we're going to generate the code that is going - // to perform the actual swapping at runtime. - let swap_idx_i32: i32 = swap_idx.try_into().unwrap(); - let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap(); - let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw) - .try_into() - .unwrap(); - let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw) - .try_into() - .unwrap(); - stack_swap(ctx, cb, offset0, offset1, REG1, REG0); - - // Next we're going to do some bookkeeping on our end so - // that we know the order that the arguments are - // actually in now. - caller_kwargs.swap(kwarg_idx, swap_idx); + // Keyword argument passing + if doing_kw_call { + argc = gen_iseq_kw_call(jit, asm, kw_arg, iseq, argc, has_kwrest); + } + + // Same as vm_callee_setup_block_arg_arg0_check and vm_callee_setup_block_arg_arg0_splat + // on vm_callee_setup_block_arg for arg_setup_block. This is done after CALLER_SETUP_ARG + // and CALLER_REMOVE_EMPTY_KW_SPLAT, so this implementation is put here. This may need + // side exits, so you still need to allow side exits here if block_arg0_splat is true. + // Note that you can't have side exits after this arg0 splat. 
+ if block_arg0_splat { + let arg0_opnd = asm.stack_opnd(0); + + // Only handle the case that you don't need to_ary conversion + let not_array_counter = Counter::invokeblock_iseq_arg0_not_array; + guard_object_is_array(asm, arg0_opnd, arg0_opnd.into(), not_array_counter); + + // Only handle the same that the array length == ISEQ's lead_num (most common) + let arg0_len_opnd = get_array_len(asm, arg0_opnd); + let lead_num = unsafe { rb_get_iseq_body_param_lead_num(iseq) }; + asm.cmp(arg0_len_opnd, lead_num.into()); + asm.jne(Target::side_exit(Counter::invokeblock_iseq_arg0_wrong_len)); + + let arg0_reg = asm.load(arg0_opnd); + let array_opnd = get_array_ptr(asm, arg0_reg); + asm_comment!(asm, "push splat arg0 onto the stack"); + asm.stack_pop(argc.try_into().unwrap()); + for i in 0..lead_num { + let stack_opnd = asm.stack_push(Type::Unknown); + asm.mov(stack_opnd, Opnd::mem(64, array_opnd, SIZEOF_VALUE_I32 * i)); + } + argc = lead_num; + } - break; - } - } + fn nil_fill(comment: &'static str, fill_range: std::ops::Range<i32>, asm: &mut Assembler) { + if fill_range.is_empty() { + return; } - // Keyword arguments cause a special extra local variable to be - // pushed onto the stack that represents the parameters that weren't - // explicitly given a value and have a non-constant default. 
- let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64()); - mov(cb, ctx.stack_opnd(-1), unspec_opnd); + asm_comment!(asm, "{}", comment); + for i in fill_range { + let value_slot = asm.ctx.sp_opnd(i); + asm.store(value_slot, Qnil.into()); + } } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); + // Nil-initialize missing optional parameters + nil_fill( + "nil-initialize missing optionals", + { + let begin = -argc + required_num + opts_filled; + let end = -argc + required_num + opt_num; - // Store the updated SP on the current frame (pop arguments and receiver) - add_comment(cb, "store caller sp"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)), + begin..end + }, + asm ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); - - // Store the next PC in the current frame - jit_save_pc(jit, cb, REG0); + // Nil-initialize the block parameter. It's the last parameter local + if iseq_has_block_param { + let block_param = asm.ctx.sp_opnd(-argc + num_params - 1); + asm.store(block_param, Qnil.into()); + } + // Nil-initialize non-parameter locals + nil_fill( + "nil-initialize locals", + { + let begin = -argc + num_params; + let end = -argc + num_locals; - if let Some(block_val) = block { - // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). - // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases - // with cfp->block_code. 
- let gc_ptr = VALUE(block_val as usize); - jit_mov_gc_ptr(jit, cb, REG0, gc_ptr); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0); - } + begin..end + }, + asm + ); - // Adjust the callee's stack pointer - let offs = - (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 }); - lea(cb, REG0, ctx.sp_opnd(offs)); + // Points to the receiver operand on the stack unless a captured environment is used + let recv = match captured_opnd { + Some(captured_opnd) => asm.load(Opnd::mem(64, captured_opnd, 0)), // captured->self + _ => asm.stack_opnd(argc), + }; + let captured_self = captured_opnd.is_some(); + let sp_offset = argc + if captured_self { 0 } else { 1 }; - // Initialize local variables to Qnil - for i in 0..num_locals { - let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3); - mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into())); - } + // Store the updated SP on the current frame (pop arguments and receiver) + asm_comment!(asm, "store caller sp"); + let caller_sp = asm.lea(asm.ctx.sp_opnd(-sp_offset)); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); - add_comment(cb, "push env"); - // Put compile time cme into REG1. It's assumed to be valid because we are notified when - // any cme we depend on become outdated. See yjit_method_lookup_change(). 
- jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); - // Write method entry at sp[-3] - // sp[-3] = me; - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); + // Store the next PC in the current frame + jit_save_pc(jit, asm); - // Write block handler at sp[-2] - // sp[-2] = block_handler; - match block { - Some(_) => { - // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); - } - None => { - mov( - cb, - mem_opnd(64, REG0, 8 * -2), - uimm_opnd(VM_BLOCK_HANDLER_NONE.into()), - ); - } - } + // Adjust the callee's stack pointer + let callee_sp = asm.lea(asm.ctx.sp_opnd(-argc + num_locals + VM_ENV_DATA_SIZE as i32)); + + let specval = if let Some(prev_ep) = prev_ep { + // We've already side-exited if the callee expects a block, so we + // ignore any supplied block here + SpecVal::PrevEP(prev_ep) + } else if let Some(captured_opnd) = captured_opnd { + let ep_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32)); // captured->ep + SpecVal::PrevEPOpnd(ep_opnd) + } else if let Some(BlockArg::TProc) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) + } else if let Some(BlockArg::BlockParamProxy) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) + } else { + SpecVal::BlockHandler(block) + }; - // Write env flags at sp[-1] - // sp[-1] = frame_type; - let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into())); + // Setup the new frame + perf_call!("gen_send_iseq: ", gen_push_frame(jit, asm, ControlFrame { + frame_type, + specval, + cme, + recv, + sp: callee_sp, + iseq: Some(iseq), + pc: None, // We are calling into jitted code, which will set the PC as necessary + })); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. 
+ // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the ISEQ name string + let name_str = get_iseq_name(iseq); - add_comment(cb, "push callee CFP"); - // Allocate a new CFP (ec->cfp--) - sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + // Get an index for this ISEQ name + let iseq_idx = get_iseq_idx(&name_str); - // Setup the new frame - // *cfp = (const struct rb_control_frame_struct) { - // .pc = pc, - // .sp = sp, - // .iseq = iseq, - // .self = recv, - // .ep = sp - 1, - // .block_code = 0, - // .__bp__ = sp, - // }; - mov(cb, REG1, recv); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1); - mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0); - jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0); - mov( - cb, - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); + // Increment the counter for this cfunc + asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); + } // No need to set cfp->pc since the callee sets it whenever calling into routines // that could look at it through jit_save_pc(). 
@@ -4528,49 +7718,61 @@ fn gen_send_iseq( // Stub so we can return to JITted code let return_block = BlockId { iseq: jit.iseq, - idx: jit_next_insn_idx(jit), + idx: jit.next_insn_idx(), }; // Create a context for the callee - let mut callee_ctx = Context::new(); // Was DEFAULT_CTX + let mut callee_ctx = Context::default(); + + // If the callee has :inline_block annotation and the callsite has a block ISEQ, + // duplicate a callee block for each block ISEQ to make its `yield` monomorphic. + if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) { + callee_ctx.set_inline_block(iseq); + } // Set the argument types in the callee's context for arg_idx in 0..argc { - let stack_offs: u16 = (argc - arg_idx - 1).try_into().unwrap(); - let arg_type = ctx.get_opnd_type(StackOpnd(stack_offs)); + let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap(); + let arg_type = asm.ctx.get_opnd_type(StackOpnd(stack_offs)); callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type); } - let recv_type = ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap())); + let recv_type = if captured_self { + Type::Unknown // we don't track the type information of captured->self for now + } else { + asm.ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap())) + }; callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type); // The callee might change locals through Kernel#binding and other means. - ctx.clear_local_types(); + asm.clear_local_types(); - // Pop arguments and receiver in return context, push the return value - // After the return, sp_offset will be 1. The codegen for leave writes - // the return value in case of JIT-to-JIT return. 
- let mut return_ctx = *ctx; - return_ctx.stack_pop((argc + 1).try_into().unwrap()); - return_ctx.stack_push(Type::Unknown); - return_ctx.set_sp_offset(1); - return_ctx.reset_chain_depth(); + // Pop arguments and receiver in return context and + // mark it as a continuation of gen_leave() + let mut return_asm = Assembler::new(); + return_asm.ctx = asm.ctx; + return_asm.stack_pop(sp_offset.try_into().unwrap()); + return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above + return_asm.ctx.reset_chain_depth_and_defer(); + return_asm.ctx.set_as_return_landing(); // Write the JIT return address on the callee frame gen_branch( jit, - ctx, - cb, + asm, ocb, return_block, - &return_ctx, - Some(return_block), - Some(&return_ctx), - gen_return_branch, + &return_asm.ctx, + None, + None, + BranchGenFn::JITReturn, ); - //print_str(cb, "calling Ruby func:"); - //print_str(cb, rb_id2name(vm_ci_mid(ci))); + // ec->cfp is updated after cfp->jit_return for rb_profile_frames() safety + asm_comment!(asm, "switch to new CFP"); + let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Directly jump to the entry point of the callee gen_direct_jump( @@ -4580,24 +7782,514 @@ fn gen_send_iseq( iseq: iseq, idx: start_pc_offset, }, - cb, + asm, ); - EndBlock + Some(EndBlock) +} + +// Check if we can handle a keyword call +fn gen_iseq_kw_call_checks( + asm: &mut Assembler, + iseq: *const rb_iseq_t, + kw_arg: *const rb_callinfo_kwarg, + has_kwrest: bool, + caller_kw_num: i32 +) -> Option<()> { + // This struct represents the metadata about the callee-specified + // keyword parameters. 
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + let mut required_kwargs_filled = 0; + + if keyword_num > 30 || caller_kw_num > 64 { + // We have so many keywords that (1 << num) encoded as a FIXNUM + // (which shifts it left one more) no longer fits inside a 32-bit + // immediate. Similarly, we use a u64 in case of keyword rest parameter. + gen_counter_incr(asm, Counter::send_iseq_too_many_kwargs); + return None; + } + + // Check that the kwargs being passed are valid + if caller_kw_num > 0 { + // This is the list of keyword arguments that the callee specified + // in its initial declaration. + // SAFETY: see compile.c for sizing of this slice. + let callee_kwargs = if keyword_num == 0 { + &[] + } else { + unsafe { slice::from_raw_parts((*keyword).table, keyword_num) } + }; + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. + let kw_arg_keyword_len = caller_kw_num as usize; + let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len]; + for kwarg_idx in 0..kw_arg_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) }; + caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + // First, we're going to be sure that the names of every + // caller-specified keyword argument correspond to a name in the + // list of callee-specified keyword parameters. 
+ for caller_kwarg in caller_kwargs { + let search_result = callee_kwargs + .iter() + .enumerate() // inject element index + .find(|(_, &kwarg)| kwarg == caller_kwarg); + + match search_result { + None if !has_kwrest => { + // If the keyword was never found, then we know we have a + // mismatch in the names of the keyword arguments, so we need to + // bail. + gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + Some((callee_idx, _)) if callee_idx < keyword_required_num => { + // Keep a count to ensure all required kwargs are specified + required_kwargs_filled += 1; + } + _ => (), + } + } + } + assert!(required_kwargs_filled <= keyword_required_num); + if required_kwargs_filled != keyword_required_num { + gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + + Some(()) +} + +// Codegen for keyword argument handling. Essentially private to gen_send_iseq() since +// there are a lot of preconditions to check before reaching this code. +fn gen_iseq_kw_call( + jit: &mut JITState, + asm: &mut Assembler, + ci_kwarg: *const rb_callinfo_kwarg, + iseq: *const rb_iseq_t, + mut argc: i32, + has_kwrest: bool, +) -> i32 { + let caller_keyword_len_i32: i32 = if ci_kwarg.is_null() { + 0 + } else { + unsafe { get_cikw_keyword_len(ci_kwarg) } + }; + let caller_keyword_len: usize = caller_keyword_len_i32.try_into().unwrap(); + let anon_kwrest = unsafe { rb_get_iseq_flags_anon_kwrest(iseq) && !get_iseq_flags_has_kw(iseq) }; + + // This struct represents the metadata about the callee-specified + // keyword parameters. + let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + + asm_comment!(asm, "keyword args"); + + // This is the list of keyword arguments that the callee specified + // in its initial declaration. 
+ let callee_kwargs = unsafe { (*keyword).table }; + let callee_kw_count_i32: i32 = unsafe { (*keyword).num }; + let callee_kw_count: usize = callee_kw_count_i32.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. + let mut kwargs_order: Vec<ID> = vec![0; cmp::max(caller_keyword_len, callee_kw_count)]; + for kwarg_idx in 0..caller_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) }; + kwargs_order[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + let mut unspecified_bits = 0; + + // The stack_opnd() index to the 0th keyword argument. + let kwargs_stack_base = caller_keyword_len_i32 - 1; + + // Build the keyword rest parameter hash before we make any changes to the order of + // the supplied keyword arguments + let kwrest_type = if has_kwrest { + c_callable! 
{ + fn build_kw_rest(rest_mask: u64, stack_kwargs: *const VALUE, keywords: *const rb_callinfo_kwarg) -> VALUE { + if keywords.is_null() { + return unsafe { rb_hash_new() }; + } + + // Use the total number of supplied keywords as a size upper bound + let keyword_len = unsafe { (*keywords).keyword_len } as usize; + let hash = unsafe { rb_hash_new_with_size(keyword_len as u64) }; + + // Put pairs into the kwrest hash as the mask describes + for kwarg_idx in 0..keyword_len { + if (rest_mask & (1 << kwarg_idx)) != 0 { + unsafe { + let keyword_symbol = (*keywords).keywords.as_ptr().add(kwarg_idx).read(); + let keyword_value = stack_kwargs.add(kwarg_idx).read(); + rb_hash_aset(hash, keyword_symbol, keyword_value); + } + } + } + return hash; + } + } + + asm_comment!(asm, "build kwrest hash"); + + // Make a bit mask describing which keywords should go into kwrest. + let mut rest_mask: u64 = 0; + // Index for one argument that will go into kwrest. + let mut rest_collected_idx = None; + for (supplied_kw_idx, &supplied_kw) in kwargs_order.iter().take(caller_keyword_len).enumerate() { + let mut found = false; + for callee_idx in 0..callee_kw_count { + let callee_kw = unsafe { callee_kwargs.add(callee_idx).read() }; + if callee_kw == supplied_kw { + found = true; + break; + } + } + if !found { + rest_mask |= 1 << supplied_kw_idx; + if rest_collected_idx.is_none() { + rest_collected_idx = Some(supplied_kw_idx as i32); + } + } + } + + let (kwrest, kwrest_type) = if rest_mask == 0 && anon_kwrest { + // In case the kwrest hash should be empty and is anonymous in the callee, + // we can pass nil instead of allocating. Anonymous kwrest can only be + // delegated, and nil is the same as an empty hash when delegating. + (Qnil.into(), Type::Nil) + } else { + // Save PC and SP before allocating + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // Build the kwrest hash. `struct rb_callinfo_kwarg` is malloc'd, so no GC concerns. 
+ let kwargs_start = asm.lea(asm.ctx.sp_opnd(-caller_keyword_len_i32)); + let hash = asm.ccall( + build_kw_rest as _, + vec![rest_mask.into(), kwargs_start, Opnd::const_ptr(ci_kwarg.cast())] + ); + (hash, Type::THash) + }; + + // The kwrest parameter sits after `unspecified_bits` if the callee specifies any + // keywords. + let stack_kwrest_idx = kwargs_stack_base - callee_kw_count_i32 - i32::from(callee_kw_count > 0); + let stack_kwrest = asm.stack_opnd(stack_kwrest_idx); + // If `stack_kwrest` already has another argument there, we need to stow it elsewhere + // first before putting kwrest there. Use `rest_collected_idx` because that value went + // into kwrest so the slot is now free. + let kwrest_idx = callee_kw_count + usize::from(callee_kw_count > 0); + if let (Some(rest_collected_idx), true) = (rest_collected_idx, kwrest_idx < caller_keyword_len) { + let rest_collected = asm.stack_opnd(kwargs_stack_base - rest_collected_idx); + let mapping = asm.ctx.get_opnd_mapping(stack_kwrest.into()); + asm.mov(rest_collected, stack_kwrest); + asm.ctx.set_opnd_mapping(rest_collected.into(), mapping); + // Update our bookkeeping to inform the reordering step later. + kwargs_order[rest_collected_idx as usize] = kwargs_order[kwrest_idx]; + kwargs_order[kwrest_idx] = 0; + } + // Put kwrest straight into memory, since we might pop it later + asm.ctx.dealloc_temp_reg(stack_kwrest.stack_idx()); + asm.mov(stack_kwrest, kwrest); + if stack_kwrest_idx >= 0 { + asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::map_to_stack(kwrest_type)); + } + + Some(kwrest_type) + } else { + None + }; + + // Ensure the stack is large enough for the callee + for _ in caller_keyword_len..callee_kw_count { + argc += 1; + asm.stack_push(Type::Unknown); + } + // Now this is the stack_opnd() index to the 0th keyword argument. 
+ let kwargs_stack_base = kwargs_order.len() as i32 - 1; + + // Next, we're going to loop through every keyword that was + // specified by the caller and make sure that it's in the correct + // place. If it's not we're going to swap it around with another one. + for kwarg_idx in 0..callee_kw_count { + let callee_kwarg = unsafe { callee_kwargs.add(kwarg_idx).read() }; + + // If the argument is already in the right order, then we don't + // need to generate any code since the expected value is already + // in the right place on the stack. + if callee_kwarg == kwargs_order[kwarg_idx] { + continue; + } + + // In this case the argument is not in the right place, so we + // need to find its position where it _should_ be and swap with + // that location. + for swap_idx in 0..kwargs_order.len() { + if callee_kwarg == kwargs_order[swap_idx] { + // First we're going to generate the code that is going + // to perform the actual swapping at runtime. + let swap_idx_i32: i32 = swap_idx.try_into().unwrap(); + let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap(); + let offset0 = kwargs_stack_base - swap_idx_i32; + let offset1 = kwargs_stack_base - kwarg_idx_i32; + stack_swap(asm, offset0, offset1); + + // Next we're going to do some bookkeeping on our end so + // that we know the order that the arguments are + // actually in now. + kwargs_order.swap(kwarg_idx, swap_idx); + + break; + } + } + } + + // Now that every caller specified kwarg is in the right place, filling + // in unspecified default parameters won't overwrite anything.
+ for kwarg_idx in keyword_required_num..callee_kw_count { + if kwargs_order[kwarg_idx] != unsafe { callee_kwargs.add(kwarg_idx).read() } { + let default_param_idx = kwarg_idx - keyword_required_num; + let mut default_value = unsafe { (*keyword).default_values.add(default_param_idx).read() }; + + if default_value == Qundef { + // Qundef means that this value is not constant and must be + // recalculated at runtime, so we record it in unspecified_bits + // (Qnil is then used as a placeholder instead of Qundef). + unspecified_bits |= 0x01 << default_param_idx; + default_value = Qnil; + } + + let default_param = asm.stack_opnd(kwargs_stack_base - kwarg_idx as i32); + let param_type = Type::from(default_value); + asm.mov(default_param, default_value.into()); + asm.ctx.set_opnd_mapping(default_param.into(), TempMapping::map_to_stack(param_type)); + } + } + + // Pop extra arguments that went into kwrest now that they're at stack top + if has_kwrest && caller_keyword_len > callee_kw_count { + let extra_kwarg_count = caller_keyword_len - callee_kw_count; + asm.stack_pop(extra_kwarg_count); + argc = argc - extra_kwarg_count as i32; + } + + // Keyword arguments cause a special extra local variable to be + // pushed onto the stack that represents the parameters that weren't + // explicitly given a value and have a non-constant default. + if callee_kw_count > 0 { + let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64(); + let top = asm.stack_push(Type::Fixnum); + asm.mov(top, unspec_opnd.into()); + argc += 1; + } + + // The kwrest parameter sits after `unspecified_bits` + if let Some(kwrest_type) = kwrest_type { + let kwrest = asm.stack_push(kwrest_type); + // We put the kwrest parameter in memory earlier + asm.ctx.dealloc_temp_reg(kwrest.stack_idx()); + argc += 1; + } + + argc +} + +/// This is a helper function to allow us to exit early +/// during code generation if a predicate is true. 
+/// We return Option<()> here because we will be able to +/// short-circuit using the ? operator if we return None. +/// It would be great if rust let you implement ? for your +/// own types, but as of right now they don't. +fn exit_if(asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> { + if pred { + gen_counter_incr(asm, counter); + return None + } + Some(()) +} + +#[must_use] +fn exit_if_tail_call(asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> { + exit_if(asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall) +} + +#[must_use] +fn exit_if_has_post(asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> { + exit_if(asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post) +} + +#[must_use] +fn exit_if_kwsplat_non_nil(asm: &mut Assembler, flags: u32, counter: Counter) -> Option<()> { + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; + let kw_splat_stack = StackOpnd((flags & VM_CALL_ARGS_BLOCKARG != 0).into()); + exit_if(asm, kw_splat && asm.ctx.get_opnd_type(kw_splat_stack) != Type::Nil, counter) +} + +#[must_use] +fn exit_if_has_rest_and_captured(asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> { + exit_if(asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured) +} + +#[must_use] +fn exit_if_has_kwrest_and_captured(asm: &mut Assembler, iseq_has_kwrest: bool, captured_opnd: Option<Opnd>) -> Option<()> { + // We need to call a C function to allocate the kwrest hash, but also need to hold the captured + // block across the call, which we can't do. + exit_if(asm, iseq_has_kwrest && captured_opnd.is_some(), Counter::send_iseq_has_kwrest_and_captured) +} + +#[must_use] +fn exit_if_has_rest_and_supplying_kws(asm: &mut Assembler, iseq_has_rest: bool, supplying_kws: bool) -> Option<()> { + // There can be a gap between the rest parameter array and the supplied keywords, or + // no space to put the rest array (e.g.
`def foo(*arr, k:) = arr; foo(k: 1)` 1 is + // sitting where the rest array should be). + exit_if( + asm, + iseq_has_rest && supplying_kws, + Counter::send_iseq_has_rest_and_kw_supplied, + ) +} + +#[must_use] +fn exit_if_supplying_kw_and_has_no_kw(asm: &mut Assembler, supplying_kws: bool, callee_kws: bool) -> Option<()> { + // Passing keyword arguments to a callee means allocating a hash and treating + // that as a positional argument. Bail for now. + exit_if( + asm, + supplying_kws && !callee_kws, + Counter::send_iseq_has_no_kw, + ) +} + +#[must_use] +fn exit_if_supplying_kws_and_accept_no_kwargs(asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> { + // If we have a method accepting no kwargs (**nil), exit if we have passed + // it any kwargs. + exit_if( + asm, + supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) }, + Counter::send_iseq_accepts_no_kwarg + ) +} + +#[must_use] +fn exit_if_doing_kw_and_splat(asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> { + exit_if(asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw) +} + +#[must_use] +fn exit_if_wrong_number_arguments( + asm: &mut Assembler, + args_setup_block: bool, + opts_filled: i32, + flags: u32, + opt_num: i32, + iseq_has_rest: bool, +) -> Option<()> { + // Too few arguments and no splat to make up for it + let too_few = opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0; + // Too many arguments and no sink that take them + let too_many = opts_filled > opt_num && !(iseq_has_rest || args_setup_block); + + exit_if(asm, too_few || too_many, Counter::send_iseq_arity_error) +} + +#[must_use] +fn exit_if_doing_kw_and_opts_missing(asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> { + // If we have unfilled optional arguments and keyword arguments then we + // would need to adjust the arguments location to account for that. + // For now we aren't handling this case. 
+ exit_if(asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw) +} + +#[must_use] +fn exit_if_has_rest_and_optional_and_block(asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> { + exit_if( + asm, + iseq_has_rest && opt_num != 0 && (unsafe { get_iseq_flags_has_block(iseq) } || block_arg), + Counter::send_iseq_has_rest_opt_and_block + ) +} + +#[derive(Clone, Copy)] +enum BlockArg { + Nil, + /// A special sentinel value indicating the block parameter should be read from + /// the current surrounding cfp + BlockParamProxy, + /// A proc object. Could be an instance of a subclass of ::rb_cProc + TProc, +} + +#[must_use] +fn exit_if_unsupported_block_arg_type( + jit: &mut JITState, + asm: &mut Assembler, + supplying_block_arg: bool +) -> Option<Option<BlockArg>> { + let block_arg_type = if supplying_block_arg { + asm.ctx.get_opnd_type(StackOpnd(0)) + } else { + // Passing no block argument + return Some(None); + }; + + match block_arg_type { + // We'll handle Nil and BlockParamProxy later + Type::Nil => Some(Some(BlockArg::Nil)), + Type::BlockParamProxy => Some(Some(BlockArg::BlockParamProxy)), + _ if { + let sample_block_arg = jit.peek_at_stack(&asm.ctx, 0); + unsafe { rb_obj_is_proc(sample_block_arg) }.test() + } => { + // Speculate that we'll have a proc as the block arg + Some(Some(BlockArg::TProc)) + } + _ => { + gen_counter_incr(asm, Counter::send_iseq_block_arg_type); + None + } + } +} + +#[must_use] +fn exit_if_stack_too_large(iseq: *const rb_iseq_t) -> Option<()> { + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + // Reject ISEQs with very large temp stacks, + // this will allow us to use u8/i8 values to track stack_size and sp_offset + if stack_max >= i8::MAX as u32 { + incr_counter!(iseq_stack_too_large); + return None; + } + Some(()) } fn gen_struct_aref( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut 
OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, comptime_recv: VALUE, - _comptime_recv_klass: VALUE, -) -> CodegenStatus { + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { + if unsafe { vm_ci_argc(ci) } != 0 { - return CantCompile; + return None; } let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } @@ -4613,50 +8305,71 @@ fn gen_struct_aref( { let native_off = (off as i64) * (SIZEOF_VALUE as i64); if native_off > (i32::MAX as i64) { - return CantCompile; + return None; } } + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + // All structs from the same Struct class should have the same // length. So if our comptime_recv is embedded all runtime // structs of the same class should be as well, and the same is // true of the converse. 
let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; - add_comment(cb, "struct aref"); - - let recv = ctx.stack_pop(1); + asm_comment!(asm, "struct aref"); - mov(cb, REG0, recv); + let recv = asm.stack_pop(1); + let recv = asm.load(recv); - if embedded != VALUE(0) { - let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off)); - mov(cb, REG0, ary_elt); + let val = if embedded != VALUE(0) { + Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + (SIZEOF_VALUE_I32 * off)) } else { - let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR); - mov(cb, REG0, rstruct_ptr); - mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off)); - } + let rstruct_ptr = asm.load(Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR)); + Opnd::mem(64, rstruct_ptr, SIZEOF_VALUE_I32 * off) + }; - let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, REG0); + let ret = asm.stack_push(Type::Unknown); + asm.mov(ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } fn gen_struct_aset( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, comptime_recv: VALUE, - _comptime_recv_klass: VALUE, -) -> CodegenStatus { + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { if unsafe { vm_ci_argc(ci) } != 1 { - return CantCompile; + return None; + } + + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); } let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } @@ -4667,31 +8380,77 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, 
RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); - add_comment(cb, "struct aset"); + asm_comment!(asm, "struct aset"); + + let val = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]); + + let ret = asm.stack_push(Type::Unknown); + asm.mov(ret, val); + + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +// Generate code that calls a method with dynamic dispatch +fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, + sp_pops: usize, + vm_sendish: F, +) -> Option<CodegenStatus> { + // Our frame handling is not compatible with tailcall + if unsafe { vm_ci_flag((*cd).ci) } & VM_CALL_TAILCALL != 0 { + return None; + } + jit_perf_symbol_push!(jit, asm, "gen_send_dynamic", PerfMap::Codegen); + + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. 
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + + // Save PC and SP to prepare for dynamic dispatch + jit_prepare_non_leaf_call(jit, asm); - let val = ctx.stack_pop(1); - let recv = ctx.stack_pop(1); + // Squash stack canary that might be left over from elsewhere + assert_eq!(false, asm.get_leaf_ccall()); + if cfg!(debug_assertions) { + asm.store(asm.ctx.sp_opnd(0), 0.into()); + } + + // Dispatch a method + let ret = vm_sendish(asm); - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], imm_opnd(off as i64)); - mov(cb, C_ARG_REGS[2], val); - call_ptr(cb, REG0, RSTRUCT_SET as *const u8); + // Pop arguments and a receiver + asm.stack_pop(sp_pops); - let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, RAX); + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); - jump_to_next_insn(jit, ctx, cb, ocb); - EndBlock + // Fix the interpreter SP deviated by vm_sendish + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), SP); + + gen_counter_incr(asm, Counter::num_send_dynamic); + + jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen); + + // End the current block for invalidation and sharing the same successor + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } fn gen_send_general( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cd: *const rb_call_data, - block: Option<IseqPtr>, -) -> CodegenStatus { + block: Option<BlockHandler>, +) -> Option<CodegenStatus> { // Relevant definitions: // rb_execution_context_t : vm_core.h // invoker, cfunc logic : method.h, vm_method.c @@ -4703,64 +8462,78 @@ fn gen_send_general( // see vm_call_method().
let ci = unsafe { get_call_data_ci(cd) }; // info about the call site - let argc = unsafe { vm_ci_argc(ci) }; - let mid = unsafe { vm_ci_mid(ci) }; - let flags = unsafe { vm_ci_flag(ci) }; + let mut argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + let mut mid = unsafe { vm_ci_mid(ci) }; + let mut flags = unsafe { vm_ci_flag(ci) }; - // Don't JIT calls with keyword splat - if flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); - return CantCompile; + // Defer compilation so we can specialize on class of receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - // Don't JIT calls that aren't simple - // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. - if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); - return CantCompile; - } - if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); - return CantCompile; + let recv_idx = argc + if flags & VM_CALL_ARGS_BLOCKARG != 0 { 1 } else { 0 }; + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); + let comptime_recv_klass = comptime_recv.class_of(); + assert_eq!(RUBY_T_CLASS, comptime_recv_klass.builtin_type(), + "objects visible to ruby code should have a T_CLASS in their klass field"); + + // Don't compile calls through singleton classes to avoid retaining the receiver. + // Make an exception for class methods since classes tend to be retained anyways. + // Also compile calls on top_self to help tests. 
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv_klass, VALUE(RUBY_FL_SINGLETON as usize)) } + && comptime_recv != unsafe { rb_vm_top_self() } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_CLASS) } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_MODULE) } { + gen_counter_incr(asm, Counter::send_singleton_class); + return None; } - // Defer compilation so we can specialize on class of receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; - } + // Points to the receiver operand on the stack + let recv = asm.stack_opnd(recv_idx); + let recv_opnd: YARVOpnd = recv.into(); - let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize); - let comptime_recv_klass = comptime_recv.class_of(); + // Log the name of the method we're calling to + #[cfg(feature = "disasm")] + asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); - // Guard that the receiver has the same class as the one from compile time - let side_exit = get_side_exit(jit, ocb, ctx); + // Gather some statistics about sends + gen_counter_incr(asm, Counter::num_send); + if let Some(_known_klass) = asm.ctx.get_opnd_type(recv_opnd).known_class() { + gen_counter_incr(asm, Counter::num_send_known_class); + } + if asm.ctx.get_chain_depth() > 1 { + gen_counter_incr(asm, Counter::num_send_polymorphic); + } + // If megamorphic, let the caller fallback to dynamic dispatch + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::send_megamorphic); + return None; + } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); - let recv_opnd = StackOpnd(argc.try_into().unwrap()); - mov(cb, REG0, recv); - if !jit_guard_known_klass( + perf_call!("gen_send_general: ", jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, comptime_recv_klass, + recv, recv_opnd, comptime_recv, SEND_MAX_DEPTH, - side_exit, - ) { - return CantCompile; - } + Counter::guard_send_klass_megamorphic, + )); // Do method lookup let 
mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; if cme.is_null() { - // TODO: counter - return CantCompile; + gen_counter_incr(asm, Counter::send_cme_not_found); + return None; } + // Load an overloaded cme if applicable. See vm_search_cc(). + // It allows you to use a faster ISEQ if possible. + cme = unsafe { rb_check_overloaded_cme(cme, ci) }; + let visi = unsafe { METHOD_ENTRY_VISI(cme) }; match visi { METHOD_VISI_PUBLIC => { @@ -4770,11 +8543,17 @@ fn gen_send_general( if flags & VM_CALL_FCALL == 0 { // Can only call private methods with FCALL callsites. // (at the moment they are callsites without a receiver or an explicit `self` receiver) - return CantCompile; + gen_counter_incr(asm, Counter::send_private_not_fcall); + return None; } } METHOD_VISI_PROTECTED => { - jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit); + // If the method call is an FCALL, it is always valid + if flags & VM_CALL_FCALL == 0 { + // otherwise we need an ancestry check to ensure the receiver is valid to be called + // as protected + jit_protected_callee_ancestry_guard(asm, cme); + } } _ => { panic!("cmes should always have a visibility!"); @@ -4783,33 +8562,69 @@ fn gen_send_general( // Register block for invalidation //assert!(cme->called_id == mid); - assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme); + jit.assume_method_lookup_stable(asm, ocb, cme); // To handle the aliased method case (VM_METHOD_TYPE_ALIAS) loop { let def_type = unsafe { get_cme_def_type(cme) }; + match def_type { VM_METHOD_TYPE_ISEQ => { - return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; + return perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc, None) }; } VM_METHOD_TYPE_CFUNC => { - return gen_send_cfunc( + return perf_call! 
{ gen_send_cfunc( jit, - ctx, - cb, + asm, ocb, ci, cme, block, + Some(comptime_recv_klass), + flags, argc, - &comptime_recv_klass, - ); + ) }; } VM_METHOD_TYPE_IVAR => { + // This is a .send call not supported right now for attr_reader + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_attr_reader); + return None; + } + + if flags & VM_CALL_ARGS_BLOCKARG != 0 { + match asm.ctx.get_opnd_type(StackOpnd(0)) { + Type::Nil | Type::BlockParamProxy => { + // Getters ignore the block arg, and these types of block args can be + // passed without side-effect (never any `to_proc` call). + asm.stack_pop(1); + } + _ => { + gen_counter_incr(asm, Counter::send_getter_block_arg); + return None; + } + } + } + if argc != 0 { - // Argument count mismatch. Getters take no arguments. - gen_counter_incr!(cb, send_getter_arity); - return CantCompile; + // Guard for simple splat of empty array + if VM_CALL_ARGS_SPLAT == flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG | VM_CALL_KW_SPLAT) + && argc == 1 { + // Not using chain guards since on failure these likely end up just raising + // ArgumentError + let splat = asm.stack_opnd(0); + guard_object_is_array(asm, splat, splat.into(), Counter::guard_send_getter_splat_non_empty); + let splat_len = get_array_len(asm, splat); + asm.cmp(splat_len, 0.into()); + asm.jne(Target::side_exit(Counter::guard_send_getter_splat_non_empty)); + asm.stack_pop(1); + } else { + // Argument count mismatch. Getters take no arguments. + gen_counter_incr(asm, Counter::send_getter_arity); + return None; + } } if c_method_tracing_currently_enabled(jit) { @@ -4818,106 +8633,233 @@ fn gen_send_general( // Handling the C method tracing events for attr_accessor // methods is easier than regular C methods as we know the // "method" we are calling into never enables those tracing - // events. 
Once global invalidation runs, the code for the - // attr_accessor is invalidated and we exit at the closest - // instruction boundary which is always outside of the body of - // the attr_accessor code. - gen_counter_incr!(cb, send_cfunc_tracing); - return CantCompile; + // events. We are never inside the code that needs to be + // invalidated when invalidation happens. + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; } - mov(cb, REG0, recv); + let recv = asm.stack_opnd(0); // the receiver should now be the stack top let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; return gen_get_ivar( jit, - ctx, - cb, + asm, ocb, SEND_MAX_DEPTH, comptime_recv, ivar_name, - recv_opnd, - side_exit, + recv, + recv.into(), ); } VM_METHOD_TYPE_ATTRSET => { + // This is a .send call not supported right now for attr_writer + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_attr_writer); + return None; + } + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_attrset); + return None; + } if flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_attrset_kwargs); - return CantCompile; + gen_counter_incr(asm, Counter::send_attrset_kwargs); + return None; } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } { - gen_counter_incr!(cb, send_ivar_set_method); - return CantCompile; + gen_counter_incr(asm, Counter::send_ivar_set_method); + return None; } else if c_method_tracing_currently_enabled(jit) { // Can't generate code for firing c_call and c_return events // See :attr-tracing: - gen_counter_incr!(cb, send_cfunc_tracing); - return CantCompile; + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } else if flags & VM_CALL_ARGS_BLOCKARG != 0 { + gen_counter_incr(asm, Counter::send_attrset_block_arg); + return None; } else { let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; - return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name); + return 
gen_set_ivar(jit, asm, ocb, comptime_recv, ivar_name, StackOpnd(1), None); } } // Block method, e.g. define_method(:foo) { :my_block } VM_METHOD_TYPE_BMETHOD => { - gen_counter_incr!(cb, send_bmethod); - return CantCompile; - } - VM_METHOD_TYPE_ZSUPER => { - gen_counter_incr!(cb, send_zsuper_method); - return CantCompile; + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_bmethod); + return None; + } + return gen_send_bmethod(jit, asm, ocb, ci, cme, block, flags, argc); } VM_METHOD_TYPE_ALIAS => { // Retrieve the aliased method and re-enter the switch cme = unsafe { rb_aliased_callable_method_entry(cme) }; continue; } - VM_METHOD_TYPE_UNDEF => { - gen_counter_incr!(cb, send_undef_method); - return CantCompile; - } - VM_METHOD_TYPE_NOTIMPLEMENTED => { - gen_counter_incr!(cb, send_not_implemented_method); - return CantCompile; - } // Send family of methods, e.g. call/apply VM_METHOD_TYPE_OPTIMIZED => { + if flags & VM_CALL_ARGS_BLOCKARG != 0 { + gen_counter_incr(asm, Counter::send_optimized_block_arg); + return None; + } + let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; match opt_type { OPTIMIZED_METHOD_TYPE_SEND => { - gen_counter_incr!(cb, send_optimized_method_send); - return CantCompile; + // This is for method calls like `foo.send(:bar)` + // The `send` method does not get its own stack frame. + // instead we look up the method and call it, + // doing some stack shifting based on the VM_CALL_OPT_SEND flag + + // Reject nested cases such as `send(:send, :alias_for_send, :foo))`. + // We would need to do some stack manipulation here or keep track of how + // many levels deep we need to stack manipulate. Because of how exits + // currently work, we can't do stack manipulation until we will no longer + // side exit. 
+ if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_nested); + return None; + } + + if argc == 0 { + gen_counter_incr(asm, Counter::send_send_wrong_args); + return None; + } + + argc -= 1; + + let compile_time_name = jit.peek_at_stack(&asm.ctx, argc as isize); + + mid = unsafe { rb_get_symbol_id(compile_time_name) }; + if mid == 0 { + // This also rejects method names that need conversion + gen_counter_incr(asm, Counter::send_send_null_mid); + return None; + } + + cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; + if cme.is_null() { + gen_counter_incr(asm, Counter::send_send_null_cme); + return None; + } + + flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND; + + jit.assume_method_lookup_stable(asm, ocb, cme); + + asm_comment!( + asm, + "guard sending method name \'{}\'", + unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap_or_else(|| "<unknown>".to_owned()), + ); + + let name_opnd = asm.stack_opnd(argc); + let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]); + + asm.cmp(symbol_id_opnd, mid.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_send_name_chain, + ); + + // We have changed the argc, flags, mid, and cme, so we need to re-enter the match + // and compile whatever method we found from send. 
+ continue; + } OPTIMIZED_METHOD_TYPE_CALL => { - gen_counter_incr!(cb, send_optimized_method_call); - return CantCompile; + + if block.is_some() { + gen_counter_incr(asm, Counter::send_call_block); + return None; + } + + if flags & VM_CALL_KWARG != 0 { + gen_counter_incr(asm, Counter::send_call_kwarg); + return None; + } + + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_opt_call); + return None; + } + + // Optimize for single ractor mode and avoid runtime check for + // "defined with an un-shareable Proc in a different Ractor" + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::send_call_multi_ractor); + return None; + } + + // If this is a .send call we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // About to reset the SP, need to load this here + let recv_load = asm.load(recv); + + let sp = asm.lea(asm.ctx.sp_opnd(0)); + + // Save the PC and SP because the callee can make Ruby calls + jit_prepare_non_leaf_call(jit, asm); + + let kw_splat = flags & VM_CALL_KW_SPLAT; + let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)); + + let ret = asm.ccall(rb_optimized_call as *const u8, vec![ + recv_load, + EC, + argc.into(), + stack_argument_pointer, + kw_splat.into(), + VM_BLOCK_HANDLER_NONE.into(), + ]); + + asm.stack_pop(argc as usize + 1); + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + return Some(KeepCompiling); + } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { - gen_counter_incr!(cb, send_optimized_method_block_call); - return CantCompile; + gen_counter_incr(asm, Counter::send_optimized_method_block_call); + return None; } OPTIMIZED_METHOD_TYPE_STRUCT_AREF => { + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_aref); + return None; + } return gen_struct_aref( jit, - ctx, - cb, + asm, ocb, ci, cme, comptime_recv, - comptime_recv_klass, + 
flags, + argc, ); } OPTIMIZED_METHOD_TYPE_STRUCT_ASET => { + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_aset); + return None; + } return gen_struct_aset( jit, - ctx, - cb, + asm, ocb, ci, cme, comptime_recv, - comptime_recv_klass, + flags, + argc, ); } _ => { @@ -4925,13 +8867,25 @@ fn gen_send_general( } } } + VM_METHOD_TYPE_ZSUPER => { + gen_counter_incr(asm, Counter::send_zsuper_method); + return None; + } + VM_METHOD_TYPE_UNDEF => { + gen_counter_incr(asm, Counter::send_undef_method); + return None; + } + VM_METHOD_TYPE_NOTIMPLEMENTED => { + gen_counter_incr(asm, Counter::send_not_implemented_method); + return None; + } VM_METHOD_TYPE_MISSING => { - gen_counter_incr!(cb, send_missing_method); - return CantCompile; + gen_counter_incr(asm, Counter::send_missing_method); + return None; } VM_METHOD_TYPE_REFINED => { - gen_counter_incr!(cb, send_refined_method); - return CantCompile; + gen_counter_incr(asm, Counter::send_refined_method); + return None; } _ => { unreachable!(); @@ -4940,91 +8894,350 @@ fn gen_send_general( } } +/// Get class name from a class pointer. 
+fn get_class_name(class: Option<VALUE>) -> String { + class.and_then(|class| unsafe { + cstr_to_rust_string(rb_class2name(class)) + }).unwrap_or_else(|| "Unknown".to_string()) +} + +/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID +fn get_method_name(class: Option<VALUE>, mid: u64) -> String { + let class_name = get_class_name(class); + let method_name = if mid != 0 { + unsafe { cstr_to_rust_string(rb_id2name(mid)) } + } else { + None + }.unwrap_or_else(|| "Unknown".to_string()); + format!("{}#{}", class_name, method_name) +} + +/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ +fn get_iseq_name(iseq: IseqPtr) -> String { + let c_string = unsafe { rb_yjit_iseq_inspect(iseq) }; + let string = unsafe { CStr::from_ptr(c_string) }.to_str() + .unwrap_or_else(|_| "not UTF-8").to_string(); + unsafe { ruby_xfree(c_string as *mut c_void); } + string +} + +/// Shifts the stack for send in order to remove the name of the method +/// Comment below borrowed from vm_call_opt_send in vm_insnhelper.c +/// E.g. 
when argc == 2 +/// | | | | TOPN +/// +------+ | | +/// | arg1 | ---+ | | 0 +/// +------+ | +------+ +/// | arg0 | -+ +-> | arg1 | 1 +/// +------+ | +------+ +/// | sym | +---> | arg0 | 2 +/// +------+ +------+ +/// | recv | | recv | 3 +///--+------+--------+------+------ +/// +/// We do this for our compiletime context and the actual stack +fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32) { + asm_comment!(asm, "shift_stack"); + for j in (0..argc).rev() { + let opnd = asm.stack_opnd(j); + let opnd2 = asm.stack_opnd(j + 1); + asm.mov(opnd2, opnd); + } + asm.shift_stack(argc as usize); +} + fn gen_opt_send_without_block( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = perf_call! { gen_send_general(jit, asm, ocb, cd, None) } { + return Some(status); + } - gen_send_general(jit, ctx, cb, ocb, cd, None) + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_opt_send_without_block as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) } fn gen_send( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd = jit_get_arg(jit, 0).as_ptr(); - let block = jit_get_arg(jit, 1).as_optional_ptr(); - return gen_send_general(jit, ctx, cb, ocb, cd, block); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! 
{ gen_send_general(jit, asm, ocb, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_send as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_invokeblock( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokeblock_specialized(jit, asm, ocb, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_invokeblock_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokeblock(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_invokeblock as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) +} + +fn gen_invokeblock_specialized( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::invokeblock_megamorphic); + return None; + } + + // Get call info + let ci = unsafe { get_call_data_ci(cd) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + let flags = unsafe { vm_ci_flag(ci) }; + + // Get block_handler + let cfp = jit.get_cfp(); + let lep = unsafe { rb_vm_ep_local_ep(get_cfp_ep(cfp)) }; + let comptime_handler = unsafe { 
*lep.offset(VM_ENV_DATA_INDEX_SPECVAL.try_into().unwrap()) }; + + // Handle each block_handler type + if comptime_handler.0 == VM_BLOCK_HANDLER_NONE as usize { // no block given + gen_counter_incr(asm, Counter::invokeblock_none); + None + } else if comptime_handler.0 & 0x3 == 0x1 { // VM_BH_ISEQ_BLOCK_P + asm_comment!(asm, "get local EP"); + let ep_opnd = gen_get_lep(jit, asm); + let block_handler_opnd = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + asm_comment!(asm, "guard block_handler type"); + let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer + asm.cmp(tag_opnd, 0x1.into()); // VM_BH_ISEQ_BLOCK_P + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // If the current ISEQ is annotated to be inlined but it's not being inlined here, + // generate a dynamic dispatch to avoid making this yield megamorphic. + if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined); + return None; + } + + let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() }; + let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() }; + + asm_comment!(asm, "guard known ISEQ"); + let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); + let iseq_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32 * 2)); + asm.cmp(iseq_opnd, VALUE::from(comptime_iseq).into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_iseq_block_changed, + ); + + perf_call! 
{ gen_send_iseq(jit, asm, ocb, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) } + } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P + // We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet. + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::invokeblock_ifunc_args_splat); + return None; + } + if flags & VM_CALL_KW_SPLAT != 0 { + gen_counter_incr(asm, Counter::invokeblock_ifunc_kw_splat); + return None; + } + + asm_comment!(asm, "get local EP"); + let ep_opnd = gen_get_lep(jit, asm); + let block_handler_opnd = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + asm_comment!(asm, "guard block_handler type"); + let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer + asm.cmp(tag_opnd, 0x3.into()); // VM_BH_IFUNC_P + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // The cfunc may not be leaf + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_yield_with_cfunc(ec: EcPtr, captured: *const rb_captured_block, argc: c_int, argv: *const VALUE) -> VALUE; + } + asm_comment!(asm, "call ifunc"); + let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); + let argv = asm.lea(asm.ctx.sp_opnd(-argc)); + let ret = asm.ccall( + rb_vm_yield_with_cfunc as *const u8, + vec![EC, captured_opnd, argc.into(), argv], + ); + + asm.stack_pop(argc.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // cfunc calls may corrupt types + asm.clear_local_types(); + + // Share the successor with other chains + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) + } else if comptime_handler.symbol_p() { + gen_counter_incr(asm, Counter::invokeblock_symbol); + None + } else { // Proc + gen_counter_incr(asm, Counter::invokeblock_proc); + None + } } fn gen_invokesuper( jit: &mut JITState, - 
ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); - let block: Option<IseqPtr> = jit_get_arg(jit, 1).as_optional_ptr(); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, ocb, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuper as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} +fn gen_invokesuper_specialized( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on class of receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // Handle the last two branches of vm_caller_setup_arg_block + let block = if let Some(iseq) = jit.get_arg(1).as_optional_ptr() { + BlockHandler::BlockISeq(iseq) + } else { + BlockHandler::LEPSpecVal + }; + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::invokesuper_megamorphic); + return None; } - let me = unsafe { rb_vm_frame_method_entry(get_ec_cfp(jit.ec.unwrap())) }; + let me = unsafe { rb_vm_frame_method_entry(jit.get_cfp()) }; if me.is_null() { - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_no_me); + return None; } // FIXME: We 
should track and invalidate this block when this cme is invalidated let current_defined_class = unsafe { (*me).defined_class }; let mid = unsafe { get_def_original_id((*me).def) }; - if me != unsafe { rb_callable_method_entry(current_defined_class, (*me).called_id) } { - // Though we likely could generate this call, as we are only concerned - // with the method entry remaining valid, assume_method_lookup_stable - // below requires that the method lookup matches as well - return CantCompile; - } - // vm_search_normal_superclass let rbasic_ptr: *const RBasic = current_defined_class.as_ptr(); if current_defined_class.builtin_type() == RUBY_T_ICLASS - && unsafe { FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT)) != VALUE(0) } + && unsafe { RB_TYPE_P((*rbasic_ptr).klass, RUBY_T_MODULE) && FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT.as_usize())) != VALUE(0) } { - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_refinement); + return None; } let comptime_superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; let ci = unsafe { get_call_data_ci(cd) }; - let argc = unsafe { vm_ci_argc(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let ci_flags = unsafe { vm_ci_flag(ci) }; // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. 
- if ci_flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); - return CantCompile; - } + if ci_flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_keywords); - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_kwarg); + return None; } if ci_flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); - return CantCompile; - } - if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_kw_splat); + return None; } // Ensure we haven't rebound this method onto an incompatible class. @@ -5032,488 +9245,523 @@ fn gen_invokesuper( // cheaper calculations first, but since we specialize on the method entry // and so only have to do this once at compile time this is fine to always // check and side exit. - let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize); + let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize); if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) { - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_defined_class_mismatch); + return None; + } + + // Don't compile `super` on objects with singleton class to avoid retaining the receiver. 
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv.class_of(), VALUE(RUBY_FL_SINGLETON as usize)) } { + gen_counter_incr(asm, Counter::invokesuper_singleton_class); + return None; } // Do method lookup let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) }; - if cme.is_null() { - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_no_cme); + return None; } // Check that we'll be able to write this method dispatch before generating checks let cme_def_type = unsafe { get_cme_def_type(cme) }; if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC { // others unimplemented - return CantCompile; - } - - // Guard that the receiver has the same class as the one from compile time - let side_exit = get_side_exit(jit, ocb, ctx); - - let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) }; - let ep = unsafe { get_cfp_ep(cfp) }; - let cref_me = unsafe { *ep.offset(VM_ENV_DATA_INDEX_ME_CREF.try_into().unwrap()) }; - let me_as_value = VALUE(me as usize); - if cref_me != me_as_value { - // This will be the case for super within a block - return CantCompile; + gen_counter_incr(asm, Counter::invokesuper_not_iseq_or_cfunc); + return None; } - add_comment(cb, "guard known me"); - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - let ep_me_opnd = mem_opnd( + asm_comment!(asm, "guard known me"); + let lep_opnd = gen_get_lep(jit, asm); + let ep_me_opnd = Opnd::mem( 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32), + lep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, ); - jit_mov_gc_ptr(jit, cb, REG1, me_as_value); - cmp(cb, ep_me_opnd, REG1); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed)); - - if block.is_none() { - // Guard no block passed - // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE - // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep)) - // - // TODO: this could properly forward the current block handler, but - // would require changes to gen_send_* - 
add_comment(cb, "guard no block given"); - // EP is in REG0 from above - let ep_specval_opnd = mem_opnd( - 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ); - cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block)); - } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); - mov(cb, REG0, recv); + let me_as_value = VALUE(me as usize); + asm.cmp(ep_me_opnd, me_as_value.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokesuper_me_changed, + ); // We need to assume that both our current method entry and the super // method entry we invoke remain stable - assume_method_lookup_stable(jit, ocb, current_defined_class, me); - assume_method_lookup_stable(jit, ocb, comptime_superclass, cme); + jit.assume_method_lookup_stable(asm, ocb, me); + jit.assume_method_lookup_stable(asm, ocb, cme); // Method calls may corrupt types - ctx.clear_local_types(); + asm.clear_local_types(); match cme_def_type { - VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc), + VM_METHOD_TYPE_ISEQ => { + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; + perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None) } + } VM_METHOD_TYPE_CFUNC => { - gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null()) + perf_call! 
{ gen_send_cfunc(jit, asm, ocb, ci, cme, Some(block), None, ci_flags, argc) } } _ => unreachable!(), } } fn gen_leave( - jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, -) -> CodegenStatus { + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { // Only the return value should be on the stack - assert!(ctx.get_stack_size() == 1); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // Load environment pointer EP from CFP - mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); + assert_eq!(1, asm.ctx.get_stack_size(), "leave instruction expects stack size 1, but was: {}", asm.ctx.get_stack_size()); // Check for interrupts - add_comment(cb, "check for interrupts"); - gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt)); - - // Load the return value - mov(cb, REG0, ctx.stack_pop(1)); + gen_check_ints(asm, Counter::leave_se_interrupt); // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP - add_comment(cb, "pop stack frame"); - add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + asm_comment!(asm, "pop stack frame"); + let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, incr_cfp); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); - // Reload REG_SP for the caller and write the return value. - // Top of the stack is REG_SP[0] since the caller has sp_offset=1. - mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); - mov(cb, mem_opnd(64, REG_SP, 0), REG0); + // Load the return value + let retval_opnd = asm.stack_pop(1); - // Jump to the JIT return address on the frame that was just popped + // Move the return value into the C return register + asm.mov(C_RET_OPND, retval_opnd); + + // Jump to the JIT return address on the frame that was just popped. 
+ // There are a few possible jump targets: + // - gen_leave_exit() and gen_leave_exception(), for C callers + // - Return context set up by gen_send_iseq() + // We don't write the return value to stack memory like the interpreter here. + // Each jump target does it as necessary. let offset_to_jit_return = - -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32); - jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return)); + -(RUBY_SIZEOF_CONTROL_FRAME as i32) + RUBY_OFFSET_CFP_JIT_RETURN; + asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return)); - EndBlock + Some(EndBlock) } fn gen_getglobal( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let gid = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); // Save the PC and SP because we might make a Ruby call for warning - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); - - call_ptr(cb, REG0, rb_gvar_get as *const u8); + let val_opnd = asm.ccall( + rb_gvar_get as *const u8, + vec![ gid.into() ] + ); - let top = ctx.stack_push(Type::Unknown); - mov(cb, top, RAX); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_setglobal( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let gid = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); // Save the PC and SP because we might make a Ruby call for // Kernel#set_trace_var - jit_prepare_routine_call(jit, ctx, cb, REG0); - - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); - - let val = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[1], val); - - call_ptr(cb, REG0, rb_gvar_set as *const u8); + jit_prepare_non_leaf_call(jit, asm); + + let val = asm.stack_opnd(0); + asm.ccall( + 
rb_gvar_set as *const u8, + vec![ + gid.into(), + val, + ], + ); + asm.stack_pop(1); // Keep it during ccall for GC - KeepCompiling + Some(KeepCompiling) } fn gen_anytostring( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Save the PC and SP because we might make a Ruby call for - // Kernel#set_trace_var - jit_prepare_routine_call(jit, ctx, cb, REG0); +) -> Option<CodegenStatus> { + // Save the PC and SP since we might call #to_s + jit_prepare_non_leaf_call(jit, asm); - let str = ctx.stack_pop(1); - let val = ctx.stack_pop(1); + let str = asm.stack_opnd(0); + let val = asm.stack_opnd(1); - mov(cb, C_ARG_REGS[0], str); - mov(cb, C_ARG_REGS[1], val); - - call_ptr(cb, REG0, rb_obj_as_string_result as *const u8); + let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]); + asm.stack_pop(2); // Keep them during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::String); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } fn gen_objtostring( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); } - let recv = ctx.stack_opnd(0); - let comptime_recv = jit_peek_at_stack(jit, ctx, 0); + let recv = asm.stack_opnd(0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } { - let side_exit = get_side_exit(jit, ocb, ctx); - - mov(cb, REG0, recv); jit_guard_known_klass( jit, - ctx, - cb, + asm, ocb, comptime_recv.class_of(), - StackOpnd(0), + recv, + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + 
Counter::objtostring_not_string, ); + // No work needed. The string value is already on the top of the stack. - KeepCompiling + Some(KeepCompiling) } else { - let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, cb, ocb, cd, None) + let cd = jit.get_arg(0).as_ptr(); + perf_call! { gen_send_general(jit, asm, ocb, cd, None) } } } fn gen_intern( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); - let str = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[0], str); - - call_ptr(cb, REG0, rb_str_intern as *const u8); + let str = asm.stack_opnd(0); + let sym = asm.ccall(rb_str_intern as *const u8, vec![str]); + asm.stack_pop(1); // Keep it during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, sym); - KeepCompiling + Some(KeepCompiling) } fn gen_toregexp( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let opt = jit_get_arg(jit, 0).as_i64(); - let cnt = jit_get_arg(jit, 1).as_usize(); +) -> Option<CodegenStatus> { + let opt = jit.get_arg(0).as_i64(); + let cnt = jit.get_arg(1).as_usize(); // Save the PC and SP because this allocates an object and could // raise an exception. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize))); - ctx.stack_pop(cnt); + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32))); - mov(cb, C_ARG_REGS[0], imm_opnd(0)); - mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8); + let ary = asm.ccall( + rb_ary_tmp_new_from_values as *const u8, + vec![ + Opnd::Imm(0), + cnt.into(), + values_ptr, + ] + ); + asm.stack_pop(cnt); // Let ccall spill them // Save the array so we can clear it later - push(cb, RAX); - push(cb, RAX); // Alignment - mov(cb, C_ARG_REGS[0], RAX); - mov(cb, C_ARG_REGS[1], imm_opnd(opt)); - call_ptr(cb, REG0, rb_reg_new_ary as *const u8); + asm.cpush(ary); + asm.cpush(ary); // Alignment + + let val = asm.ccall( + rb_reg_new_ary as *const u8, + vec![ + ary, + Opnd::Imm(opt), + ] + ); // The actual regex is in RAX now. Pop the temp array from // rb_ary_tmp_new_from_values into C arg regs so we can clear it - pop(cb, REG1); // Alignment - pop(cb, C_ARG_REGS[0]); + let ary = asm.cpop(); // Alignment + asm.cpop_into(ary); // The value we want to push on the stack is in RAX right now - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, val); // Clear the temp array. 
- call_ptr(cb, REG0, rb_ary_clear as *const u8); + asm.ccall(rb_ary_clear as *const u8, vec![ary]); - KeepCompiling + Some(KeepCompiling) } fn gen_getspecial( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // This takes two arguments, key and type // key is only used when type == 0 // A non-zero type determines which type of backref to fetch - //rb_num_t key = jit_get_arg(jit, 0); - let rtype = jit_get_arg(jit, 1).as_u64(); + //rb_num_t key = jit.jit_get_arg(0); + let rtype = jit.get_arg(1).as_u64(); if rtype == 0 { // not yet implemented - return CantCompile; + return None; } else if rtype & 0x01 != 0 { // Fetch a "special" backref based on a char encoded by shifting by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); - mov(cb, C_ARG_REGS[0], RAX); + asm_comment!(asm, "rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); let rt_u8: u8 = (rtype >> 1).try_into().unwrap(); - match rt_u8.into() { + let val = match rt_u8.into() { '&' => { - add_comment(cb, "rb_reg_last_match"); - call_ptr(cb, REG0, rb_reg_last_match as *const u8); + asm_comment!(asm, "rb_reg_last_match"); + asm.ccall(rb_reg_last_match as *const u8, vec![backref]) } '`' => { - add_comment(cb, "rb_reg_match_pre"); - call_ptr(cb, REG0, rb_reg_match_pre as *const u8); + asm_comment!(asm, "rb_reg_match_pre"); + asm.ccall(rb_reg_match_pre as *const u8, vec![backref]) } '\'' => { - add_comment(cb, "rb_reg_match_post"); - call_ptr(cb, REG0, rb_reg_match_post as *const u8); + asm_comment!(asm, "rb_reg_match_post"); + asm.ccall(rb_reg_match_post as *const u8, vec![backref]) } '+' => { - add_comment(cb, "rb_reg_match_last"); - call_ptr(cb, REG0, rb_reg_match_last as *const u8); + 
asm_comment!(asm, "rb_reg_match_last"); + asm.ccall(rb_reg_match_last as *const u8, vec![backref]) } _ => panic!("invalid back-ref"), - } + }; - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } else { // Fetch the N-th match from the last backref based on type shifted by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); + asm_comment!(asm, "rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); // rb_reg_nth_match((int)(type >> 1), backref); - add_comment(cb, "rb_reg_nth_match"); - mov( - cb, - C_ARG_REGS[0], - imm_opnd((rtype >> 1).try_into().unwrap()), + asm_comment!(asm, "rb_reg_nth_match"); + let val = asm.ccall( + rb_reg_nth_match as *const u8, + vec![ + Opnd::Imm((rtype >> 1).try_into().unwrap()), + backref, + ] ); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_reg_nth_match as *const u8); - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } } fn gen_getclassvariable( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // rb_vm_getclassvariable can raise exceptions. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); - - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64())); - - call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8); + jit_prepare_non_leaf_call(jit, asm); + + let val_opnd = asm.ccall( + rb_vm_getclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit.get_arg(0).as_u64()), + Opnd::UImm(jit.get_arg(1).as_u64()), + ], + ); - let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, RAX); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_setclassvariable( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // rb_vm_setclassvariable can raise exceptions. - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); + + let val = asm.stack_opnd(0); + asm.ccall( + rb_vm_setclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit.get_arg(0).as_u64()), + val, + Opnd::UImm(jit.get_arg(1).as_u64()), + ], + ); + asm.stack_pop(1); // Keep it during ccall for GC + + Some(KeepCompiling) +} + +fn gen_getconstant( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let id = jit.get_arg(0).as_usize(); + + // vm_get_ev_const can raise exceptions. 
+ jit_prepare_non_leaf_call(jit, asm); + + let allow_nil_opnd = asm.stack_opnd(0); + let klass_opnd = asm.stack_opnd(1); + + extern "C" { + fn rb_vm_get_ev_const(ec: EcPtr, klass: VALUE, id: ID, allow_nil: VALUE) -> VALUE; + } - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], ctx.stack_pop(1)); - mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64())); + let val_opnd = asm.ccall( + rb_vm_get_ev_const as *const u8, + vec![ + EC, + klass_opnd, + id.into(), + allow_nil_opnd + ], + ); + asm.stack_pop(2); // Keep them during ccall for GC - call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } -fn gen_opt_getinlinecache( +fn gen_opt_getconstant_path( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0); - let const_cache_as_value = jit_get_arg(jit, 1); +) -> Option<CodegenStatus> { + let const_cache_as_value = jit.get_arg(0); let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr(); + let idlist: *const ID = unsafe { (*ic).segments }; + + // Make sure there is an exit for this block as the interpreter might want + // to invalidate this block from yjit_constant_ic_update(). + jit_ensure_block_entry_exit(jit, asm, ocb)?; // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update(). + // If a cache is not filled, fallback to the general C call. let ice = unsafe { (*ic).entry }; if ice.is_null() { - // In this case, leave a block that unconditionally side exits - // for the interpreter to invalidate. 
- return CantCompile; - } + // Prepare for const_missing + jit_prepare_non_leaf_call(jit, asm); - // Make sure there is an exit for this block as the interpreter might want - // to invalidate this block from yjit_constant_ic_update(). - jit_ensure_block_entry_exit(jit, ocb); + // If this does not trigger const_missing, vm_ic_update will invalidate this block. + extern "C" { + fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const u8) -> VALUE; + } + let val = asm.ccall( + rb_vm_opt_getconstant_path as *const u8, + vec![EC, CFP, Opnd::const_ptr(ic as *const u8)], + ); + + let stack_top = asm.stack_push(Type::Unknown); + asm.store(stack_top, val); + + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } if !unsafe { (*ice).ic_cref }.is_null() { // Cache is keyed on a certain lexical scope. Use the interpreter's cache. - let side_exit = get_side_exit(jit, ocb, ctx); + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); // Call function to verify the cache. It doesn't allocate or call methods. - mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8)); - mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8); + let ret_val = asm.ccall( + rb_vm_ic_hit_p as *const u8, + vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] + ); - // Check the result. _Bool is one byte in SysV. - test(cb, AL, AL); - jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss)); + // Check the result. SysV only specifies one byte for _Bool return values, + // so it's important we only check one bit to ignore the higher bits in the register. 
+ asm.test(ret_val, 1.into()); + asm.jz(Target::side_exit(Counter::opt_getconstant_path_ic_miss)); + + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + + let ic_entry = asm.load(Opnd::mem( + 64, + inline_cache, + RUBY_OFFSET_IC_ENTRY + )); + + let ic_entry_val = asm.load(Opnd::mem( + 64, + ic_entry, + RUBY_OFFSET_ICE_VALUE + )); // Push ic->entry->value - mov(cb, REG0, const_ptr_opnd(ic as *mut u8)); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY)); - let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE)); - mov(cb, stack_top, REG0); + let stack_top = asm.stack_push(Type::Unknown); + asm.store(stack_top, ic_entry_val); } else { // Optimize for single ractor mode. - // FIXME: This leaks when st_insert raises NoMemoryError - if !assume_single_ractor_mode(jit, ocb) { - return CantCompile; + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::opt_getconstant_path_multi_ractor); + return None; } // Invalidate output code on any constant writes associated with // constants referenced within the current block. - assume_stable_constant_names(jit, ocb); + jit.assume_stable_constant_names(asm, ocb, idlist); - jit_putobject(jit, ctx, cb, unsafe { (*ice).value }); + jit_putobject(asm, unsafe { (*ice).value }); } - // Jump over the code for filling the cache - let jump_idx = jit_next_insn_idx(jit) + jump_offset.as_u32(); - gen_direct_jump( - jit, - ctx, - BlockId { - iseq: jit.iseq, - idx: jump_idx, - }, - cb, - ); - EndBlock + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) } // Push the explicit block parameter onto the temporary stack. Part of the @@ -5521,200 +9769,249 @@ fn gen_opt_getinlinecache( // explicit block parameters. fn gen_getblockparamproxy( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, -) -> CodegenStatus { - // A mirror of the interpreter code. 
Checking for the case - // where it's pushing rb_block_param_proxy. - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } // EP level - let level = jit_get_arg(jit, 1).as_u32(); + let level = jit.get_arg(1).as_u32(); + + // Peek at the block handler so we can check whether it's nil + let comptime_handler = jit.peek_at_block_handler(level); + + // Filter for the 4 cases we currently handle + if !(comptime_handler.as_u64() == 0 || // no block given + comptime_handler.as_u64() & 0x3 == 0x1 || // iseq block (no associated GC managed object) + comptime_handler.as_u64() & 0x3 == 0x3 || // ifunc block (no associated GC managed object) + unsafe { rb_obj_is_proc(comptime_handler) }.test() // block is a Proc + ) { + // Missing the symbol case, where we basically need to call Symbol#to_proc at runtime + gen_counter_incr(asm, Counter::gbpp_unsupported_type); + return None; + } // Load environment pointer EP from CFP - gen_get_ep(cb, REG0, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( + let flag_check = Opnd::mem( 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), + ep_opnd, + SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), ); - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); - jnz_ptr(cb, counted_exit!(ocb, side_exit, gbpp_block_param_modified)); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.jnz(Target::side_exit(Counter::gbpp_block_param_modified)); // Load the block handler for the current frame // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - REG0, - mem_opnd( - 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) ); - // Block 
handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P(). - and(cb, REG0_8, imm_opnd(0x3)); + // Use block handler sample to guide specialization... + // NOTE: we use jit_chain_guard() in this decision tree, and since + // there are only a few cases, it should never reach the depth limit or use + // the exit counter we pass to it. + // + // No block given + if comptime_handler.as_u64() == 0 { + // Bail if there is a block handler + asm.cmp(block_handler, Opnd::UImm(0)); - // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null. - cmp(cb, REG0_8, imm_opnd(0x1)); - jnz_ptr( - cb, - counted_exit!(ocb, side_exit, gbpp_block_handler_not_iseq), - ); + jit_chain_guard( + JCC_JNZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_none, + ); - // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. - mov( - cb, - REG0, - const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()), - ); - assert!(!unsafe { rb_block_param_proxy }.special_const_p()); - let top = ctx.stack_push(Type::UnknownHeap); - mov(cb, top, REG0); + jit_putobject(asm, Qnil); + } else if comptime_handler.as_u64() & 0x1 == 0x1 { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. + // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + // Procs are aligned heap pointers so testing the bit rejects them too. + + asm.test(block_handler, 0x1.into()); + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_iseq, + ); + + // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. 
+ assert!(!unsafe { rb_block_param_proxy }.special_const_p()); + + let top = asm.stack_push(Type::BlockParamProxy); + asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr())); + } else if unsafe { rb_obj_is_proc(comptime_handler) }.test() { + // The block parameter is a Proc + c_callable! { + // We can't hold values across C calls due to a backend limitation, + // so we'll use this thin wrapper around rb_obj_is_proc(). + fn is_proc(object: VALUE) -> VALUE { + if unsafe { rb_obj_is_proc(object) }.test() { + // VM_BH_TO_PROC() is the identity function. + object + } else { + Qfalse + } + } + } + + // Simple predicate, no need to jit_prepare_non_leaf_call() + let proc_or_false = asm.ccall(is_proc as _, vec![block_handler]); + + // Guard for proc + asm.cmp(proc_or_false, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_proc, + ); - KeepCompiling + let top = asm.stack_push(Type::Unknown); + asm.mov(top, proc_or_false); + } else { + unreachable!("absurd given initial filtering"); + } + + jump_to_next_insn(jit, asm, ocb); + + Some(EndBlock) } fn gen_getblockparam( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - ocb: &mut OutlinedCb, -) -> CodegenStatus { + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { // EP level - let level = jit_get_arg(jit, 1).as_u32(); + let level = jit.get_arg(1).as_u32(); // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_call_with_gc(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. // A mirror of the interpreter code. Checking for the case // where it's pushing rb_block_param_proxy. 
- let side_exit = get_side_exit(jit, ocb, ctx); // Load environment pointer EP from CFP - gen_get_ep(cb, REG1, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), - ); + let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); // FIXME: This is testing bits in the same place that the WB check is testing. // We should combine these at some point - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); // If the frame flag has been modified, then the actual proc value is // already in the EP and we should just use the value. - let frame_flag_modified = cb.new_label("frame_flag_modified".to_string()); - jnz_label(cb, frame_flag_modified); + let frame_flag_modified = asm.new_label("frame_flag_modified"); + asm.jnz(frame_flag_modified); // This instruction writes the block handler to the EP. If we need to // fire a write barrier for the write, then exit (we'll let the // interpreter handle it so it can fire the write barrier). 
// flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG1, - SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); - - // Load the block handler for the current frame - // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - C_ARG_REGS[1], - mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), - ); + asm.jnz(Target::side_exit(Counter::gbp_wb_required)); // Convert the block handler in to a proc // call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler) - mov(cb, C_ARG_REGS[0], REG_EC); - call_ptr(cb, REG0, rb_vm_bh_to_procval as *const u8); + let proc = asm.ccall( + rb_vm_bh_to_procval as *const u8, + vec![ + EC, + // The block handler for the current frame + // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) + Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, + ), + ] + ); // Load environment pointer EP from CFP (again) - gen_get_ep(cb, REG1, level); - - // Set the frame modified flag - or(cb, flag_check, uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into())); + let ep_opnd = gen_get_ep(asm, level); // Write the value at the environment pointer - let idx = jit_get_arg(jit, 0).as_i32(); - let offs = -(SIZEOF_VALUE as i32 * idx); - mov(cb, mem_opnd(64, REG1, offs), RAX); + let idx = jit.get_arg(0).as_i32(); + let offs = -(SIZEOF_VALUE_I32 * idx); + asm.mov(Opnd::mem(64, ep_opnd, offs), proc); - cb.write_label(frame_flag_modified); + // Set the frame modified flag + let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); + let modified_flag = asm.or(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.store(flag_check, modified_flag); - // Push the 
proc on the stack - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, RAX, mem_opnd(64, REG1, offs)); - mov(cb, stack_ret, RAX); + asm.write_label(frame_flag_modified); - cb.link_labels(); + // Push the proc on the stack + let stack_ret = asm.stack_push(Type::Unknown); + let ep_opnd = gen_get_ep(asm, level); + asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs)); - KeepCompiling + Some(KeepCompiling) } fn gen_invokebuiltin( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc"); // ec, self, and arguments - if bf_argc + 2 > C_ARG_REGS.len() { - return CantCompile; + if bf_argc + 2 > C_ARG_OPNDS.len() { + incr_counter!(invokebuiltin_too_many_args); + return None; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_non_leaf_call(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) 
- mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals for i in 0..bf_argc { - let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32); - let c_arg_reg = C_ARG_REGS[2 + i]; - mov(cb, c_arg_reg, stack_opnd); + let stack_opnd = asm.stack_opnd((bf_argc - i - 1) as i32); + args.push(stack_opnd); } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value - ctx.stack_pop(bf_argc); - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.stack_pop(bf_argc); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } // opt_invokebuiltin_delegate calls a builtin function, like @@ -5722,145 +10019,152 @@ fn gen_invokebuiltin( // stack uses the argument locals (and self) from the current method. fn gen_opt_invokebuiltin_delegate( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); let bf_argc = unsafe { (*bf).argc }; - let start_index = jit_get_arg(jit, 1).as_i32(); + let start_index = jit.get_arg(1).as_i32(); // ec, self, and arguments - if bf_argc + 2 > (C_ARG_REGS.len() as i32) { - return CantCompile; + if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) { + incr_counter!(invokebuiltin_too_many_args); + return None; } // If the calls don't allocate, do they need up to date PC, SP? 
- jit_prepare_routine_call(jit, ctx, cb, REG0); - - if bf_argc > 0 { - // Load environment pointer EP from CFP - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - } + jit_prepare_non_leaf_call(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) - mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals - for i in 0..bf_argc { - let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; - let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; - let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32)); - let offs: usize = (i + 2) as usize; - let c_arg_reg = C_ARG_REGS[offs]; - mov(cb, c_arg_reg, local_opnd); + if bf_argc > 0 { + // Load environment pointer EP from CFP + let ep = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + + for i in 0..bf_argc { + let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; + let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; + let local_opnd = Opnd::mem(64, ep, offs * SIZEOF_VALUE_I32); + args.push(local_opnd); + } } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } /// Maps a YARV opcode to a code generation function (if supported) fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { let VALUE(opcode) = opcode; + let opcode = opcode as ruby_vminsn_type; assert!(opcode < VM_INSTRUCTION_SIZE); match opcode { - OP_NOP => Some(gen_nop), - OP_POP => Some(gen_pop), - OP_DUP => Some(gen_dup), - OP_DUPN => Some(gen_dupn), - OP_SWAP => Some(gen_swap), - OP_PUTNIL => 
Some(gen_putnil), - OP_PUTOBJECT => Some(gen_putobject), - OP_PUTOBJECT_INT2FIX_0_ => Some(gen_putobject_int2fix), - OP_PUTOBJECT_INT2FIX_1_ => Some(gen_putobject_int2fix), - OP_PUTSELF => Some(gen_putself), - OP_PUTSPECIALOBJECT => Some(gen_putspecialobject), - OP_SETN => Some(gen_setn), - OP_TOPN => Some(gen_topn), - OP_ADJUSTSTACK => Some(gen_adjuststack), - OP_GETLOCAL => Some(gen_getlocal), - OP_GETLOCAL_WC_0 => Some(gen_getlocal_wc0), - OP_GETLOCAL_WC_1 => Some(gen_getlocal_wc1), - OP_SETLOCAL => Some(gen_setlocal), - OP_SETLOCAL_WC_0 => Some(gen_setlocal_wc0), - OP_SETLOCAL_WC_1 => Some(gen_setlocal_wc1), - OP_OPT_PLUS => Some(gen_opt_plus), - OP_OPT_MINUS => Some(gen_opt_minus), - OP_OPT_AND => Some(gen_opt_and), - OP_OPT_OR => Some(gen_opt_or), - OP_NEWHASH => Some(gen_newhash), - OP_DUPHASH => Some(gen_duphash), - OP_NEWARRAY => Some(gen_newarray), - OP_DUPARRAY => Some(gen_duparray), - OP_CHECKTYPE => Some(gen_checktype), - OP_OPT_LT => Some(gen_opt_lt), - OP_OPT_LE => Some(gen_opt_le), - OP_OPT_GT => Some(gen_opt_gt), - OP_OPT_GE => Some(gen_opt_ge), - OP_OPT_MOD => Some(gen_opt_mod), - OP_OPT_STR_FREEZE => Some(gen_opt_str_freeze), - OP_OPT_STR_UMINUS => Some(gen_opt_str_uminus), - OP_SPLATARRAY => Some(gen_splatarray), - OP_NEWRANGE => Some(gen_newrange), - OP_PUTSTRING => Some(gen_putstring), - OP_EXPANDARRAY => Some(gen_expandarray), - OP_DEFINED => Some(gen_defined), - OP_CHECKKEYWORD => Some(gen_checkkeyword), - OP_CONCATSTRINGS => Some(gen_concatstrings), - OP_GETINSTANCEVARIABLE => Some(gen_getinstancevariable), - OP_SETINSTANCEVARIABLE => Some(gen_setinstancevariable), - - OP_OPT_EQ => Some(gen_opt_eq), - OP_OPT_NEQ => Some(gen_opt_neq), - OP_OPT_AREF => Some(gen_opt_aref), - OP_OPT_ASET => Some(gen_opt_aset), - OP_OPT_MULT => Some(gen_opt_mult), - OP_OPT_DIV => Some(gen_opt_div), - OP_OPT_LTLT => Some(gen_opt_ltlt), - OP_OPT_NIL_P => Some(gen_opt_nil_p), - OP_OPT_EMPTY_P => Some(gen_opt_empty_p), - OP_OPT_SUCC => Some(gen_opt_succ), - 
OP_OPT_NOT => Some(gen_opt_not), - OP_OPT_SIZE => Some(gen_opt_size), - OP_OPT_LENGTH => Some(gen_opt_length), - OP_OPT_REGEXPMATCH2 => Some(gen_opt_regexpmatch2), - OP_OPT_GETINLINECACHE => Some(gen_opt_getinlinecache), - OP_INVOKEBUILTIN => Some(gen_invokebuiltin), - OP_OPT_INVOKEBUILTIN_DELEGATE => Some(gen_opt_invokebuiltin_delegate), - OP_OPT_INVOKEBUILTIN_DELEGATE_LEAVE => Some(gen_opt_invokebuiltin_delegate), - OP_OPT_CASE_DISPATCH => Some(gen_opt_case_dispatch), - OP_BRANCHIF => Some(gen_branchif), - OP_BRANCHUNLESS => Some(gen_branchunless), - OP_BRANCHNIL => Some(gen_branchnil), - OP_JUMP => Some(gen_jump), - - OP_GETBLOCKPARAMPROXY => Some(gen_getblockparamproxy), - OP_GETBLOCKPARAM => Some(gen_getblockparam), - OP_OPT_SEND_WITHOUT_BLOCK => Some(gen_opt_send_without_block), - OP_SEND => Some(gen_send), - OP_INVOKESUPER => Some(gen_invokesuper), - OP_LEAVE => Some(gen_leave), - - OP_GETGLOBAL => Some(gen_getglobal), - OP_SETGLOBAL => Some(gen_setglobal), - OP_ANYTOSTRING => Some(gen_anytostring), - OP_OBJTOSTRING => Some(gen_objtostring), - OP_INTERN => Some(gen_intern), - OP_TOREGEXP => Some(gen_toregexp), - OP_GETSPECIAL => Some(gen_getspecial), - OP_GETCLASSVARIABLE => Some(gen_getclassvariable), - OP_SETCLASSVARIABLE => Some(gen_setclassvariable), + YARVINSN_nop => Some(gen_nop), + YARVINSN_pop => Some(gen_pop), + YARVINSN_dup => Some(gen_dup), + YARVINSN_dupn => Some(gen_dupn), + YARVINSN_swap => Some(gen_swap), + YARVINSN_putnil => Some(gen_putnil), + YARVINSN_putobject => Some(gen_putobject), + YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), + YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), + YARVINSN_putself => Some(gen_putself), + YARVINSN_putspecialobject => Some(gen_putspecialobject), + YARVINSN_setn => Some(gen_setn), + YARVINSN_topn => Some(gen_topn), + YARVINSN_adjuststack => Some(gen_adjuststack), + + YARVINSN_getlocal => Some(gen_getlocal), + YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), + 
YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + YARVINSN_setlocal => Some(gen_setlocal), + YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), + YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), + YARVINSN_opt_plus => Some(gen_opt_plus), + YARVINSN_opt_minus => Some(gen_opt_minus), + YARVINSN_opt_and => Some(gen_opt_and), + YARVINSN_opt_or => Some(gen_opt_or), + YARVINSN_newhash => Some(gen_newhash), + YARVINSN_duphash => Some(gen_duphash), + YARVINSN_newarray => Some(gen_newarray), + YARVINSN_duparray => Some(gen_duparray), + YARVINSN_checktype => Some(gen_checktype), + YARVINSN_opt_lt => Some(gen_opt_lt), + YARVINSN_opt_le => Some(gen_opt_le), + YARVINSN_opt_gt => Some(gen_opt_gt), + YARVINSN_opt_ge => Some(gen_opt_ge), + YARVINSN_opt_mod => Some(gen_opt_mod), + YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), + YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), + YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send), + YARVINSN_splatarray => Some(gen_splatarray), + YARVINSN_splatkw => Some(gen_splatkw), + YARVINSN_concatarray => Some(gen_concatarray), + YARVINSN_concattoarray => Some(gen_concattoarray), + YARVINSN_pushtoarray => Some(gen_pushtoarray), + YARVINSN_newrange => Some(gen_newrange), + YARVINSN_putstring => Some(gen_putstring), + YARVINSN_putchilledstring => Some(gen_putchilledstring), + YARVINSN_expandarray => Some(gen_expandarray), + YARVINSN_defined => Some(gen_defined), + YARVINSN_definedivar => Some(gen_definedivar), + YARVINSN_checkmatch => Some(gen_checkmatch), + YARVINSN_checkkeyword => Some(gen_checkkeyword), + YARVINSN_concatstrings => Some(gen_concatstrings), + YARVINSN_getinstancevariable => Some(gen_getinstancevariable), + YARVINSN_setinstancevariable => Some(gen_setinstancevariable), + + YARVINSN_opt_eq => Some(gen_opt_eq), + YARVINSN_opt_neq => Some(gen_opt_neq), + YARVINSN_opt_aref => Some(gen_opt_aref), + YARVINSN_opt_aset => Some(gen_opt_aset), + YARVINSN_opt_aref_with => Some(gen_opt_aref_with), + YARVINSN_opt_mult => 
Some(gen_opt_mult), + YARVINSN_opt_div => Some(gen_opt_div), + YARVINSN_opt_ltlt => Some(gen_opt_ltlt), + YARVINSN_opt_nil_p => Some(gen_opt_nil_p), + YARVINSN_opt_empty_p => Some(gen_opt_empty_p), + YARVINSN_opt_succ => Some(gen_opt_succ), + YARVINSN_opt_not => Some(gen_opt_not), + YARVINSN_opt_size => Some(gen_opt_size), + YARVINSN_opt_length => Some(gen_opt_length), + YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2), + YARVINSN_getconstant => Some(gen_getconstant), + YARVINSN_opt_getconstant_path => Some(gen_opt_getconstant_path), + YARVINSN_invokebuiltin => Some(gen_invokebuiltin), + YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), + YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate), + YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), + YARVINSN_branchif => Some(gen_branchif), + YARVINSN_branchunless => Some(gen_branchunless), + YARVINSN_branchnil => Some(gen_branchnil), + YARVINSN_throw => Some(gen_throw), + YARVINSN_jump => Some(gen_jump), + + YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), + YARVINSN_getblockparam => Some(gen_getblockparam), + YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), + YARVINSN_send => Some(gen_send), + YARVINSN_invokeblock => Some(gen_invokeblock), + YARVINSN_invokesuper => Some(gen_invokesuper), + YARVINSN_leave => Some(gen_leave), + + YARVINSN_getglobal => Some(gen_getglobal), + YARVINSN_setglobal => Some(gen_setglobal), + YARVINSN_anytostring => Some(gen_anytostring), + YARVINSN_objtostring => Some(gen_objtostring), + YARVINSN_intern => Some(gen_intern), + YARVINSN_toregexp => Some(gen_toregexp), + YARVINSN_getspecial => Some(gen_getspecial), + YARVINSN_getclassvariable => Some(gen_getclassvariable), + YARVINSN_setclassvariable => Some(gen_setclassvariable), // Unimplemented opcode, YJIT won't generate code for this yet _ => None, @@ -5868,20 +10172,118 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { } // Return 
true when the codegen function generates code. -// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass(). +// known_recv_class has Some value when the caller has used jit_guard_known_klass(). // See yjit_reg_method(). type MethodGenFn = fn( jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, argc: i32, - known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool; +/// Methods for generating code for hardcoded (usually C) methods +static mut METHOD_CODEGEN_TABLE: Option<HashMap<usize, MethodGenFn>> = None; + +/// Register codegen functions for some Ruby core methods +pub fn yjit_reg_method_codegen_fns() { + unsafe { + assert!(METHOD_CODEGEN_TABLE.is_none()); + METHOD_CODEGEN_TABLE = Some(HashMap::default()); + + // Specialization for C methods. See yjit_reg_method() for details. 
+ yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not); + + yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true); + yjit_reg_method(rb_mKernel, "nil?", jit_rb_false); + yjit_reg_method(rb_mKernel, "is_a?", jit_rb_kernel_is_a); + yjit_reg_method(rb_mKernel, "kind_of?", jit_rb_kernel_is_a); + yjit_reg_method(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of); + + yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal); + yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal); + yjit_reg_method(rb_cBasicObject, "!=", jit_rb_obj_not_equal); + yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal); + yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal); + yjit_reg_method(rb_cModule, "===", jit_rb_mod_eqq); + yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal); + yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal); + yjit_reg_method(rb_cInteger, "==", jit_rb_int_equal); + yjit_reg_method(rb_cInteger, "===", jit_rb_int_equal); + + yjit_reg_method(rb_cInteger, "succ", jit_rb_int_succ); + yjit_reg_method(rb_cInteger, "/", jit_rb_int_div); + yjit_reg_method(rb_cInteger, "<<", jit_rb_int_lshift); + yjit_reg_method(rb_cInteger, ">>", jit_rb_int_rshift); + yjit_reg_method(rb_cInteger, "^", jit_rb_int_xor); + yjit_reg_method(rb_cInteger, "[]", jit_rb_int_aref); + + yjit_reg_method(rb_cFloat, "+", jit_rb_float_plus); + yjit_reg_method(rb_cFloat, "-", jit_rb_float_minus); + yjit_reg_method(rb_cFloat, "*", jit_rb_float_mul); + yjit_reg_method(rb_cFloat, "/", jit_rb_float_div); + + yjit_reg_method(rb_cString, "empty?", jit_rb_str_empty_p); + yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); + yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); + yjit_reg_method(rb_cString, "length", jit_rb_str_length); + yjit_reg_method(rb_cString, "size", jit_rb_str_length); + yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); + yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte); + yjit_reg_method(rb_cString, "setbyte", jit_rb_str_setbyte); + 
yjit_reg_method(rb_cString, "byteslice", jit_rb_str_byteslice); + yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); + yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); + + yjit_reg_method(rb_cNilClass, "===", jit_rb_case_equal); + yjit_reg_method(rb_cTrueClass, "===", jit_rb_case_equal); + yjit_reg_method(rb_cFalseClass, "===", jit_rb_case_equal); + + yjit_reg_method(rb_cArray, "empty?", jit_rb_ary_empty_p); + yjit_reg_method(rb_cArray, "length", jit_rb_ary_length); + yjit_reg_method(rb_cArray, "size", jit_rb_ary_length); + yjit_reg_method(rb_cArray, "<<", jit_rb_ary_push); + + yjit_reg_method(rb_cHash, "empty?", jit_rb_hash_empty_p); + + yjit_reg_method(rb_mKernel, "respond_to?", jit_obj_respond_to); + yjit_reg_method(rb_mKernel, "block_given?", jit_rb_f_block_given_p); + + yjit_reg_method(rb_cClass, "superclass", jit_rb_class_superclass); + + yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current); + } +} + +// Register a specialized codegen function for a particular method. Note that +// the if the function returns true, the code it generates runs without a +// control frame and without interrupt checks. To avoid creating observable +// behavior changes, the codegen function should only target simple code paths +// that do not allocate and do not make method calls. 
+fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { + let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!"); + let mid = unsafe { rb_intern(id_string.as_ptr()) }; + let me = unsafe { rb_method_entry_at(klass, mid) }; + + if me.is_null() { + panic!("undefined optimized method!: {mid_str}"); + } + + // For now, only cfuncs are supported + //RUBY_ASSERT(me && me->def); + //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC); + + let method_serial = unsafe { + let def = (*me).def; + get_def_method_serial(def) + }; + + unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); } +} + /// Global state needed for code generation pub struct CodegenGlobals { /// Inline code block (fast path) @@ -5893,24 +10295,31 @@ pub struct CodegenGlobals { /// Code for exiting back to the interpreter from the leave instruction leave_exit_code: CodePtr, + /// Code for exiting back to the interpreter after handling an exception + leave_exception_code: CodePtr, + // For exiting from YJIT frame from branch_stub_hit(). - // Filled by gen_code_for_exit_from_stub(). + // Filled by gen_stub_exit(). stub_exit_code: CodePtr, + // For servicing branch stubs + branch_stub_hit_trampoline: CodePtr, + + // For servicing entry stubs + entry_stub_hit_trampoline: CodePtr, + // Code for full logic of returning from C method and exiting to the interpreter outline_full_cfunc_return_pos: CodePtr, /// For implementing global code invalidation global_inval_patches: Vec<CodepagePatch>, - /// For implementing global code invalidation. The number of bytes counting from the beginning - /// of the inline code block that should not be changed. After patching for global invalidation, - /// no one should make changes to the invalidated code region anymore. This is used to - /// break out of invalidation race when there are multiple ractors. - inline_frozen_bytes: usize, + /// Page indexes for outlined code that are not associated to any ISEQ. 
+ ocb_pages: Vec<usize>, - // Methods for generating code for hardcoded (usually C) methods - method_codegen_table: HashMap<u64, MethodGenFn>, + /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push + /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap. + pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>, } /// For implementing global code invalidation. A position in the inline @@ -5927,19 +10336,41 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None; impl CodegenGlobals { /// Initialize the codegen globals pub fn init() { - // Executable memory size in MiB - let mem_size = get_option!(exec_mem_size) * 1024 * 1024; + // Executable memory and code page size in bytes + let mem_size = get_option!(exec_mem_size); #[cfg(not(test))] let (mut cb, mut ocb) = { - let page_size = unsafe { rb_yjit_get_page_size() }.as_usize(); - let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) }; - let cb = CodeBlock::new(mem_block, mem_size / 2, page_size); - let ocb = OutlinedCb::wrap(CodeBlock::new( - unsafe { mem_block.add(mem_size / 2) }, - mem_size / 2, + let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; + + // Memory protection syscalls need page-aligned addresses, so check it here. Assuming + // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the + // page size in bytes is a power of two 2¹⁹ or smaller. This is because the user + // requested size is half of mem_option × 2²⁰ as it's in MiB. + // + // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB + // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. 
+ let page_size = unsafe { rb_yjit_get_page_size() }; + assert_eq!( + virt_block as usize % page_size.as_usize(), 0, + "Start of virtual address block should be page-aligned", + ); + + use crate::virtualmem::*; + use std::ptr::NonNull; + + let mem_block = VirtualMem::new( + SystemAllocator {}, page_size, - )); + NonNull::new(virt_block).unwrap(), + mem_size, + ); + let mem_block = Rc::new(RefCell::new(mem_block)); + + let freed_pages = Rc::new(None); + let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone()); + let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages)); + (cb, ocb) }; @@ -5950,99 +10381,54 @@ impl CodegenGlobals { #[cfg(test)] let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2)); - let leave_exit_code = gen_leave_exit(&mut ocb); + let ocb_start_addr = ocb.unwrap().get_write_ptr(); + let leave_exit_code = gen_leave_exit(&mut ocb).unwrap(); + let leave_exception_code = gen_leave_exception(&mut ocb).unwrap(); + + let stub_exit_code = gen_stub_exit(&mut ocb).unwrap(); - let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); + let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb).unwrap(); + let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb).unwrap(); // Generate full exit code for C func - let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); + let cfunc_exit_code = gen_full_cfunc_return(&mut ocb).unwrap(); + + let ocb_end_addr = ocb.unwrap().get_write_ptr(); + let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr); // Mark all code memory as executable cb.mark_all_executable(); ocb.unwrap().mark_all_executable(); - let mut codegen_globals = CodegenGlobals { + let codegen_globals = CodegenGlobals { inline_cb: cb, outlined_cb: ocb, - leave_exit_code: leave_exit_code, - stub_exit_code: stub_exit_code, + leave_exit_code, + leave_exception_code, + stub_exit_code, outline_full_cfunc_return_pos: cfunc_exit_code, + branch_stub_hit_trampoline, + 
entry_stub_hit_trampoline, global_inval_patches: Vec::new(), - inline_frozen_bytes: 0, - method_codegen_table: HashMap::new(), + ocb_pages, + pc_to_cfunc: HashMap::new(), }; - // Register the method codegen functions - codegen_globals.reg_method_codegen_fns(); - // Initialize the codegen globals instance unsafe { CODEGEN_GLOBALS = Some(codegen_globals); } } - // Register a specialized codegen function for a particular method. Note that - // the if the function returns true, the code it generates runs without a - // control frame and without interrupt checks. To avoid creating observable - // behavior changes, the codegen function should only target simple code paths - // that do not allocate and do not make method calls. - fn yjit_reg_method(&mut self, klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { - let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!"); - let mid = unsafe { rb_intern(id_string.as_ptr()) }; - let me = unsafe { rb_method_entry_at(klass, mid) }; - - if me.is_null() { - panic!("undefined optimized method!"); - } - - // For now, only cfuncs are supported - //RUBY_ASSERT(me && me->def); - //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC); - - let method_serial = unsafe { - let def = (*me).def; - get_def_method_serial(def) - }; - - self.method_codegen_table.insert(method_serial, gen_fn); - } - - /// Register codegen functions for some Ruby core methods - fn reg_method_codegen_fns(&mut self) { - unsafe { - // Specialization for C methods. See yjit_reg_method() for details. 
- self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not); - - self.yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true); - self.yjit_reg_method(rb_mKernel, "nil?", jit_rb_false); - - self.yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal); - self.yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal); - self.yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal); - - // rb_str_to_s() methods in string.c - self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); - self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); - self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); - - // Thread.current - self.yjit_reg_method( - rb_singleton_class(rb_cThread), - "current", - jit_thread_s_current, - ); - } - } - /// Get a mutable reference to the codegen globals instance pub fn get_instance() -> &'static mut CodegenGlobals { unsafe { CODEGEN_GLOBALS.as_mut().unwrap() } } + pub fn has_instance() -> bool { + unsafe { CODEGEN_GLOBALS.as_mut().is_some() } + } + /// Get a mutable reference to the inline code block pub fn get_inline_cb() -> &'static mut CodeBlock { &mut CodegenGlobals::get_instance().inline_cb @@ -6057,14 +10443,26 @@ impl CodegenGlobals { CodegenGlobals::get_instance().leave_exit_code } + pub fn get_leave_exception_code() -> CodePtr { + CodegenGlobals::get_instance().leave_exception_code + } + pub fn get_stub_exit_code() -> CodePtr { CodegenGlobals::get_instance().stub_exit_code } - pub fn push_global_inval_patch(i_pos: CodePtr, o_pos: CodePtr) { + pub fn push_global_inval_patch(inline_pos: CodePtr, outlined_pos: CodePtr, cb: &CodeBlock) { + if let Some(last_patch) = CodegenGlobals::get_instance().global_inval_patches.last() { + let patch_offset = inline_pos.as_offset() - 
last_patch.inline_patch_pos.as_offset(); + assert!( + patch_offset < 0 || cb.jmp_ptr_bytes() as i64 <= patch_offset, + "patches should not overlap (patch_offset: {patch_offset})", + ); + } + let patch = CodepagePatch { - inline_patch_pos: i_pos, - outlined_target_pos: o_pos, + inline_patch_pos: inline_pos, + outlined_target_pos: outlined_pos, }; CodegenGlobals::get_instance() .global_inval_patches @@ -6077,26 +10475,24 @@ impl CodegenGlobals { mem::take(&mut globals.global_inval_patches) } - pub fn get_inline_frozen_bytes() -> usize { - CodegenGlobals::get_instance().inline_frozen_bytes + pub fn get_outline_full_cfunc_return_pos() -> CodePtr { + CodegenGlobals::get_instance().outline_full_cfunc_return_pos } - pub fn set_inline_frozen_bytes(frozen_bytes: usize) { - CodegenGlobals::get_instance().inline_frozen_bytes = frozen_bytes; + pub fn get_branch_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().branch_stub_hit_trampoline } - pub fn get_outline_full_cfunc_return_pos() -> CodePtr { - CodegenGlobals::get_instance().outline_full_cfunc_return_pos + pub fn get_entry_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().entry_stub_hit_trampoline } - pub fn look_up_codegen_method(method_serial: u64) -> Option<MethodGenFn> { - let table = &CodegenGlobals::get_instance().method_codegen_table; + pub fn get_ocb_pages() -> &'static Vec<usize> { + &CodegenGlobals::get_instance().ocb_pages + } - let option_ref = table.get(&method_serial); - match option_ref { - None => None, - Some(&mgf) => Some(mgf), // Deref - } + pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> { + &mut CodegenGlobals::get_instance().pc_to_cfunc } } @@ -6104,17 +10500,19 @@ impl CodegenGlobals { mod tests { use super::*; - fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) { - let blockid = BlockId { - iseq: ptr::null(), - idx: 0, - }; - let block = Block::new(blockid, &Context::default()); + fn 
setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) { + let cb = CodeBlock::new_dummy(256 * 1024); return ( - JITState::new(&block), - Context::new(), - CodeBlock::new_dummy(256 * 1024), + JITState::new( + BlockId { iseq: std::ptr::null(), idx: 0 }, + Context::default(), + cb.get_write_ptr(), + ptr::null(), // No execution context in tests. No peeking! + ), + Context::default(), + Assembler::new(), + cb, OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), ); } @@ -6128,237 +10526,203 @@ mod tests { #[test] fn test_gen_exit() { - let (_, ctx, mut cb, _) = setup_codegen(); - gen_exit(0 as *mut VALUE, &ctx, &mut cb); + let (_, _ctx, mut asm, mut cb, _) = setup_codegen(); + gen_exit(0 as *mut VALUE, &mut asm); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_get_side_exit() { - let (mut jit, ctx, _, mut ocb) = setup_codegen(); - get_side_exit(&mut jit, &mut ocb, &ctx); + let (_jit, ctx, mut asm, _, mut ocb) = setup_codegen(); + let side_exit_context = SideExitContext::new(0 as _, ctx); + asm.get_side_exit(&side_exit_context, None, &mut ocb); assert!(ocb.unwrap().get_write_pos() > 0); } #[test] fn test_gen_check_ints() { - let (_, _ctx, mut cb, mut ocb) = setup_codegen(); - let side_exit = ocb.unwrap().get_write_ptr(); - gen_check_ints(&mut cb, side_exit); + let (_jit, _ctx, mut asm, _cb, _ocb) = setup_codegen(); + asm.set_side_exit_context(0 as _, 0); + gen_check_ints(&mut asm, Counter::guard_send_interrupted); } #[test] fn test_gen_nop() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_nop(&mut jit, &mut asm, &mut ocb); + asm.compile(&mut cb, None).unwrap(); - assert_eq!(status, KeepCompiling); - assert_eq!(context.diff(&Context::new()), 0); + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(context.diff(&Context::default()), 
TypeDiff::Compatible(0)); assert_eq!(cb.get_write_pos(), 0); } #[test] fn test_gen_pop() { - let (mut jit, _, mut cb, mut ocb) = setup_codegen(); - let mut context = Context::new_with_stack_size(1); - let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen(); + let context = Context::default(); + asm.stack_push(Type::Fixnum); + let status = gen_pop(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); - assert_eq!(context.diff(&Context::new()), 0); + assert_eq!(status, Some(KeepCompiling)); + let mut default = Context::default(); + default.set_reg_temps(context.get_reg_temps()); + assert_eq!(context.diff(&default), TypeDiff::Compatible(0)); } #[test] fn test_gen_dup() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + let status = gen_dup(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); // Did we duplicate the type information for the Fixnum type? 
- assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0))); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_dupn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2 let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_dupn(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(3))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(3))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + // TODO: this is writing zero bytes on x86. Why? 
+ asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_swap() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); + let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); - let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_swap(&mut jit, &mut asm, &mut ocb); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); - let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1)); + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); + let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1)); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); assert_eq!(tmp_type_top, Type::Fixnum); assert_eq!(tmp_type_next, Type::Flonum); } #[test] fn test_putnil() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putnil(&mut jit, &mut asm, &mut ocb); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); assert_eq!(tmp_type_top, Type::Nil); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } - #[test] - fn test_putobject_qtrue() { - // Test gen_putobject with Qtrue - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - - let mut value_array: [u64; 2] = [0, Qtrue.into()]; - let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; - jit.pc = pc; - - let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb); - - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); - - assert_eq!(status, KeepCompiling); - 
assert_eq!(tmp_type_top, Type::True); - assert!(cb.get_write_pos() > 0); - } - - #[test] - fn test_putobject_fixnum() { - // Test gen_putobject with a Fixnum to test another conditional branch - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - - // The Fixnum 7 is encoded as 7 * 2 + 1, or 15 - let mut value_array: [u64; 2] = [0, 15]; - let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; - jit.pc = pc; - - let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb); - - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); - - assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::Fixnum); - assert!(cb.get_write_pos() > 0); - } - - #[test] - fn test_int2fix() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - jit.opcode = OP_PUTOBJECT_INT2FIX_0_; - let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb); - - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); - - // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally. 
- assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::Fixnum); - } #[test] fn test_putself() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putself(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_setn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); - context.stack_push(Type::String); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); let mut value_array: [u64; 2] = [0, 2]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_setn(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::String, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::String, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_topn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Flonum); - context.stack_push(Type::String); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); let mut 
value_array: [u64; 2] = [0, 1]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_topn(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::String, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_adjuststack() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Flonum); - context.stack_push(Type::String); - context.stack_push(Type::Fixnum); + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); + asm.stack_push(Type::Fixnum); let mut value_array: [u64; 3] = [0, 2, 0]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_adjuststack(&mut jit, &mut asm, &mut ocb); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() == 0); // No instructions written } #[test] fn test_gen_leave() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen(); // Push return value - context.stack_push(Type::Fixnum); - gen_leave(&mut jit, &mut context, 
&mut cb, &mut ocb); + asm.stack_push(Type::Fixnum); + asm.set_side_exit_context(0 as _, 0); + gen_leave(&mut jit, &mut asm, &mut ocb); } } |