//! This module is for native code generation.

#![allow(clippy::let_and_return)]

use std::cell::{Cell, RefCell};
use std::rc::Rc;
use std::ffi::{c_int, c_long, c_void};
use std::slice;

use crate::asm::Label;
use crate::backend::current::ALLOC_REGS;
use crate::invariants::{
    track_bop_assumption, track_cme_assumption, track_no_ep_escape_assumption,
    track_no_trace_point_assumption, track_single_ractor_assumption,
    track_stable_constant_names_assumption, track_no_singleton_class_assumption
};
use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_iseq_payload_ptr, IseqCodePtrs, IseqPayload, IseqStatus};
use crate::state::ZJITState;
use crate::stats::{
    send_fallback_counter, exit_counter_for_compile_error, incr_counter, incr_counter_by,
    send_fallback_counter_for_method_type, send_without_block_fallback_counter_for_method_type,
    send_without_block_fallback_counter_for_optimized_method_type,
    send_fallback_counter_ptr_for_opcode, CompileError
};
use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}};
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
use crate::hir_type::{types, Type};
use crate::options::get_option;
use crate::cast::IntoUsize;

/// Sentinel program counter stored in C frames when runtime checks are enabled.
const PC_POISON: Option<*const VALUE> = if cfg!(feature = "runtime_checks") {
    Some(usize::MAX as *const VALUE)
} else {
    None
};

/// Ephemeral code generation state
struct JITState {
    /// Instruction sequence for the method being compiled
    iseq: IseqPtr,

    /// Low-level IR operands indexed by High-level IR's Instruction ID
    opnds: Vec<Option<lir::Opnd>>,

    /// Labels for each basic block indexed by the BlockId
    labels: Vec<Option<Target>>,

    /// JIT entry points for the `iseq`
    jit_entries: Vec<Rc<RefCell<JITEntry>>>,

    /// ISEQ calls that need to be compiled later
    iseq_calls: Vec<IseqCallRef>,
}

impl JITState {
    /// Create a new JITState instance
    fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize) -> Self {
        JITState {
            iseq,
            opnds: vec![None; num_insns],
            labels: vec![None; num_blocks],
            jit_entries: Vec::default(),
            iseq_calls: Vec::default(),
        }
    }

    /// Retrieve the output of a given instruction that has been compiled
    fn get_opnd(&self, insn_id: InsnId) -> lir::Opnd {
        self.opnds[insn_id.0].unwrap_or_else(|| panic!("Failed to get_opnd({insn_id})"))
    }

    /// Find or create a label for a given BlockId
    fn get_label(&mut self, asm: &mut Assembler, block_id: BlockId) -> Target {
        match &self.labels[block_id.0] {
            Some(label) => label.clone(),
            None => {
                let label = asm.new_label(&format!("{block_id}"));
                self.labels[block_id.0] = Some(label.clone());
                label
            }
        }
    }
}

/// CRuby API to compile a given ISEQ.
/// If jit_exception is true, compile JIT code for handling exceptions.
/// See jit_compile_exception() for details.
#[unsafe(no_mangle)]
pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, jit_exception: bool) -> *const u8 {
    // Take a lock to avoid writing to ISEQ in parallel with Ractors.
    // with_vm_lock() does nothing if the program doesn't use Ractors.
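    // If compilation fails (and --zjit-stats is off), this returns a null pointer
    // via the map_or below; the caller treats that as "no JIT entry installed" and
    // the ISEQ keeps running in the interpreter.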
    with_vm_lock(src_loc!(), || {
        let cb = ZJITState::get_code_block();
        let mut code_ptr = with_time_stat(compile_time_ns, || gen_iseq_entry_point(cb, iseq, jit_exception));

        if let Err(err) = &code_ptr {
            // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled.
            // We assert only `jit_exception: false` cases until we support exception handlers.
            if ZJITState::assert_compiles_enabled() && !jit_exception {
                let iseq_location = iseq_get_location(iseq, 0);
                panic!("Failed to compile: {iseq_location}");
            }

            // For --zjit-stats, generate an entry that just increments exit_compilation_failure and exits
            if get_option!(stats) {
                code_ptr = gen_compile_error_counter(cb, err);
            }
        }

        // Always mark the code region executable if asm.compile() has been used.
        // We need to do this even if code_ptr is Err because, whether gen_entry()
        // fails or not, gen_iseq() may have already used asm.compile().
        cb.mark_all_executable();

        code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb))
    })
}

/// Compile an entry point for a given ISEQ
fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool) -> Result<CodePtr, CompileError> {
    // We don't support exception handlers yet
    if jit_exception {
        return Err(CompileError::ExceptionHandler);
    }

    // Compile ISEQ into High-level IR
    let function = compile_iseq(iseq).inspect_err(|_| {
        incr_counter!(failed_iseq_count);
    })?;

    // Compile the High-level IR
    let IseqCodePtrs { start_ptr, .. } = gen_iseq(cb, iseq, Some(&function)).inspect_err(|err| {
        debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0));
    })?;

    // Compile an entry point to the JIT code
    gen_entry(cb, iseq, start_ptr).inspect_err(|err| {
        debug!("{err:?}: gen_entry failed: {}", iseq_get_location(iseq, 0));
    })
}

/// Stub a branch for a JIT-to-JIT call
fn gen_iseq_call(cb: &mut CodeBlock, caller_iseq: IseqPtr, iseq_call: &IseqCallRef) -> Result<(), CompileError> {
    // Compile a function stub
    let stub_ptr = gen_function_stub(cb, iseq_call.clone()).inspect_err(|err| {
        debug!("{err:?}: gen_function_stub failed: {} -> {}",
            iseq_get_location(caller_iseq, 0), iseq_get_location(iseq_call.iseq.get(), 0));
    })?;

    // Update the JIT-to-JIT call to call the stub
    let stub_addr = stub_ptr.raw_ptr(cb);
    let iseq = iseq_call.iseq.get();
    iseq_call.regenerate(cb, |asm| {
        asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq, 0));
        asm.ccall(stub_addr, vec![]);
    });
    Ok(())
}

/// Write an entry to the perf map in /tmp
fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
    use std::io::Write;
    let perf_map = format!("/tmp/perf-{}.map", std::process::id());
    let Ok(file) = std::fs::OpenOptions::new().create(true).append(true).open(&perf_map) else {
        debug!("Failed to open perf map file: {perf_map}");
        return;
    };
    let mut file = std::io::BufWriter::new(file);
    let Ok(_) = writeln!(file, "{:#x} {:#x} zjit::{}", start_ptr, code_size, iseq_name) else {
        debug!("Failed to write {iseq_name} to perf map file: {perf_map}");
        return;
    };
}

/// Compile a JIT entry
fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function_ptr: CodePtr) -> Result<CodePtr, CompileError> {
    // Set up registers for CFP, EC, SP, and basic block arguments
    let mut asm = Assembler::new();
    gen_entry_prologue(&mut asm, iseq);

    // Jump to the first block using a call instruction
    asm.ccall(function_ptr.raw_ptr(cb), vec![]);

    // Restore registers for CFP, EC, and SP after use
    asm_comment!(asm, "return to the interpreter");
    asm.frame_teardown(lir::JIT_PRESERVED_REGS);
    asm.cret(C_RET_OPND);

    let (code_ptr, gc_offsets) = asm.compile(cb)?;
    assert!(gc_offsets.is_empty());
    if get_option!(perf) {
        let start_ptr = code_ptr.raw_addr(cb);
        let end_ptr = cb.get_write_ptr().raw_addr(cb);
        let code_size = end_ptr - start_ptr;
        let iseq_name = iseq_get_location(iseq, 0);
        register_with_perf(format!("entry for {iseq_name}"), start_ptr, code_size);
    }
    Ok(code_ptr)
}

/// Compile an ISEQ into machine code if not compiled yet
fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr, function: Option<&Function>) -> Result<IseqCodePtrs, CompileError> {
    // Return an existing pointer if it's already compiled
    let payload = get_or_create_iseq_payload(iseq);
    match &payload.status {
        IseqStatus::Compiled(code_ptrs) => return Ok(code_ptrs.clone()),
        IseqStatus::CantCompile(err) => return Err(err.clone()),
        IseqStatus::NotCompiled => {},
    }

    // Compile the ISEQ
    let code_ptrs = gen_iseq_body(cb, iseq, function, payload);
    match &code_ptrs {
        Ok(code_ptrs) => {
            payload.status = IseqStatus::Compiled(code_ptrs.clone());
            incr_counter!(compiled_iseq_count);
        }
        Err(err) => {
            payload.status = IseqStatus::CantCompile(err.clone());
            incr_counter!(failed_iseq_count);
        }
    }
    code_ptrs
}

/// Compile an ISEQ into machine code
fn gen_iseq_body(cb: &mut CodeBlock, iseq: IseqPtr, function: Option<&Function>, payload: &mut IseqPayload) -> Result<IseqCodePtrs, CompileError> {
    // Convert ISEQ into optimized High-level IR if not given
    let function = match function {
        Some(function) => function,
        None => &compile_iseq(iseq)?,
    };

    // Compile the High-level IR
    let (iseq_code_ptrs, gc_offsets, iseq_calls) = gen_function(cb, iseq, function)?;

    // Stub callee ISEQs for JIT-to-JIT calls
    for iseq_call in iseq_calls.iter() {
        gen_iseq_call(cb, iseq, iseq_call)?;
    }

    // Prepare for GC
    payload.iseq_calls.extend(iseq_calls);
    append_gc_offsets(iseq, &gc_offsets);

    Ok(iseq_code_ptrs)
}

/// Compile a function
fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Result<(IseqCodePtrs, Vec<CodePtr>, Vec<IseqCallRef>), CompileError> {
    let num_spilled_params = max_num_params(function).saturating_sub(ALLOC_REGS.len());
    let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks());
    let mut asm = Assembler::new_with_stack_slots(num_spilled_params);

    // Compile each basic block
    let reverse_post_order = function.rpo();
    for &block_id in reverse_post_order.iter() {
        // Write a label to jump to the basic block
        let label = jit.get_label(&mut asm, block_id);
        asm.write_label(label);

        let block = function.block(block_id);
        asm_comment!(
            asm,
            "{block_id}({}): {}",
            block.params().map(|param| format!("{param}")).collect::<Vec<_>>().join(", "),
            iseq_get_location(iseq, block.insn_idx),
        );

        // Compile all parameters
        for (idx, &insn_id) in block.params().enumerate() {
            match function.find(insn_id) {
                Insn::Param => {
                    jit.opnds[insn_id.0] = Some(gen_param(&mut asm, idx));
                },
                insn => unreachable!("Non-param insn found in block.params: {insn:?}"),
            }
        }

        // Compile all instructions
        for &insn_id in block.insns() {
            let insn = function.find(insn_id);
            if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) {
                debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit.");
                gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot));
                // Don't bother generating code after a side-exit. We won't run it.
                // TODO(max): Generate ud2 or equivalent.
break; }; // It's fine; we generated the instruction } // Make sure the last patch point has enough space to insert a jump asm.pad_patch_point(); } // Generate code if everything can be compiled let result = asm.compile(cb); if let Ok((start_ptr, _)) = result { if get_option!(perf) { let start_usize = start_ptr.raw_addr(cb); let end_usize = cb.get_write_ptr().raw_addr(cb); let code_size = end_usize - start_usize; let iseq_name = iseq_get_location(iseq, 0); register_with_perf(iseq_name, start_usize, code_size); } if ZJITState::should_log_compiled_iseqs() { let iseq_name = iseq_get_location(iseq, 0); ZJITState::log_compile(iseq_name); } } result.map(|(start_ptr, gc_offsets)| { // Make sure jit_entry_ptrs can be used as a parallel vector to jit_entry_insns() jit.jit_entries.sort_by_key(|jit_entry| jit_entry.borrow().jit_entry_idx); let jit_entry_ptrs = jit.jit_entries.iter().map(|jit_entry| jit_entry.borrow().start_addr.get().expect("start_addr should have been set by pos_marker in gen_entry_point") ).collect(); (IseqCodePtrs { start_ptr, jit_entry_ptrs }, gc_offsets, jit.iseq_calls) }) } /// Compile an instruction fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, function: &Function, insn_id: InsnId, insn: &Insn) -> Result<(), InsnId> { // Convert InsnId to lir::Opnd macro_rules! opnd { ($insn_id:ident) => { jit.get_opnd($insn_id.clone()) }; } macro_rules! opnds { ($insn_ids:ident) => { { $insn_ids.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect::>() } }; } macro_rules! no_output { ($call:expr) => { { let () = $call; return Ok(()); } }; } if !matches!(*insn, Insn::Snapshot { .. }) { asm_comment!(asm, "Insn: {insn_id} {insn}"); } let out_opnd = match insn { &Insn::Const { val: Const::Value(val) } => gen_const_value(val), &Insn::Const { val: Const::CPtr(val) } => gen_const_cptr(val), Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"), Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)), Insn::NewHash { elements, state } => gen_new_hash(jit, asm, opnds!(elements), &function.frame_state(*state)), Insn::NewRange { low, high, flag, state } => gen_new_range(jit, asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::NewRangeFixnum { low, high, flag, state } => gen_new_range_fixnum(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)), Insn::ArrayArefFixnum { array, index, .. } => gen_aref_fixnum(asm, opnd!(array), opnd!(index)), Insn::ArrayPop { array, state } => gen_array_pop(asm, opnd!(array), &function.frame_state(*state)), Insn::ArrayLength { array } => gen_array_length(asm, opnd!(array)), Insn::ObjectAlloc { val, state } => gen_object_alloc(jit, asm, opnd!(val), &function.frame_state(*state)), &Insn::ObjectAllocClass { class, state } => gen_object_alloc_class(asm, class, &function.frame_state(state)), Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)), // concatstrings shouldn't have 0 strings // If it happens we abort the compilation for now Insn::StringConcat { strings, state, .. 
} if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), &Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)), Insn::StringAppend { recv, other, state } => gen_string_append(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param => unreachable!("block.insns should not have Insn::Param"), Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::SendWithoutBlockDirectTooManyArgs), Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), // Ensure we have enough room fit ec, self, and arguments // TODO remove this check when we have stack args (we can use Time.new to test it) Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), Insn::InvokeBuiltin { bf, leaf, args, state, .. 
} => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)), &Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), Insn::FixnumMult { left, right, state } => gen_fixnum_mult(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), Insn::FixnumEq { left, right } => gen_fixnum_eq(asm, opnd!(left), opnd!(right)), Insn::FixnumNeq { left, right } => gen_fixnum_neq(asm, opnd!(left), opnd!(right)), Insn::FixnumLt { left, right } => gen_fixnum_lt(asm, opnd!(left), opnd!(right)), Insn::FixnumLe { left, right } => gen_fixnum_le(asm, opnd!(left), opnd!(right)), Insn::FixnumGt { left, right } => gen_fixnum_gt(asm, opnd!(left), opnd!(right)), Insn::FixnumGe { left, right } => gen_fixnum_ge(asm, opnd!(left), opnd!(right)), Insn::FixnumAnd { left, right } => gen_fixnum_and(asm, opnd!(left), opnd!(right)), Insn::FixnumOr { left, right } => gen_fixnum_or(asm, opnd!(left), opnd!(right)), Insn::FixnumXor { left, right } => gen_fixnum_xor(asm, opnd!(left), opnd!(right)), &Insn::FixnumMod { left, right, state } => gen_fixnum_mod(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), &Insn::IsMethodCfunc { val, cd, cfunc, state: _ } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), &Insn::IsBitEqual { left, right } => gen_is_bit_equal(asm, opnd!(left), opnd!(right)), &Insn::IsBitNotEqual { left, right } => gen_is_bit_not_equal(asm, opnd!(left), opnd!(right)), &Insn::BoxBool { val } => gen_box_bool(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(jit, asm, opnd!(val), *expected, &function.frame_state(*state)), &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), Insn::GuardNotFrozen { val, state } => gen_guard_not_frozen(jit, asm, opnd!(val), &function.frame_state(*state)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, opnds!(args)), // Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it. Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), Insn::CCallWithFrame { cfunc, args, cme, state, blockiseq, .. 
} => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state, return_type: _, elidable: _ } => { gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) } Insn::GetIvar { self_val, id, state: _ } => gen_getivar(asm, opnd!(self_val), *id), Insn::SetGlobal { id, val, state } => no_output!(gen_setglobal(jit, asm, *id, opnd!(val), &function.frame_state(*state))), Insn::GetGlobal { id, state } => gen_getglobal(jit, asm, *id, &function.frame_state(*state)), &Insn::GetLocal { ep_offset, level, use_sp, .. } => gen_getlocal(asm, ep_offset, level, use_sp), &Insn::SetLocal { val, ep_offset, level } => no_output!(gen_setlocal(asm, opnd!(val), function.type_of(val), ep_offset, level)), Insn::GetConstantPath { ic, state } => gen_get_constant_path(jit, asm, *ic, &function.frame_state(*state)), Insn::GetClassVar { id, ic, state } => gen_getclassvar(jit, asm, *id, *ic, &function.frame_state(*state)), Insn::SetClassVar { id, val, ic, state } => no_output!(gen_setclassvar(jit, asm, *id, opnd!(val), *ic, &function.frame_state(*state))), Insn::SetIvar { self_val, id, val, state } => no_output!(gen_setivar(jit, asm, opnd!(self_val), *id, opnd!(val), &function.frame_state(*state))), Insn::SideExit { state, reason } => no_output!(gen_side_exit(jit, asm, reason, &function.frame_state(*state))), Insn::PutSpecialObject { value_type } => gen_putspecialobject(asm, *value_type), Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state)), Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state)), Insn::GetSpecialSymbol { symbol_type, state: _ } => gen_getspecial_symbol(asm, *symbol_type), Insn::GetSpecialNumber { nth, state } => gen_getspecial_number(asm, *nth, &function.frame_state(*state)), &Insn::IncrCounter(counter) => no_output!(gen_incr_counter(asm, counter)), Insn::IncrCounterPtr { counter_ptr } => no_output!(gen_incr_counter_ptr(asm, *counter_ptr)), Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state)), &Insn::CheckInterrupts { state } => no_output!(gen_check_interrupts(jit, asm, &function.frame_state(state))), &Insn::HashDup { val, state } => { gen_hash_dup(asm, opnd!(val), &function.frame_state(state)) }, &Insn::HashAref { hash, key, state } => { gen_hash_aref(jit, asm, opnd!(hash), opnd!(key), &function.frame_state(state)) }, &Insn::ArrayPush { array, val, state } => { no_output!(gen_array_push(asm, opnd!(array), opnd!(val), &function.frame_state(state))) }, &Insn::ToNewArray { val, state } => { gen_to_new_array(jit, asm, opnd!(val), &function.frame_state(state)) }, &Insn::ToArray { val, state } => { gen_to_array(jit, asm, opnd!(val), &function.frame_state(state)) }, &Insn::DefinedIvar { self_val, id, pushval, .. 
} => { gen_defined_ivar(asm, opnd!(self_val), id, pushval) }, &Insn::ArrayExtend { left, right, state } => { no_output!(gen_array_extend(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state))) }, &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), Insn::LoadPC => gen_load_pc(asm), Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type: _ } => gen_load_field(asm, opnd!(recv), id, offset), &Insn::IsBlockGiven => gen_is_block_given(jit, asm), &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } | &Insn::Throw { state, .. } => return Err(state), }; assert!(insn.has_output(), "Cannot write LIR output of HIR instruction with no output: {insn}"); // If the instruction has an output, remember it in jit.opnds jit.opnds[insn_id.0] = Some(out_opnd); Ok(()) } /// Gets the EP of the ISeq of the containing method, or "local level". /// Equivalent of GET_LEP() macro. fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { // Equivalent of get_lvar_level() in compile.c fn get_lvar_level(mut iseq: IseqPtr) -> u32 { let local_iseq = unsafe { rb_get_iseq_body_local_iseq(iseq) }; let mut level = 0; while iseq != local_iseq { iseq = unsafe { rb_get_iseq_body_parent_iseq(iseq) }; level += 1; } level } let level = get_lvar_level(jit.iseq); gen_get_ep(asm, level) } // Get EP at `level` from CFP fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { // Load environment pointer EP from CFP into a register let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); let mut ep_opnd = asm.load(ep_opnd); for _ in 0..level { // Get the previous EP from the current EP // See GET_PREV_EP(ep) macro // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) const UNTAGGING_MASK: Opnd = Opnd::Imm(!0x03); let offset = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL; ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offset)); ep_opnd = asm.and(ep_opnd, UNTAGGING_MASK); } ep_opnd } fn gen_objtostring(jit: &mut JITState, asm: &mut Assembler, val: Opnd, cd: *const rb_call_data, state: &FrameState) -> Opnd { gen_prepare_non_leaf_call(jit, asm, state); // TODO: Specialize for immediate types // Call rb_vm_objtostring(iseq, recv, cd) let ret = asm_ccall!(asm, rb_vm_objtostring, VALUE::from(jit.iseq).into(), val, Opnd::const_ptr(cd)); // TODO: Call `to_s` on the receiver if rb_vm_objtostring returns Qundef // Need to replicate what CALL_SIMPLE_METHOD does asm_comment!(asm, "side-exit if rb_vm_objtostring returns Qundef"); asm.cmp(ret, Qundef.into()); asm.je(side_exit(jit, state, ObjToStringFallback)); ret } fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, pushval: VALUE, tested_value: Opnd, state: &FrameState) -> Opnd { match op_type as defined_type { DEFINED_YIELD => { // `yield` goes to the block handler stowed in the "local" iseq which is // the current iseq or a parent. Only the "method" iseq type can be passed a // block handler. (e.g. `yield` in the top level script is a syntax error.) 
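            //
            // Illustrative example (not in the original source): in
            //   def m; [1].each { defined?(yield) }; end
            // the block's local iseq is m's method iseq, so the code below reads the
            // block handler out of m's environment (via the LEP) rather than out of
            // the block's own frame.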
// // Similar to gen_is_block_given let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { let lep = gen_get_lep(jit, asm); let block_handler = asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); let pushval = asm.load(pushval.into()); asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); asm.csel_e(Qnil.into(), pushval) } else { Qnil.into() } } _ => { // Save the PC and SP because the callee may allocate or call #respond_to? gen_prepare_non_leaf_call(jit, asm, state); // TODO: Inline the cases for each op_type // Call vm_defined(ec, reg_cfp, op_type, obj, v) let def_result = asm_ccall!(asm, rb_vm_defined, EC, CFP, op_type.into(), obj.into(), tested_value); asm.cmp(def_result.with_num_bits(8), 0.into()); asm.csel_ne(pushval.into(), Qnil.into()) } } } /// Similar to gen_defined for DEFINED_YIELD fn gen_is_block_given(jit: &JITState, asm: &mut Assembler) -> Opnd { let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { let lep = gen_get_lep(jit, asm); let block_handler = asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); asm.csel_e(Qfalse.into(), Qtrue.into()) } else { Qfalse.into() } } /// Get a local variable from a higher scope or the heap. `local_ep_offset` is in number of VALUEs. /// We generate this instruction with level=0 only when the local variable is on the heap, so we /// can't optimize the level=0 case using the SP register. fn gen_getlocal(asm: &mut Assembler, local_ep_offset: u32, level: u32, use_sp: bool) -> lir::Opnd { let local_ep_offset = i32::try_from(local_ep_offset).unwrap_or_else(|_| panic!("Could not convert local_ep_offset {local_ep_offset} to i32")); if level > 0 { gen_incr_counter(asm, Counter::vm_read_from_parent_iseq_local_count); } let local = if use_sp { assert_eq!(level, 0, "use_sp optimization should be used only for level=0 locals"); let offset = -(SIZEOF_VALUE_I32 * (local_ep_offset + 1)); Opnd::mem(64, SP, offset) } else { let ep = gen_get_ep(asm, level); let offset = -(SIZEOF_VALUE_I32 * local_ep_offset); Opnd::mem(64, ep, offset) }; asm.load(local) } /// Set a local variable from a higher scope or the heap. `local_ep_offset` is in number of VALUEs. /// We generate this instruction with level=0 only when the local variable is on the heap, so we /// can't optimize the level=0 case using the SP register. fn gen_setlocal(asm: &mut Assembler, val: Opnd, val_type: Type, local_ep_offset: u32, level: u32) { let local_ep_offset = c_int::try_from(local_ep_offset).unwrap_or_else(|_| panic!("Could not convert local_ep_offset {local_ep_offset} to i32")); if level > 0 { gen_incr_counter(asm, Counter::vm_write_to_parent_iseq_local_count); } let ep = gen_get_ep(asm, level); // When we've proved that we're writing an immediate, // we can skip the write barrier. if val_type.is_immediate() { let offset = -(SIZEOF_VALUE_I32 * local_ep_offset); asm.mov(Opnd::mem(64, ep, offset), val); } else { // We're potentially writing a reference to an IMEMO/env object, // so take care of the write barrier with a function. 
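        // For example, storing a heap-allocated String into an escaped environment can
        // create an old-to-young reference, so rb_vm_env_write below performs the
        // generational GC write barrier; the immediate case above never needs one.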
let local_index = -local_ep_offset; asm_ccall!(asm, rb_vm_env_write, ep, local_index.into(), val); } } fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { // Bail out if the `&block` local variable has been modified let ep = gen_get_ep(asm, level); let flags = Opnd::mem(64, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); asm.test(flags, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); asm.jnz(side_exit(jit, state, SideExitReason::BlockParamProxyModified)); // This handles two cases which are nearly identical // Block handler is a tagged pointer. Look at the tag. // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 // So to check for either of those cases we can use: val & 0x1 == 0x1 const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); // Bail ouf if the block handler is neither ISEQ nor ifunc let block_handler = asm.load(Opnd::mem(64, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); asm.test(block_handler, 0x1.into()); asm.jz(side_exit(jit, state, SideExitReason::BlockParamProxyNotIseqOrIfunc)); } fn gen_guard_not_frozen(jit: &JITState, asm: &mut Assembler, val: Opnd, state: &FrameState) -> Opnd { let ret = asm_ccall!(asm, rb_obj_frozen_p, val); asm_comment!(asm, "side-exit if rb_obj_frozen_p returns Qtrue"); asm.cmp(ret, Qtrue.into()); asm.je(side_exit(jit, state, GuardNotFrozen)); val } fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_inline_constant_cache, state: &FrameState) -> Opnd { unsafe extern "C" { fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const iseq_inline_constant_cache) -> VALUE; } // Anything could be called on const_missing gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_vm_opt_getconstant_path, EC, CFP, Opnd::const_ptr(ic)) } fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec) -> lir::Opnd { assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, bf.argc); if leaf { gen_prepare_leaf_call_with_gc(asm, state); } else { // Anything can happen inside builtin functions gen_prepare_non_leaf_call(jit, asm, state); } let mut cargs = vec![EC]; cargs.extend(args); asm.ccall(bf.func_ptr as *const u8, cargs) } /// Record a patch point that should be invalidated on a given invariant fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invariant, state: &FrameState) { let payload_ptr = get_or_create_iseq_payload_ptr(jit.iseq); let label = asm.new_label("patch_point").unwrap_label(); let invariant = *invariant; // Compile a side exit. Fill nop instructions if the last patch point is too close. 
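    // Illustrative example (assumed, not from the original source): for
    // Invariant::BOPRedefined { klass: INTEGER_REDEFINED_OP_FLAG, bop: BOP_PLUS },
    // the pos_marker below records this address so that redefining Integer#+
    // later patches this spot with a jump to the side exit compiled here.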
asm.patch_point(build_side_exit(jit, state, PatchPoint(invariant), Some(label))); // Remember the current address as a patch point asm.pos_marker(move |code_ptr, cb| { let side_exit_ptr = cb.resolve_label(label); match invariant { Invariant::BOPRedefined { klass, bop } => { track_bop_assumption(klass, bop, code_ptr, side_exit_ptr, payload_ptr); } Invariant::MethodRedefined { klass: _, method: _, cme } => { track_cme_assumption(cme, code_ptr, side_exit_ptr, payload_ptr); } Invariant::StableConstantNames { idlist } => { track_stable_constant_names_assumption(idlist, code_ptr, side_exit_ptr, payload_ptr); } Invariant::NoTracePoint => { track_no_trace_point_assumption(code_ptr, side_exit_ptr, payload_ptr); } Invariant::NoEPEscape(iseq) => { track_no_ep_escape_assumption(iseq, code_ptr, side_exit_ptr, payload_ptr); } Invariant::SingleRactorMode => { track_single_ractor_assumption(code_ptr, side_exit_ptr, payload_ptr); } Invariant::NoSingletonClass { klass } => { track_no_singleton_class_assumption(klass, code_ptr, side_exit_ptr, payload_ptr); } } }); } /// Generate code for a C function call that pushes a frame fn gen_ccall_with_frame( jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec, cme: *const rb_callable_method_entry_t, blockiseq: Option, state: &FrameState, ) -> lir::Opnd { gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count); gen_stack_overflow_check(jit, asm, state, state.stack_size()); let caller_stack_size = state.stack_size() - args.len(); // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP // to account for the receiver and arguments (and block arguments if any) gen_prepare_call_with_gc(asm, state, false); gen_save_sp(asm, caller_stack_size); gen_spill_stack(jit, asm, state); gen_spill_locals(jit, asm, state); let block_handler_specval = if let Some(block_iseq) = blockiseq { // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler. // rb_captured_block->code.iseq aliases with cfp->block_code. asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); asm.or(cfp_self_addr, Opnd::Imm(1)) } else { VM_BLOCK_HANDLER_NONE.into() }; gen_push_frame(asm, args.len(), state, ControlFrame { recv: args[0], iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, pc: PC_POISON, specval: block_handler_specval, }); asm_comment!(asm, "switch to new SP register"); let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let result = asm.ccall(cfunc, args); asm_comment!(asm, "pop C frame"); let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); asm_comment!(asm, "restore SP register for the caller"); let new_sp = asm.sub(SP, sp_offset.into()); asm.mov(SP, new_sp); result } /// Lowering for [`Insn::CCall`]. This is a low-level raw call that doesn't know /// anything about the callee, so handling for e.g. GC safety is dealt with elsewhere. 
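/// Unlike gen_ccall_with_frame and gen_ccall_variadic, no control frame is pushed and
/// cfp->pc/cfp->sp are left untouched, so this lowering relies on earlier passes having
/// established that the call is safe to make without a frame.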
fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, args: Vec) -> lir::Opnd { asm.ccall(cfunc, args) } /// Generate code for a variadic C function call /// func(int argc, VALUE *argv, VALUE recv) fn gen_ccall_variadic( jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, recv: Opnd, args: Vec, cme: *const rb_callable_method_entry_t, state: &FrameState, ) -> lir::Opnd { gen_incr_counter(asm, Counter::variadic_cfunc_optimized_send_count); gen_prepare_non_leaf_call(jit, asm, state); let stack_growth = state.stack_size(); gen_stack_overflow_check(jit, asm, state, stack_growth); gen_push_frame(asm, args.len(), state, ControlFrame { recv, iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, specval: VM_BLOCK_HANDLER_NONE.into(), pc: PC_POISON, }); asm_comment!(asm, "switch to new SP register"); let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let argv_ptr = gen_push_opnds(asm, &args); let result = asm.ccall(cfunc, vec![args.len().into(), argv_ptr, recv]); gen_pop_opnds(asm, &args); asm_comment!(asm, "pop C frame"); let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); asm_comment!(asm, "restore SP register for the caller"); let new_sp = asm.sub(SP, sp_offset.into()); asm.mov(SP, new_sp); result } /// Emit an uncached instance variable lookup fn gen_getivar(asm: &mut Assembler, recv: Opnd, id: ID) -> Opnd { gen_incr_counter(asm, Counter::dynamic_getivar_count); asm_ccall!(asm, rb_ivar_get, recv, id.0.into()) } /// Emit an uncached instance variable store fn gen_setivar(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, id: ID, val: Opnd, state: &FrameState) { gen_incr_counter(asm, Counter::dynamic_setivar_count); // Setting an ivar can raise FrozenError, so we need proper frame state for exception handling. 
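    // For example, `obj.freeze; obj.instance_variable_set(:@a, 1)` raises FrozenError,
    // so cfp->pc and cfp->sp must be in sync before calling rb_ivar_set so the
    // exception is raised with an accurate backtrace.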
gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_ivar_set, recv, id.0.into(), val); } fn gen_getclassvar(jit: &mut JITState, asm: &mut Assembler, id: ID, ic: *const iseq_inline_cvar_cache_entry, state: &FrameState) -> Opnd { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_vm_getclassvariable, VALUE::from(jit.iseq).into(), CFP, id.0.into(), Opnd::const_ptr(ic)) } fn gen_setclassvar(jit: &mut JITState, asm: &mut Assembler, id: ID, val: Opnd, ic: *const iseq_inline_cvar_cache_entry, state: &FrameState) { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_vm_setclassvariable, VALUE::from(jit.iseq).into(), CFP, id.0.into(), val, Opnd::const_ptr(ic)); } /// Look up global variables fn gen_getglobal(jit: &mut JITState, asm: &mut Assembler, id: ID, state: &FrameState) -> Opnd { // `Warning` module's method `warn` can be called when reading certain global variables gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_gvar_get, id.0.into()) } /// Intern a string fn gen_intern(asm: &mut Assembler, val: Opnd, state: &FrameState) -> Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_str_intern, val) } /// Set global variables fn gen_setglobal(jit: &mut JITState, asm: &mut Assembler, id: ID, val: Opnd, state: &FrameState) { // When trace_var is used, setting a global variable can cause exceptions gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_gvar_set, id.0.into(), val); } /// Side-exit into the interpreter fn gen_side_exit(jit: &mut JITState, asm: &mut Assembler, reason: &SideExitReason, state: &FrameState) { asm.jmp(side_exit(jit, state, *reason)); } /// Emit a special object lookup fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> Opnd { // Get the EP of the current CFP and load it into a register let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); let ep_reg = asm.load(ep_opnd); asm_ccall!(asm, rb_vm_get_special_object, ep_reg, Opnd::UImm(u64::from(value_type))) } fn gen_getspecial_symbol(asm: &mut Assembler, symbol_type: SpecialBackrefSymbol) -> Opnd { // Fetch a "special" backref based on the symbol type let backref = asm_ccall!(asm, rb_backref_get,); match symbol_type { SpecialBackrefSymbol::LastMatch => { asm_ccall!(asm, rb_reg_last_match, backref) } SpecialBackrefSymbol::PreMatch => { asm_ccall!(asm, rb_reg_match_pre, backref) } SpecialBackrefSymbol::PostMatch => { asm_ccall!(asm, rb_reg_match_post, backref) } SpecialBackrefSymbol::LastGroup => { asm_ccall!(asm, rb_reg_match_last, backref) } } } fn gen_getspecial_number(asm: &mut Assembler, nth: u64, state: &FrameState) -> Opnd { // Fetch the N-th match from the last backref based on type shifted by 1 let backref = asm_ccall!(asm, rb_backref_get,); gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_reg_nth_match, Opnd::Imm((nth >> 1).try_into().unwrap()), backref) } fn gen_check_interrupts(jit: &mut JITState, asm: &mut Assembler, state: &FrameState) { // Check for interrupts // see RUBY_VM_CHECK_INTS(ec) macro asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)"); // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, // signal_exec, or rb_postponed_job_flush. 
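    // If any interrupt flag bit is set (timer interrupt, pending signal, etc.), we
    // side-exit and let the interpreter handle it; RUBY_VM_CHECK_INTS eventually
    // calls rb_threadptr_execute_interrupts, which we don't replicate in JIT code.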
let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); asm.test(interrupt_flag, interrupt_flag); asm.jnz(side_exit(jit, state, SideExitReason::Interrupt)); } fn gen_hash_dup(asm: &mut Assembler, val: Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_hash_resurrect, val) } fn gen_hash_aref(jit: &mut JITState, asm: &mut Assembler, hash: Opnd, key: Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_hash_aref, hash, key) } fn gen_array_push(asm: &mut Assembler, array: Opnd, val: Opnd, state: &FrameState) { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_ary_push, array, val); } fn gen_to_new_array(jit: &mut JITState, asm: &mut Assembler, val: Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_vm_splat_array, Opnd::Value(Qtrue), val) } fn gen_to_array(jit: &mut JITState, asm: &mut Assembler, val: Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_vm_splat_array, Opnd::Value(Qfalse), val) } fn gen_defined_ivar(asm: &mut Assembler, self_val: Opnd, id: ID, pushval: VALUE) -> lir::Opnd { asm_ccall!(asm, rb_zjit_defined_ivar, self_val, id.0.into(), Opnd::Value(pushval)) } fn gen_array_extend(jit: &mut JITState, asm: &mut Assembler, left: Opnd, right: Opnd, state: &FrameState) { gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_ary_concat, left, right); } fn gen_guard_shape(jit: &mut JITState, asm: &mut Assembler, val: Opnd, shape: ShapeId, state: &FrameState) -> Opnd { let shape_id_offset = unsafe { rb_shape_id_offset() }; let val = asm.load(val); let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, val, shape_id_offset); asm.cmp(shape_opnd, Opnd::UImm(shape.0 as u64)); asm.jne(side_exit(jit, state, SideExitReason::GuardShape(shape))); val } fn gen_load_pc(asm: &mut Assembler) -> Opnd { asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC)) } fn gen_load_self() -> Opnd { Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) } fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32) -> Opnd { asm_comment!(asm, "Load field id={} offset={}", id.contents_lossy(), offset); let recv = asm.load(recv); asm.load(Opnd::mem(64, recv, offset)) } /// Compile an interpreter entry block to be inserted into an ISEQ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0)); // Save the registers we'll use for CFP, EP, SP asm.frame_setup(lir::JIT_PRESERVED_REGS); // EC and CFP are passed as arguments asm.mov(EC, C_ARG_OPNDS[0]); asm.mov(CFP, C_ARG_OPNDS[1]); // Load the current SP from the CFP into REG_SP asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); } /// Set branch params to basic block arguments fn gen_branch_params(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { if branch.args.is_empty() { return; } asm_comment!(asm, "set branch params: {}", branch.args.len()); asm.parallel_mov(branch.args.iter().enumerate().map(|(idx, &arg)| (param_opnd(idx), jit.get_opnd(arg)) ).collect()); } /// Compile a constant fn gen_const_value(val: VALUE) -> lir::Opnd { // Just propagate the constant value and generate nothing Opnd::Value(val) } /// Compile Const::CPtr fn gen_const_cptr(val: *const u8) -> lir::Opnd { Opnd::const_ptr(val) } /// Compile a basic block argument fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { // Allocate a register or a stack slot match 
param_opnd(idx) { // If it's a register, insert LiveReg instruction to reserve the register // in the register pool for register allocation. param @ Opnd::Reg(_) => asm.live_reg_opnd(param), param => param, } } /// Compile a jump to a basic block fn gen_jump(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { // Set basic block arguments gen_branch_params(jit, asm, branch); // Jump to the basic block let target = jit.get_label(asm, branch.target); asm.jmp(target); } /// Compile a conditional branch to a basic block fn gen_if_true(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { // If val is zero, move on to the next instruction. let if_false = asm.new_label("if_false"); asm.test(val, val); asm.jz(if_false.clone()); // If val is not zero, set basic block arguments and jump to the branch target. // TODO: Consider generating the loads out-of-line let if_true = jit.get_label(asm, branch.target); gen_branch_params(jit, asm, branch); asm.jmp(if_true); asm.write_label(if_false); } /// Compile a conditional branch to a basic block fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { // If val is not zero, move on to the next instruction. let if_true = asm.new_label("if_true"); asm.test(val, val); asm.jnz(if_true.clone()); // If val is zero, set basic block arguments and jump to the branch target. // TODO: Consider generating the loads out-of-line let if_false = jit.get_label(asm, branch.target); gen_branch_params(jit, asm, branch); asm.jmp(if_false); asm.write_label(if_true); } /// Compile a dynamic dispatch with block fn gen_send( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, blockiseq: IseqPtr, state: &FrameState, reason: SendFallbackReason, ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; } asm.ccall( rb_vm_send as *const u8, vec![EC, CFP, Opnd::const_ptr(cd), VALUE::from(blockiseq).into()], ) } /// Compile a dynamic dispatch with `...` fn gen_send_forward( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, blockiseq: IseqPtr, state: &FrameState, reason: SendFallbackReason, ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; } asm.ccall( rb_vm_sendforward as *const u8, vec![EC, CFP, Opnd::const_ptr(cd), VALUE::from(blockiseq).into()], ) } /// Compile a dynamic dispatch without block fn gen_send_without_block( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, state: &FrameState, reason: SendFallbackReason, ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; } asm.ccall( rb_vm_opt_send_without_block as *const u8, vec![EC, CFP, Opnd::const_ptr(cd)], ) } /// Compile a direct jump to an ISEQ call without block fn gen_send_without_block_direct( cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, cme: *const rb_callable_method_entry_t, iseq: IseqPtr, recv: Opnd, 
args: Vec, state: &FrameState, ) -> lir::Opnd { gen_incr_counter(asm, Counter::iseq_optimized_send_count); let local_size = unsafe { get_iseq_body_local_table_size(iseq) }.to_usize(); let stack_growth = state.stack_size() + local_size + unsafe { get_iseq_body_stack_max(iseq) }.to_usize(); gen_stack_overflow_check(jit, asm, state, stack_growth); // Save cfp->pc and cfp->sp for the caller frame gen_prepare_call_with_gc(asm, state, false); // Special SP math. Can't use gen_prepare_non_leaf_call gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver gen_spill_locals(jit, asm, state); gen_spill_stack(jit, asm, state); let (frame_type, specval) = if VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(cme) } { // Extract EP from the Proc instance let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; let proc = unsafe { rb_jit_get_proc_ptr(procv) }; let proc_block = unsafe { &(*proc).block }; let capture = unsafe { proc_block.as_.captured.as_ref() }; let bmethod_frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA; // Tag the captured EP like VM_GUARDED_PREV_EP() in vm_call_iseq_bmethod() let bmethod_specval = (capture.ep.addr() | 1).into(); (bmethod_frame_type, bmethod_specval) } else { (VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, VM_BLOCK_HANDLER_NONE.into()) }; // Set up the new frame // TODO: Lazily materialize caller frames on side exits or when needed gen_push_frame(asm, args.len(), state, ControlFrame { recv, iseq: Some(iseq), cme, frame_type, pc: None, specval, }); asm_comment!(asm, "switch to new SP register"); let sp_offset = (state.stack().len() + local_size - args.len() + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Set up arguments let mut c_args = vec![recv]; c_args.extend(args); // Make a method call. The target address will be rewritten once compiled. let iseq_call = IseqCall::new(iseq); let dummy_ptr = cb.get_write_ptr().raw_ptr(cb); jit.iseq_calls.push(iseq_call.clone()); let ret = asm.ccall_with_iseq_call(dummy_ptr, c_args, &iseq_call); // If a callee side-exits, i.e. returns Qundef, propagate the return value to the caller. // The caller will side-exit the callee into the interpreter. // TODO: Let side exit code pop all JIT frames to optimize away this cmp + je. 
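    // Concretely: if the callee hits a guard failure, its side exit returns Qundef here;
    // the check below then jumps to the exit trampoline, which restores the native stack
    // pointer and returns Qundef again, so each JIT caller up the chain unwinds in turn
    // (see the TODO above).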
asm_comment!(asm, "side-exit if callee side-exits"); asm.cmp(ret, Qundef.into()); // Restore the C stack pointer on exit asm.je(ZJITState::get_exit_trampoline().into()); asm_comment!(asm, "restore SP register for the caller"); let new_sp = asm.sub(SP, sp_offset.into()); asm.mov(SP, new_sp); ret } /// Compile for invokeblock fn gen_invokeblock( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, state: &FrameState, reason: SendFallbackReason, ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call invokeblock"); unsafe extern "C" { fn rb_vm_invokeblock(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; } asm.ccall( rb_vm_invokeblock as *const u8, vec![EC, CFP, Opnd::const_ptr(cd)], ) } /// Compile a dynamic dispatch for `super` fn gen_invokesuper( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, blockiseq: IseqPtr, state: &FrameState, reason: SendFallbackReason, ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call super with dynamic dispatch"); unsafe extern "C" { fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; } asm.ccall( rb_vm_invokesuper as *const u8, vec![EC, CFP, Opnd::const_ptr(cd), VALUE::from(blockiseq).into()], ) } /// Compile a string resurrection fn gen_string_copy(asm: &mut Assembler, recv: Opnd, chilled: bool, state: &FrameState) -> Opnd { // TODO: split rb_ec_str_resurrect into separate functions gen_prepare_leaf_call_with_gc(asm, state); let chilled = if chilled { Opnd::Imm(1) } else { Opnd::Imm(0) }; asm_ccall!(asm, rb_ec_str_resurrect, EC, recv, chilled) } /// Compile an array duplication instruction fn gen_array_dup( asm: &mut Assembler, val: lir::Opnd, state: &FrameState, ) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_ary_resurrect, val) } /// Compile a new array instruction fn gen_new_array( asm: &mut Assembler, elements: Vec, state: &FrameState, ) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); let length: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); let new_array = asm_ccall!(asm, rb_ary_new_capa, length.into()); for val in elements { asm_ccall!(asm, rb_ary_push, new_array, val); } new_array } /// Compile array access (`array[index]`) fn gen_aref_fixnum( asm: &mut Assembler, array: Opnd, index: Opnd, ) -> lir::Opnd { let unboxed_idx = asm.rshift(index, Opnd::UImm(1)); asm_ccall!(asm, rb_ary_entry, array, unboxed_idx) } fn gen_array_pop(asm: &mut Assembler, array: Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_ary_pop, array) } fn gen_array_length(asm: &mut Assembler, array: Opnd) -> lir::Opnd { asm_ccall!(asm, rb_jit_array_len, array) } /// Compile a new hash instruction fn gen_new_hash( jit: &mut JITState, asm: &mut Assembler, elements: Vec, state: &FrameState, ) -> lir::Opnd { gen_prepare_non_leaf_call(jit, asm, state); let cap: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); let new_hash = asm_ccall!(asm, rb_hash_new_with_size, lir::Opnd::Imm(cap)); if !elements.is_empty() { let argv = gen_push_opnds(asm, &elements); asm_ccall!(asm, rb_hash_bulk_insert, elements.len().into(), argv, new_hash); gen_pop_opnds(asm, &elements); } new_hash } /// Compile a new range instruction fn gen_new_range( jit: &JITState, asm: &mut Assembler, low: lir::Opnd, high: lir::Opnd, flag: 
RangeType, state: &FrameState, ) -> lir::Opnd { // Sometimes calls `low.<=>(high)` gen_prepare_non_leaf_call(jit, asm, state); // Call rb_range_new(low, high, flag) asm_ccall!(asm, rb_range_new, low, high, (flag as i32).into()) } fn gen_new_range_fixnum( asm: &mut Assembler, low: lir::Opnd, high: lir::Opnd, flag: RangeType, state: &FrameState, ) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_range_new, low, high, (flag as i64).into()) } fn gen_object_alloc(jit: &JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd { // Allocating an object from an unknown class is non-leaf; see doc for `ObjectAlloc`. gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_obj_alloc, val) } fn gen_object_alloc_class(asm: &mut Assembler, class: VALUE, state: &FrameState) -> lir::Opnd { // Allocating an object for a known class with default allocator is leaf; see doc for // `ObjectAllocClass`. gen_prepare_leaf_call_with_gc(asm, state); if unsafe { rb_zjit_class_has_default_allocator(class) } { // TODO(max): inline code to allocate an instance asm_ccall!(asm, rb_class_allocate_instance, class.into()) } else { assert!(class_has_leaf_allocator(class), "class passed to ObjectAllocClass must have a leaf allocator"); let alloc_func = unsafe { rb_zjit_class_get_alloc_func(class) }; assert!(alloc_func.is_some(), "class {} passed to ObjectAllocClass must have an allocator", get_class_name(class)); asm_comment!(asm, "call allocator for class {}", get_class_name(class)); asm.ccall(alloc_func.unwrap() as *const u8, vec![class.into()]) } } /// Compile a frame setup. If jit_entry_idx is Some, remember the address of it as a JIT entry. fn gen_entry_point(jit: &mut JITState, asm: &mut Assembler, jit_entry_idx: Option) { if let Some(jit_entry_idx) = jit_entry_idx { let jit_entry = JITEntry::new(jit_entry_idx); jit.jit_entries.push(jit_entry.clone()); asm.pos_marker(move |code_ptr, _| { jit_entry.borrow_mut().start_addr.set(Some(code_ptr)); }); } asm.frame_setup(&[]); } /// Compile code that exits from JIT code with a return value fn gen_return(asm: &mut Assembler, val: lir::Opnd) { // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP asm_comment!(asm, "pop stack frame"); let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, incr_cfp); asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Order here is important. Because we're about to tear down the frame, // we need to load the return value, which might be part of the frame. 
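    // (e.g. `val` may have been spilled to a slot in the native frame that
    // frame_teardown below is about to release, so move it into C_RET_OPND first.)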
asm.load_into(C_RET_OPND, val); // Return from the function asm.frame_teardown(&[]); // matching the setup in gen_entry_point() asm.cret(C_RET_OPND); } /// Compile Fixnum + Fixnum fn gen_fixnum_add(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Add left + right and test for overflow let left_untag = asm.sub(left, Opnd::Imm(1)); let out_val = asm.add(left_untag, right); asm.jo(side_exit(jit, state, FixnumAddOverflow)); out_val } /// Compile Fixnum - Fixnum fn gen_fixnum_sub(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Subtract left - right and test for overflow let val_untag = asm.sub(left, right); asm.jo(side_exit(jit, state, FixnumSubOverflow)); asm.add(val_untag, Opnd::Imm(1)) } /// Compile Fixnum * Fixnum fn gen_fixnum_mult(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Do some bitwise gymnastics to handle tag bits // x * y is translated to (x >> 1) * (y - 1) + 1 let left_untag = asm.rshift(left, Opnd::UImm(1)); let right_untag = asm.sub(right, Opnd::UImm(1)); let out_val = asm.mul(left_untag, right_untag); // Test for overflow asm.jo_mul(side_exit(jit, state, FixnumMultOverflow)); asm.add(out_val, Opnd::UImm(1)) } /// Compile Fixnum == Fixnum fn gen_fixnum_eq(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_e(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum != Fixnum fn gen_fixnum_neq(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_ne(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum < Fixnum fn gen_fixnum_lt(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_l(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum <= Fixnum fn gen_fixnum_le(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_le(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum > Fixnum fn gen_fixnum_gt(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_g(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum >= Fixnum fn gen_fixnum_ge(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_ge(Qtrue.into(), Qfalse.into()) } /// Compile Fixnum & Fixnum fn gen_fixnum_and(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.and(left, right) } /// Compile Fixnum | Fixnum fn gen_fixnum_or(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.or(left, right) } /// Compile Fixnum ^ Fixnum fn gen_fixnum_xor(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { // XOR and then re-tag the resulting fixnum let out_val = asm.xor(left, right); asm.add(out_val, Opnd::UImm(1)) } fn gen_fixnum_mod(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Check for left % 0, which raises ZeroDivisionError asm.cmp(right, Opnd::from(VALUE::fixnum_from_usize(0))); asm.je(side_exit(jit, state, FixnumModByZero)); asm_ccall!(asm, rb_fix_mod_fix, left, right) } // Compile val == nil fn gen_isnil(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.cmp(val, Qnil.into()); // TODO: Implement and use setcc asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) } fn gen_is_method_cfunc(jit: &JITState, asm: &mut Assembler, val: lir::Opnd, cd: *const 
rb_call_data, cfunc: *const u8) -> lir::Opnd { unsafe extern "C" { fn rb_vm_method_cfunc_is(iseq: IseqPtr, cd: *const rb_call_data, recv: VALUE, cfunc: *const u8) -> VALUE; } asm_ccall!(asm, rb_vm_method_cfunc_is, VALUE::from(jit.iseq).into(), Opnd::const_ptr(cd), val, Opnd::const_ptr(cfunc)) } fn gen_is_bit_equal(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) } fn gen_is_bit_not_equal(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir::Opnd { asm.cmp(left, right); asm.csel_ne(Opnd::Imm(1), Opnd::Imm(0)) } fn gen_box_bool(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.test(val, val); asm.csel_nz(Opnd::Value(Qtrue), Opnd::Value(Qfalse)) } fn gen_anytostring(asm: &mut Assembler, val: lir::Opnd, str: lir::Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); asm_ccall!(asm, rb_obj_as_string_result, str, val) } /// Evaluate if a value is truthy /// Produces a CBool type (0 or 1) /// In Ruby, only nil and false are falsy /// Everything else evaluates to true fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { // Test if any bit (outside of the Qnil bit) is on // See RB_TEST(), include/ruby/internal/special_consts.h asm.test(val, Opnd::Imm(!Qnil.as_i64())); asm.csel_e(0.into(), 1.into()) } /// Compile a type check with a side exit fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { gen_incr_counter(asm, Counter::guard_type_count); if guard_type.is_subtype(types::Fixnum) { asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); asm.jz(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::Flonum) { // Flonum: (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG let masked = asm.and(val, Opnd::UImm(RUBY_FLONUM_MASK as u64)); asm.cmp(masked, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::StaticSymbol) { // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG // Use 8-bit comparison like YJIT does. GuardType should not be used // for a known VALUE, which with_num_bits() does not support. asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::NilClass) { asm.cmp(val, Qnil.into()); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::TrueClass) { asm.cmp(val, Qtrue.into()); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::FalseClass) { asm.cmp(val, Qfalse.into()); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_immediate() { // All immediate types' guard should have been handled above panic!("unexpected immediate guard type: {guard_type}"); } else if let Some(expected_class) = guard_type.runtime_exact_ruby_class() { asm_comment!(asm, "guard exact class for non-immediate types"); // If val isn't in a register, load it to use it as the base of Opnd::mem later. // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) let val = match val { Opnd::Reg(_) | Opnd::VReg { .. 
} => val, _ => asm.load(val), };
        // Check if it's a special constant
        let side_exit = side_exit(jit, state, GuardType(guard_type));
        asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
        asm.jnz(side_exit.clone());

        // Check if it's false
        asm.cmp(val, Qfalse.into());
        asm.je(side_exit.clone());

        // Load the class from the object's klass field
        let klass = asm.load(Opnd::mem(64, val, RUBY_OFFSET_RBASIC_KLASS));
        asm.cmp(klass, Opnd::Value(expected_class));
        asm.jne(side_exit);
    } else if guard_type.is_subtype(types::String) {
        let side = side_exit(jit, state, GuardType(guard_type));
        // Check special constant
        asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
        asm.jnz(side.clone());
        // Check false
        asm.cmp(val, Qfalse.into());
        asm.je(side.clone());
        let val = match val {
            Opnd::Reg(_) | Opnd::VReg { .. } => val,
            _ => asm.load(val),
        };
        let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
        let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
        asm.cmp(tag, Opnd::UImm(RUBY_T_STRING as u64));
        asm.jne(side);
    } else if guard_type.bit_equal(types::HeapBasicObject) {
        let side_exit = side_exit(jit, state, GuardType(guard_type));
        asm.cmp(val, Opnd::Value(Qfalse));
        asm.je(side_exit.clone());
        asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
        asm.jnz(side_exit);
    } else {
        unimplemented!("unsupported type: {guard_type}");
    }
    val
}

fn gen_guard_type_not(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd {
    if guard_type.is_subtype(types::String) {
        // We only exit if val *is* a String. Otherwise we fall through.
        let cont = asm.new_label("guard_type_not_string_cont");
        let side = side_exit(jit, state, GuardTypeNot(guard_type));
        // Continue if special constant (not string)
        asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
        asm.jnz(cont.clone());
        // Continue if false (not string)
        asm.cmp(val, Qfalse.into());
        asm.je(cont.clone());
        let val = match val {
            Opnd::Reg(_) | Opnd::VReg { .. } => val,
            _ => asm.load(val),
        };
        let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
        let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
        asm.cmp(tag, Opnd::UImm(RUBY_T_STRING as u64));
        asm.je(side);
        // Otherwise (non-string heap object), continue.
        asm.write_label(cont);
    } else {
        unimplemented!("unsupported type: {guard_type}");
    }
    val
}

/// Compile an identity check with a side exit
fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, expected: crate::hir::Const, state: &FrameState) -> lir::Opnd {
    let expected_opnd: Opnd = match expected {
        crate::hir::Const::Value(v) => Opnd::Value(v),
        crate::hir::Const::CInt64(v) => v.into(),
        _ => panic!("gen_guard_bit_equals: unexpected hir::Const {:?}", expected),
    };
    asm.cmp(val, expected_opnd);
    asm.jnz(side_exit(jit, state, GuardBitEquals(expected)));
    val
}

/// Generate code that increments the counter at a given pointer if --zjit-stats is enabled
fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) {
    if get_option!(stats) {
        asm.incr_counter(Opnd::const_ptr(counter_ptr as *const u8), Opnd::UImm(1));
    }
}

/// Generate code that increments a counter if --zjit-stats is enabled
fn gen_incr_counter(asm: &mut Assembler, counter: Counter) {
    if get_option!(stats) {
        let ptr = counter_ptr(counter);
        gen_incr_counter_ptr(asm, ptr);
    }
}

/// Increment a counter for each SendFallbackReason. If the variant has
/// a counter prefix to break down the details, increment that as well.
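/// For example, `NotOptimizedInstruction(opcode)` increments both the per-reason counter and
/// a per-opcode breakdown counter, and the `*MethodType` variants additionally increment a
/// per-method-type counter.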
fn gen_incr_send_fallback_counter(asm: &mut Assembler, reason: SendFallbackReason) { gen_incr_counter(asm, send_fallback_counter(reason)); use SendFallbackReason::*; match reason { NotOptimizedInstruction(opcode) => { gen_incr_counter_ptr(asm, send_fallback_counter_ptr_for_opcode(opcode)); } SendWithoutBlockNotOptimizedMethodType(method_type) => { gen_incr_counter(asm, send_without_block_fallback_counter_for_method_type(method_type)); } SendWithoutBlockNotOptimizedOptimizedMethodType(method_type) => { gen_incr_counter(asm, send_without_block_fallback_counter_for_optimized_method_type(method_type)); } SendNotOptimizedMethodType(method_type) => { gen_incr_counter(asm, send_fallback_counter_for_method_type(method_type)); } _ => {} } } /// Save the current PC on the CFP as a preparation for calling a C function /// that may allocate objects and trigger GC. Use gen_prepare_non_leaf_call() /// if it may raise exceptions or call arbitrary methods. /// /// Unlike YJIT, we don't need to save the stack slots to protect them from GC /// because the backend spills all live registers onto the C stack on CCall. fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool) { let opcode: usize = state.get_opcode().try_into().unwrap(); let next_pc: *const VALUE = unsafe { state.pc.offset(insn_len(opcode) as isize) }; gen_incr_counter(asm, Counter::vm_write_pc_count); asm_comment!(asm, "save PC to CFP"); asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(next_pc)); if leaf { asm.expect_leaf_ccall(state.stack_size()); } } fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) { gen_prepare_call_with_gc(asm, state, true); } /// Save the current SP on the CFP fn gen_save_sp(asm: &mut Assembler, stack_size: usize) { // Update cfp->sp which will be read by the interpreter. We also have the SP register in JIT // code, and ZJIT's codegen currently assumes the SP register doesn't move, e.g. gen_param(). // So we don't update the SP register here. We could update the SP register to avoid using // an extra register for asm.lea(), but you'll need to manage the SP offset like YJIT does. gen_incr_counter(asm, Counter::vm_write_sp_count); asm_comment!(asm, "save SP to CFP: {}", stack_size); let sp_addr = asm.lea(Opnd::mem(64, SP, stack_size as i32 * SIZEOF_VALUE_I32)); let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); asm.mov(cfp_sp, sp_addr); } /// Spill locals onto the stack. fn gen_spill_locals(jit: &JITState, asm: &mut Assembler, state: &FrameState) { // TODO: Avoid spilling locals that have been spilled before and not changed. gen_incr_counter(asm, Counter::vm_write_locals_count); asm_comment!(asm, "spill locals"); for (idx, &insn_id) in state.locals().enumerate() { asm.mov(Opnd::mem(64, SP, (-local_idx_to_ep_offset(jit.iseq, idx) - 1) * SIZEOF_VALUE_I32), jit.get_opnd(insn_id)); } } /// Spill the virtual stack onto the stack. fn gen_spill_stack(jit: &JITState, asm: &mut Assembler, state: &FrameState) { // This function does not call gen_save_sp() at the moment because // gen_send_without_block_direct() spills stack slots above SP for arguments. gen_incr_counter(asm, Counter::vm_write_stack_count); asm_comment!(asm, "spill stack"); for (idx, &insn_id) in state.stack().enumerate() { asm.mov(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), jit.get_opnd(insn_id)); } } /// Prepare for calling a C function that may call an arbitrary method. /// Use gen_prepare_leaf_call_with_gc() if the method is leaf but allocates objects. 
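/// Concretely, this saves the PC (for backtraces and allocation tracing), saves the SP, and
/// spills the virtual stack and locals, since the callee may raise an exception or look at
/// the caller's Binding. For example, gen_new_range() calls this before
/// `asm_ccall!(asm, rb_range_new, ...)` because building a Range may call `low.<=>(high)`.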
fn gen_prepare_non_leaf_call(jit: &JITState, asm: &mut Assembler, state: &FrameState) {
    // TODO: Lazily materialize caller frames when needed
    // Save PC for backtraces and allocation tracing
    gen_prepare_call_with_gc(asm, state, false);

    // Save SP and spill the virtual stack in case it raises an exception
    // and the interpreter uses the stack for handling the exception
    gen_save_sp(asm, state.stack().len());
    gen_spill_stack(jit, asm, state);

    // Spill locals in case the method looks at caller Bindings
    gen_spill_locals(jit, asm, state);
}

/// Frame metadata written by gen_push_frame()
struct ControlFrame {
    recv: Opnd,
    iseq: Option<IseqPtr>,
    cme: *const rb_callable_method_entry_t,
    frame_type: u32,
    /// The [`VM_ENV_DATA_INDEX_SPECVAL`] slot of the frame.
    /// For the type of frames we push, block handler or the parent EP.
    specval: lir::Opnd,
    pc: Option<*const VALUE>,
}

/// Compile an interpreter frame
fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: ControlFrame) {
    // Locals are written by the callee frame on side-exits or non-leaf calls
    // See vm_push_frame() for details
    asm_comment!(asm, "push cme, specval, frame type");

    // ep[-2]: cref or cme
    let local_size = if let Some(iseq) = frame.iseq {
        (unsafe { get_iseq_body_local_table_size(iseq) }) as i32
    } else {
        0
    };
    let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + VM_ENV_DATA_SIZE as i32 - 1;
    // ep[-2]: CME
    asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into());
    // ep[-1]: specval
    asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), frame.specval);
    // ep[0]: ENV_FLAGS
    asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into());

    // Write to the callee CFP
    fn cfp_opnd(offset: i32) -> Opnd {
        Opnd::mem(64, CFP, offset - (RUBY_SIZEOF_CONTROL_FRAME as i32))
    }
    asm_comment!(asm, "push callee control frame");
    if let Some(iseq) = frame.iseq {
        // cfp_opnd(RUBY_OFFSET_CFP_PC): written by the callee frame on side-exits or non-leaf calls
        // cfp_opnd(RUBY_OFFSET_CFP_SP): written by the callee frame on side-exits or non-leaf calls
        asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), VALUE::from(iseq).into());
    } else {
        // C frames don't have a PC and ISEQ in normal operation.
        // When runtime checks are enabled we poison the PC so accidental reads stand out.
        if let Some(pc) = frame.pc {
            asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), Opnd::const_ptr(pc));
        }
        let new_sp = asm.lea(Opnd::mem(64, SP, (ep_offset + 1) * SIZEOF_VALUE_I32));
        asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), new_sp);
        asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), 0.into());
    }
    asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
    let ep = asm.lea(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32));
    asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
    asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
}

/// Stack overflow check: fails if CFP <= SP at any point in the callee.
fn gen_stack_overflow_check(jit: &mut JITState, asm: &mut Assembler, state: &FrameState, stack_growth: usize) {
    asm_comment!(asm, "stack overflow check");
    // vm_push_frame() checks it against a decremented cfp, and CHECK_VM_STACK_OVERFLOW0
    // adds to the margin another control frame with `&bounds[1]`.
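    // In other words: reserve room for two control frames (converted to VALUE-sized slots via
    // cfp_growth below) on top of the requested stack_growth, take the address of that peak
    // slot, and side-exit if CFP is already at or below it. For a rough illustration with
    // hypothetical sizes (RUBY_SIZEOF_CONTROL_FRAME = 56, SIZEOF_VALUE = 8, stack_growth = 3),
    // the check compares CFP against SP + (14 + 3) * 8 bytes.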
    const {
        assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)");
    }
    let cfp_growth = 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE);
    let peak_offset = (cfp_growth + stack_growth) * SIZEOF_VALUE;
    let stack_limit = asm.lea(Opnd::mem(64, SP, peak_offset as i32));
    asm.cmp(CFP, stack_limit);
    asm.jbe(side_exit(jit, state, StackOverflow));
}

/// Return an operand we use for the basic block argument at a given index
fn param_opnd(idx: usize) -> Opnd {
    // To simplify the implementation, allocate a fixed register or a stack slot for each basic block argument for now.
    // Note that this is implemented here as opposed to automatically inside LIR machineries.
    // TODO: Allow allocating arbitrary registers for basic block arguments
    if idx < ALLOC_REGS.len() {
        Opnd::Reg(ALLOC_REGS[idx])
    } else {
        // With FrameSetup, the address that NATIVE_BASE_PTR points to stores an old value in the register.
        // To avoid clobbering it, we need to start from the next slot, hence `+ 1` for the index.
        Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32)
    }
}

/// Inverse of ep_offset_to_local_idx(). See ep_offset_to_local_idx() for details.
pub fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 {
    let local_size = unsafe { get_iseq_body_local_table_size(iseq) };
    local_size_and_idx_to_ep_offset(local_size.to_usize(), local_idx)
}

/// Convert the number of locals and a local index to an offset from the EP
pub fn local_size_and_idx_to_ep_offset(local_size: usize, local_idx: usize) -> i32 {
    local_size as i32 - local_idx as i32 - 1 + VM_ENV_DATA_SIZE as i32
}

/// Convert the number of locals and a local index to an offset from the BP.
/// We don't move the SP register after entry, so we often use SP as BP.
pub fn local_size_and_idx_to_bp_offset(local_size: usize, local_idx: usize) -> i32 {
    local_size_and_idx_to_ep_offset(local_size, local_idx) + 1
}

/// Convert ISEQ into High-level IR
fn compile_iseq(iseq: IseqPtr) -> Result<Function, CompileError> {
    // Convert ZJIT instructions back to bare instructions
    unsafe { crate::cruby::rb_zjit_profile_disable(iseq) };

    // Reject ISEQs with very large temp stacks.
    // We cannot encode offsets that large for local accesses on arm64.
    let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) };
    if stack_max >= i8::MAX as u32 {
        debug!("ISEQ stack too large: {stack_max}");
        return Err(CompileError::IseqStackTooLarge);
    }

    let mut function = match iseq_to_hir(iseq) {
        Ok(function) => function,
        Err(err) => {
            debug!("ZJIT: iseq_to_hir: {err:?}: {}", iseq_get_location(iseq, 0));
            return Err(CompileError::ParseError(err));
        }
    };
    if !get_option!(disable_hir_opt) {
        function.optimize();
    }
    function.dump_hir();
    Ok(function)
}

/// Build a Target::SideExit for non-PatchPoint instructions
fn side_exit(jit: &JITState, state: &FrameState, reason: SideExitReason) -> Target {
    build_side_exit(jit, state, reason, None)
}

/// Build a Target::SideExit out of a FrameState
fn build_side_exit(jit: &JITState, state: &FrameState, reason: SideExitReason, label: Option