diff options
Diffstat (limited to 'zjit/src/codegen.rs')
| -rw-r--r-- | zjit/src/codegen.rs | 146 |
1 files changed, 69 insertions, 77 deletions
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index fbf62a5c81..7c893a72fe 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -19,13 +19,17 @@ use crate::state::ZJITState; use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type}; use crate::stats::{counter_ptr, with_time_stat, trace_compile_phase, Counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; -use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, Opnd, SP, SideExit, SideExitRecompile, Target, asm_ccall, asm_comment}; +use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, Opnd, SP, SideExit, SideExitRecompile, Target, asm_ccall, asm_comment}; use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; use crate::hir::{BlockHandler, Const, FieldName, FrameState, Function, Insn, InsnId, Recompile, SendFallbackReason}; use crate::hir_type::{types, Type}; use crate::options::{get_option, PerfMap}; use crate::cast::IntoUsize; +/// The number of stack slots used for JITFrame. gen_save_pc_for_gc() writes +/// JITFrame into this number of slots at the bottom of the native stack. +const JIT_FRAME_SIZE: usize = 1; + /// Default maximum number of compiled versions per ISEQ. const DEFAULT_MAX_VERSIONS: usize = 2; @@ -66,17 +70,22 @@ struct JITState { /// ISEQ calls that need to be compiled later iseq_calls: Vec<IseqCallRef>, + + /// The number of native stack slots reserved for JITFrame. + /// gen_save_pc_for_gc() writes JITFrame into the allocated space. + jit_frame_size: usize, } impl JITState { /// Create a new JITState instance - fn new(version: IseqVersionRef, num_insns: usize, num_blocks: usize) -> Self { + fn new(version: IseqVersionRef, num_insns: usize, num_blocks: usize, jit_frame_size: usize) -> Self { JITState { version, opnds: vec![None; num_insns], labels: vec![None; num_blocks], jit_entries: Vec::default(), iseq_calls: Vec::default(), + jit_frame_size, } } @@ -374,9 +383,8 @@ fn gen_iseq_body(cb: &mut CodeBlock, iseq: IseqPtr, mut version: IseqVersionRef, /// Compile a function fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, function: &Function) -> Result<(IseqCodePtrs, Vec<CodePtr>, Vec<IseqCallRef>), CompileError> { let (mut jit, asm) = trace_compile_phase("codegen", || { - let num_spilled_params = max_num_params(function).saturating_sub(ALLOC_REGS.len()); - let mut jit = JITState::new(version, function.num_insns(), function.num_blocks()); - let mut asm = Assembler::new_with_stack_slots(num_spilled_params + 1); // +1 for JITFrame + let mut jit = JITState::new(version, function.num_insns(), function.num_blocks(), JIT_FRAME_SIZE); + let mut asm = Assembler::new_with_stack_slots(JIT_FRAME_SIZE); // Mapping from HIR block IDs to LIR block IDs. // This is is a one-to-one mapping from HIR to LIR blocks used for finding @@ -583,7 +591,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio gen_const_uint32(val.0) } Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"), - Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)), + Insn::NewArray { elements, state } => gen_new_array(jit, asm, opnds!(elements), &function.frame_state(*state)), Insn::NewHash { elements, state } => gen_new_hash(jit, asm, opnds!(elements), &function.frame_state(*state)), Insn::NewRange { low, high, flag, state } => gen_new_range(jit, asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::NewRangeFixnum { low, high, flag, state } => gen_new_range_fixnum(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), @@ -1139,10 +1147,9 @@ fn gen_ccall_variadic( asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); - let argv_ptr = gen_push_opnds(asm, &args); + let argv_ptr = gen_push_opnds(jit, asm, &args); asm.count_call_to(&name.contents_lossy()); let result = asm.ccall(cfunc, vec![args.len().into(), argv_ptr, recv]); - gen_pop_opnds(asm, &args); asm_comment!(asm, "pop C frame"); let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); @@ -1699,7 +1706,7 @@ fn gen_invokeblock_ifunc( gen_prepare_non_leaf_call(jit, asm, state); // Push args to memory so we can pass argv pointer - let argv_ptr = gen_push_opnds(asm, &args); + let argv_ptr = gen_push_opnds(jit, asm, &args); // Untag the block handler to get the captured block pointer // captured = block_handler & ~0x3 @@ -1715,9 +1722,7 @@ fn gen_invokeblock_ifunc( argv: *const VALUE, ) -> VALUE; } - let result = asm_ccall!(asm, rb_vm_yield_with_cfunc, EC, captured, (args.len() as i64).into(), argv_ptr); - gen_pop_opnds(asm, &args); - result + asm_ccall!(asm, rb_vm_yield_with_cfunc, EC, captured, (args.len() as i64).into(), argv_ptr) } fn gen_invokeproc( @@ -1732,9 +1737,9 @@ fn gen_invokeproc( asm_comment!(asm, "call invokeproc"); - let argv_ptr = gen_push_opnds(asm, &args); + let argv_ptr = gen_push_opnds(jit, asm, &args); let kw_splat_opnd = Opnd::Imm(i64::from(kw_splat)); - let result = asm_ccall!( + asm_ccall!( asm, rb_optimized_call, recv, @@ -1743,10 +1748,7 @@ fn gen_invokeproc( argv_ptr, kw_splat_opnd, VM_BLOCK_HANDLER_NONE.into() - ); - gen_pop_opnds(asm, &args); - - result + ) } /// Compile a dynamic dispatch for `super` @@ -1820,6 +1822,7 @@ fn gen_array_dup( /// Compile a new array instruction fn gen_new_array( + jit: &JITState, asm: &mut Assembler, elements: Vec<Opnd>, state: &FrameState, @@ -1831,10 +1834,8 @@ fn gen_new_array( if elements.is_empty() { asm_ccall!(asm, rb_ec_ary_new_from_values, EC, 0i64.into(), Opnd::UImm(0)) } else { - let argv = gen_push_opnds(asm, &elements); - let new_array = asm_ccall!(asm, rb_ec_ary_new_from_values, EC, num.into(), argv); - gen_pop_opnds(asm, &elements); - new_array + let argv = gen_push_opnds(jit, asm, &elements); + asm_ccall!(asm, rb_ec_ary_new_from_values, EC, num.into(), argv) } } @@ -2117,10 +2118,8 @@ fn gen_new_hash( } else { gen_prepare_non_leaf_call(jit, asm, state); - let argv = gen_push_opnds(asm, &elements); - let hash = asm_ccall!(asm, rb_hash_new_with_bulk_insert, elements.len().into(), argv); - gen_pop_opnds(asm, &elements); - hash + let argv = gen_push_opnds(jit, asm, &elements); + asm_ccall!(asm, rb_hash_new_with_bulk_insert, elements.len().into(), argv) } } @@ -2552,7 +2551,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard asm.cmp(klass, Opnd::Value(expected_class)); asm.jne(jit, side_exit); - } else if guard_type.is_subtype(types::TypedTData) { + } else if guard_type.is_subtype(types::TData) { let side = side_exit(jit, state, GuardType(guard_type)); // Check special constant @@ -2563,11 +2562,11 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard asm.cmp(val, Qfalse.into()); asm.je(jit, side.clone()); - // Check the builtin type and RUBY_TYPED_FL_IS_TYPED_DATA with mask and compare + // Check the T_DATA builtin type. let val = asm.load_mem(val); let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS)); - let mask = RUBY_T_MASK.to_usize() | RUBY_TYPED_FL_IS_TYPED_DATA.to_usize(); - let expected = RUBY_T_DATA.to_usize() | RUBY_TYPED_FL_IS_TYPED_DATA.to_usize(); + let mask = RUBY_T_MASK.to_usize(); + let expected = RUBY_T_DATA.to_usize(); let masked = asm.and(flags, mask.into()); asm.cmp(masked, expected.into()); asm.jne(jit, side); @@ -2986,20 +2985,6 @@ fn build_side_exit(jit: &JITState, state: &FrameState) -> SideExit { } } -/// Returne the maximum number of arguments for a block in a given function -fn max_num_params(function: &Function) -> usize { - let reverse_post_order = function.reverse_post_order(); - reverse_post_order - .iter() - .filter(|&&block_id| function.is_entry_block(block_id)) - .map(|&block_id| { - let block = function.block(block_id); - block.params().len() - }) - .max() - .unwrap_or(0) -} - #[cfg(target_arch = "x86_64")] macro_rules! c_callable { ($(#[$outer:meta])* @@ -3362,21 +3347,18 @@ pub fn gen_exit_trampoline_with_counter(cb: &mut CodeBlock, exit_trampoline: Cod }) } -fn gen_push_opnds(asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { - let n = opnds.len(); - let allocation_size = aligned_stack_bytes(n); - - // Bump the stack pointer to reserve the space for opnds - if n != 0 { - asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n); - asm.sub_into(NATIVE_STACK_PTR, allocation_size.into()); +/// Reserve native stack space and write operands into it. +fn gen_push_opnds(jit: &JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { + let argv = if opnds.len() > 0 { + // Make sure the Assembler will reserve a sufficient stack size for given opnds + asm_comment!(asm, "allocate space on C stack for {} values", opnds.len()); + asm.alloc_stack(jit, opnds.len()) } else { asm_comment!(asm, "no opnds to allocate"); - } + Opnd::UImm(0) + }; - // Load NATIVE_STACK_PTR to get the address of a returned array - // to allow the backend to move it for its own use. - let argv = asm.load(NATIVE_STACK_PTR); + // Write operands into stack slots allocated by asm.alloc_stack() for (idx, &opnd) in opnds.iter().enumerate() { asm.mov(Opnd::mem(VALUE_BITS, argv, idx as i32 * SIZEOF_VALUE_I32), opnd); } @@ -3384,35 +3366,18 @@ fn gen_push_opnds(asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { argv } -fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) { - if opnds.is_empty() { - asm_comment!(asm, "no opnds to restore"); - return - } - - asm_comment!(asm, "restore C stack pointer"); - let allocation_size = aligned_stack_bytes(opnds.len()); - asm.add_into(NATIVE_STACK_PTR, allocation_size.into()); -} - fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Opnd { gen_prepare_non_leaf_call(jit, asm, state); - let first_opnd_ptr = gen_push_opnds(asm, &values); - let result = asm_ccall!(asm, rb_reg_new_from_values, values.len().into(), first_opnd_ptr, opt.into()); - gen_pop_opnds(asm, &values); - - result + let first_opnd_ptr = gen_push_opnds(jit, asm, &values); + asm_ccall!(asm, rb_reg_new_from_values, values.len().into(), first_opnd_ptr, opt.into()) } fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd { gen_prepare_non_leaf_call(jit, asm, state); - let first_string_ptr = gen_push_opnds(asm, &strings); - let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr); - gen_pop_opnds(asm, &strings); - - result + let first_string_ptr = gen_push_opnds(jit, asm, &strings); + asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr) } // Generate RSTRING_PTR @@ -3484,6 +3449,33 @@ fn aligned_stack_bytes(num_slots: usize) -> usize { } impl Assembler { + /// Allocate stack space on top of the stack slots reserved for JITFrame, + /// and return a pointer to the allocated space. + fn alloc_stack(&mut self, jit: &JITState, stack_size: usize) -> Opnd { + let total_stack_size = jit.jit_frame_size + stack_size; + self.stack_base_idx = self.stack_base_idx.max(total_stack_size); + // high addr + // +------------------------+ + // | return address | + // +------------------------+ + // | previous frame pointer | <- NATIVE_BASE_PTR == cfp->jit_return + // +------------------------+ + // | JITFrame pointer | <- jit.jit_frame_size, read by CFP_ZJIT_FRAME(cfp) + // +------------------------+ + // | opnds.last() | + // +------------------------+ + // | ... | + // +------------------------+ + // | opnds.first() | <- pointer returned by alloc_stack() + // +------------------------+ + // | register spill slots | if any + // +------------------------+ + // | FrameSetup align slot | if needed + // +------------------------+ + // low addr + self.sub(NATIVE_BASE_PTR, (SIZEOF_VALUE * total_stack_size).into()) + } + /// Emits a load for memory based operands and returns a vreg, /// otherwise returns recv. fn load_mem(&mut self, recv: Opnd) -> Opnd { |
