diff options
| -rw-r--r-- | insns.def | 1 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 43 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 53 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 259 | ||||
| -rw-r--r-- | zjit/src/profile.rs | 2 |
5 files changed, 315 insertions, 43 deletions
@@ -846,6 +846,7 @@ send (CALL_DATA cd, ISEQ blockiseq) (...) (VALUE val) +// attr bool zjit_profile = true; // attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci); // attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci); { diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 1f04e61dbc..87e0ed907a 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -411,7 +411,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it. Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), - Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)), + Insn::CCallWithFrame { cfunc, args, cme, state, blockiseq, .. } => + gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state, return_type: _, elidable: _ } => { gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) } @@ -673,20 +674,36 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian } /// Generate code for a C function call that pushes a frame -fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec<Opnd>, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd { +fn gen_ccall_with_frame( + jit: &mut JITState, + asm: &mut Assembler, + cfunc: *const u8, + args: Vec<Opnd>, + cme: *const rb_callable_method_entry_t, + blockiseq: Option<IseqPtr>, + state: &FrameState, +) -> lir::Opnd { gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count); - gen_prepare_non_leaf_call(jit, asm, state); + let caller_stack_size = state.stack_size() - args.len(); + + // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP + // to account for the receiver and arguments (and block arguments if any) + gen_prepare_call_with_gc(asm, state, false); + gen_save_sp(asm, caller_stack_size); + gen_spill_stack(jit, asm, state); + gen_spill_locals(jit, asm, state); gen_push_frame(asm, args.len(), state, ControlFrame { recv: args[0], iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: blockiseq, }); asm_comment!(asm, "switch to new SP register"); - let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; + let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); @@ -738,6 +755,7 @@ fn gen_ccall_variadic( iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1130,6 +1148,7 @@ fn gen_send_without_block_direct( iseq: Some(iseq), cme, frame_type: VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1719,6 +1738,7 @@ struct ControlFrame { iseq: Option<IseqPtr>, cme: *const rb_callable_method_entry_t, frame_type: u32, + block_iseq: Option<IseqPtr>, } /// Compile an interpreter frame @@ -1735,9 +1755,20 @@ fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: C }; let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + VM_ENV_DATA_SIZE as i32 - 1; asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into()); + + let block_handler_opnd = if let Some(block_iseq) = frame.block_iseq { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). + // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self_addr, Opnd::Imm(1)) + } else { + VM_BLOCK_HANDLER_NONE.into() + }; + // ep[-1]: block_handler or prev EP - // block_handler is not supported for now - asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), VM_BLOCK_HANDLER_NONE.into()); + asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), block_handler_opnd); // ep[0]: ENV_FLAGS asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into()); diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index c67e229a80..af604661b2 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -680,32 +680,33 @@ pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 243; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 244; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 245; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 370ed56857..1f77f38dc8 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -668,6 +668,7 @@ pub enum Insn { state: InsnId, return_type: Type, elidable: bool, + blockiseq: Option<IseqPtr>, }, /// Call a variadic C function with signature: func(int argc, VALUE *argv, VALUE recv) @@ -1063,11 +1064,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) }, - Insn::CCallWithFrame { cfunc, args, name, .. } => { + Insn::CCallWithFrame { cfunc, args, name, blockiseq, .. } => { write!(f, "CCallWithFrame {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { write!(f, ", {arg}")?; } + if let Some(blockiseq) = blockiseq { + write!(f, ", block={:p}", self.ptr_map.map_ptr(blockiseq))?; + } Ok(()) }, Insn::CCallVariadic { cfunc, recv, args, name, .. } => { @@ -1598,7 +1602,17 @@ impl Function { &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, &CCall { cfunc, ref args, name, return_type, elidable } => CCall { cfunc, args: find_vec!(args), name, return_type, elidable }, - &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable } => CCallWithFrame { cd, cfunc, args: find_vec!(args), cme, name, state: find!(state), return_type, elidable }, + &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable, blockiseq } => CCallWithFrame { + cd, + cfunc, + args: find_vec!(args), + cme, + name, + state: find!(state), + return_type, + elidable, + blockiseq, + }, &CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable } => CCallVariadic { cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable }, @@ -2134,7 +2148,7 @@ impl Function { } } // This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise. - // TODO: Optimize Send + // The actual optimization is done in reduce_send_to_ccall. Insn::Send { recv, cd, state, .. } => { let frame_state = self.frame_state(state); let klass = if let Some(klass) = self.type_of(recv).runtime_exact_ruby_class() { @@ -2338,8 +2352,111 @@ impl Function { fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state }); } - // Try to reduce one SendWithoutBlock to a CCall - fn reduce_to_ccall( + // Try to reduce a Send insn to a CCallWithFrame + fn reduce_send_to_ccall( + fun: &mut Function, + block: BlockId, + self_type: Type, + send: Insn, + send_insn_id: InsnId, + ) -> Result<(), ()> { + let Insn::Send { mut recv, cd, blockiseq, mut args, state, .. } = send else { + return Err(()); + }; + + let call_info = unsafe { (*cd).ci }; + let argc = unsafe { vm_ci_argc(call_info) }; + let method_id = unsafe { rb_vm_ci_mid(call_info) }; + + // If we have info about the class of the receiver + let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() { + (class, None) + } else { + let iseq_insn_idx = fun.frame_state(state).insn_idx; + let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(()) }; + (recv_type.class(), Some(recv_type)) + }; + + // Do method lookup + let method: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) }; + if method.is_null() { + return Err(()); + } + + // Filter for C methods + let def_type = unsafe { get_cme_def_type(method) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return Err(()); + } + + // Find the `argc` (arity) of the C method, which describes the parameters it expects + let cfunc = unsafe { get_cme_def_body_cfunc(method) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + match cfunc_argc { + 0.. => { + // (self, arg0, arg1, ..., argc) form + // + // Bail on argc mismatch + if argc != cfunc_argc as u32 { + return Err(()); + } + + let ci_flags = unsafe { vm_ci_flag(call_info) }; + + // When seeing &block argument, fall back to dynamic dispatch for now + // TODO: Support block forwarding + if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { + return Err(()); + } + + // Commit to the replacement. Put PatchPoint. + gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); + if recv_class.instance_can_have_singleton_class() { + fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state }); + } + + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + fun.insn_types[recv.0] = fun.infer_type(recv); + } + + let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) }; + + // Emit a call + let cfunc = unsafe { get_mct_func(cfunc) }.cast(); + let mut cfunc_args = vec![recv]; + cfunc_args.append(&mut args); + + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq, + }); + fun.make_equal_to(send_insn_id, ccall); + return Ok(()); + } + // Variadic method + -1 => { + // func(int argc, VALUE *argv, VALUE recv) + return Err(()); + } + -2 => { + // (self, args_ruby_array) + return Err(()); + } + _ => unreachable!("unknown cfunc kind: argc={argc}") + } + } + + // Try to reduce a SendWithoutBlock insn to a CCall/CCallWithFrame + fn reduce_send_without_block_to_ccall( fun: &mut Function, block: BlockId, self_type: Type, @@ -2440,7 +2557,17 @@ impl Function { if get_option!(stats) { count_not_inlined_cfunc(fun, block, method); } - let ccall = fun.push_insn(block, Insn::CCallWithFrame { cd, cfunc, args: cfunc_args, cme: method, name: method_id, state, return_type, elidable }); + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type, + elidable, + blockiseq: None, + }); fun.make_equal_to(send_insn_id, ccall); } @@ -2555,11 +2682,21 @@ impl Function { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { - if let send @ Insn::SendWithoutBlock { recv, .. } = self.find(insn_id) { - let recv_type = self.type_of(recv); - if reduce_to_ccall(self, block, recv_type, send, insn_id).is_ok() { - continue; + let send = self.find(insn_id); + match send { + send @ Insn::SendWithoutBlock { recv, .. } => { + let recv_type = self.type_of(recv); + if reduce_send_without_block_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } + } + send @ Insn::Send { recv, .. } => { + let recv_type = self.type_of(recv); + if reduce_send_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } } + _ => {} } self.push_insn_id(block, insn_id); } @@ -12584,6 +12721,108 @@ mod opt_tests { } #[test] + fn test_optimize_send_with_block() { + eval(r#" + def test = [1, 2, 3].map { |x| x * 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + PatchPoint MethodRedefined(Array@0x1008, map@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(Array@0x1008) + v23:BasicObject = CCallWithFrame map@0x1040, v12, block=0x1048 + CheckInterrupts + Return v23 + "); + } + + #[test] + fn test_do_not_optimize_send_variadic_with_block() { + eval(r#" + def test = [1, 2, 3].index { |x| x == 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + v14:BasicObject = Send v12, 0x1008, :index + CheckInterrupts + Return v14 + "); + } + + #[test] + fn test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + def test(&block) = [].map(&block) + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:ArrayExact = NewArray + GuardBlockParamProxy l0 + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v19:BasicObject = Send v14, 0x1008, :map, v17 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_do_not_optimize_send_to_iseq_method_with_block() { + eval(r#" + def foo + yield 1 + end + + def test = foo {} + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:6: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = Send v6, 0x1000, :foo + CheckInterrupts + Return v11 + "); + } + + #[test] fn test_inline_attr_reader_constant() { eval(" class C diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index e935ec9731..a6c837df5a 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -83,7 +83,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_length => profile_operands(profiler, profile, 1), YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), - YARVINSN_opt_send_without_block => { + YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; // Profile all the arguments and self (+1). |
