diff options
| author | Max Bernstein <rubybugs@bernsteinbear.com> | 2025-08-29 09:46:08 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-29 12:46:08 -0400 |
| commit | b6f4b5399d22aab18fa3d163b9cc1764ad0df7dd (patch) | |
| tree | ba185e1881644efe94c7987396e4b24af58c8c34 | |
| parent | fc4f8c879a362968d87d36d6f93d30ae9abd0a5b (diff) | |
ZJIT: Specialize monomorphic GetIvar (#14388)
Specialize monomorphic `GetIvar` into:
* `GuardType(HeapObject)`
* `GuardShape`
* `LoadIvarEmbedded` or `LoadIvarExtended`
This requires profiling self for `getinstancevariable` (it's not on the operand
stack).
This also optimizes `GetIvar`s that happen as a result of inlining
`attr_reader` and `attr_accessor`.
Also move some (newly) shared JIT helpers into jit.c.
| -rw-r--r-- | insns.def | 1 | ||||
| -rw-r--r-- | jit.c | 12 | ||||
| -rw-r--r-- | test/ruby/test_zjit.rb | 30 | ||||
| -rw-r--r-- | yjit.c | 12 | ||||
| -rw-r--r-- | yjit/bindgen/src/main.rs | 4 | ||||
| -rw-r--r-- | yjit/src/codegen.rs | 2 | ||||
| -rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 8 | ||||
| -rw-r--r-- | zjit.c | 6 | ||||
| -rw-r--r-- | zjit/bindgen/src/main.rs | 3 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 41 | ||||
| -rw-r--r-- | zjit/src/cruby.rs | 14 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 42 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 240 | ||||
| -rw-r--r-- | zjit/src/options.rs | 8 | ||||
| -rw-r--r-- | zjit/src/profile.rs | 24 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 2 |
16 files changed, 384 insertions, 65 deletions
@@ -212,6 +212,7 @@ getinstancevariable (VALUE val) /* Ractor crashes when it accesses class/module-level instances variables. */ // attr bool leaf = false; /* has IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR() */ +// attr bool zjit_profile = true; { val = vm_getinstancevariable(GET_ISEQ(), GET_SELF(), id, ic); } @@ -14,6 +14,12 @@ #include "iseq.h" #include "internal/gc.h" +// Field offsets for the RObject struct +enum robject_offsets { + ROBJECT_OFFSET_AS_HEAP_FIELDS = offsetof(struct RObject, as.heap.fields), + ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary), +}; + unsigned int rb_iseq_encoded_size(const rb_iseq_t *iseq) { @@ -454,3 +460,9 @@ rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp) { cfp->sp = sp; } + +bool +rb_jit_shape_too_complex_p(shape_id_t shape_id) +{ + return rb_shape_too_complex_p(shape_id); +} diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 98f747c4d0..8e450458dd 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1162,6 +1162,36 @@ class TestZJIT < Test::Unit::TestCase } end + def test_getinstancevariable_miss + assert_compiles '[1, 1, 4]', %q{ + class C + def foo + @foo + end + + def foo_then_bar + @foo = 1 + @bar = 2 + end + + def bar_then_foo + @bar = 3 + @foo = 4 + end + end + + o1 = C.new + o1.foo_then_bar + result = [] + result << o1.foo + result << o1.foo + o2 = C.new + o2.bar_then_foo + result << o2.foo + result + } + end + def test_setinstancevariable assert_compiles '1', %q{ def test() = @foo = 1 @@ -39,12 +39,6 @@ #include <errno.h> -// Field offsets for the RObject struct -enum robject_offsets { - ROBJECT_OFFSET_AS_HEAP_FIELDS = offsetof(struct RObject, as.heap.fields), - ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary), -}; - // Field offsets for the RString struct enum rstring_offsets { RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len) @@ -759,12 +753,6 @@ rb_object_shape_count(void) } bool -rb_yjit_shape_too_complex_p(shape_id_t shape_id) -{ - return rb_shape_too_complex_p(shape_id); -} - -bool rb_yjit_shape_obj_too_complex_p(VALUE obj) { return rb_shape_obj_too_complex_p(obj); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 2993e05e2f..5cf5caeb9a 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -99,7 +99,6 @@ fn main() { .allowlist_function("rb_shape_get_iv_index") .allowlist_function("rb_shape_transition_add_ivar_no_warnings") .allowlist_function("rb_yjit_shape_obj_too_complex_p") - .allowlist_function("rb_yjit_shape_too_complex_p") .allowlist_function("rb_yjit_shape_capacity") .allowlist_function("rb_yjit_shape_index") .allowlist_var("SHAPE_ID_NUM_BITS") @@ -351,11 +350,12 @@ fn main() { .allowlist_function("rb_yjit_invokeblock_sp_pops") .allowlist_function("rb_yjit_set_exception_return") .allowlist_function("rb_yjit_str_concat_codepoint") - .allowlist_type("robject_offsets") .allowlist_type("rstring_offsets") // From jit.c .allowlist_function("rb_assert_holding_vm_lock") + .allowlist_function("rb_jit_shape_too_complex_p") + .allowlist_type("robject_offsets") // from vm_sync.h .allowlist_function("rb_vm_barrier") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 2dae72280f..9e9759e781 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3103,7 +3103,7 @@ fn gen_set_ivar( // If the VM ran out of shapes, or this class generated too many leaf, // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). - new_shape_too_complex = unsafe { rb_yjit_shape_too_complex_p(next_shape_id) }; + new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id) }; if new_shape_too_complex { Some((next_shape_id, None, 0_usize)) } else { diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index ca459ceafb..c22f4490db 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -994,12 +994,12 @@ pub const DEFINED_REF: defined_type = 15; pub const DEFINED_FUNC: defined_type = 16; pub const DEFINED_CONST_FROM: defined_type = 17; pub type defined_type = u32; -pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16; -pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16; -pub type robject_offsets = u32; pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16; pub type rstring_offsets = u32; pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16; +pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16; +pub type robject_offsets = u32; pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; extern "C" { pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); @@ -1238,7 +1238,6 @@ extern "C" { line: ::std::os::raw::c_int, ); pub fn rb_object_shape_count() -> VALUE; - pub fn rb_yjit_shape_too_complex_p(shape_id: shape_id_t) -> bool; pub fn rb_yjit_shape_obj_too_complex_p(obj: VALUE) -> bool; pub fn rb_yjit_shape_capacity(shape_id: shape_id_t) -> attr_index_t; pub fn rb_yjit_shape_index(shape_id: shape_id_t) -> attr_index_t; @@ -1328,4 +1327,5 @@ extern "C" { pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); + pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool; } @@ -337,12 +337,6 @@ rb_zjit_print_exception(void) rb_warn("Ruby error: %"PRIsVALUE"", rb_funcall(exception, rb_intern("full_message"), 0)); } -bool -rb_zjit_shape_obj_too_complex_p(VALUE obj) -{ - return rb_shape_obj_too_complex_p(obj); -} - enum { RB_INVALID_SHAPE_ID = INVALID_SHAPE_ID, }; diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 0eb800e1f9..ef8537cbbb 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -111,7 +111,6 @@ fn main() { .allowlist_function("rb_shape_id_offset") .allowlist_function("rb_shape_get_iv_index") .allowlist_function("rb_shape_transition_add_ivar_no_warnings") - .allowlist_function("rb_zjit_shape_obj_too_complex_p") .allowlist_var("SHAPE_ID_NUM_BITS") // From ruby/internal/intern/object.h @@ -367,6 +366,8 @@ fn main() { // From jit.c .allowlist_function("rb_assert_holding_vm_lock") + .allowlist_function("rb_jit_shape_too_complex_p") + .allowlist_type("robject_offsets") // from vm_sync.h .allowlist_function("rb_vm_barrier") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 36a899438c..441bb9e2f7 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -405,6 +405,9 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::ToArray { val, state } => { gen_to_array(jit, asm, opnd!(val), &function.frame_state(state)) }, &Insn::DefinedIvar { self_val, id, pushval, .. } => { gen_defined_ivar(asm, opnd!(self_val), id, pushval) }, &Insn::ArrayExtend { left, right, state } => { no_output!(gen_array_extend(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state))) }, + &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), + &Insn::LoadIvarEmbedded { self_val, id, index } => gen_load_ivar_embedded(asm, opnd!(self_val), id, index), + &Insn::LoadIvarExtended { self_val, id, index } => gen_load_ivar_extended(asm, opnd!(self_val), id, index), &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } | &Insn::FixnumMod { state, .. } @@ -716,6 +719,38 @@ fn gen_array_extend(jit: &mut JITState, asm: &mut Assembler, left: Opnd, right: asm_ccall!(asm, rb_ary_concat, left, right); } +fn gen_guard_shape(jit: &mut JITState, asm: &mut Assembler, val: Opnd, shape: ShapeId, state: &FrameState) -> Opnd { + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let val = asm.load(val); + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, val, shape_id_offset); + asm.cmp(shape_opnd, Opnd::UImm(shape.0 as u64)); + asm.jne(side_exit(jit, state, SideExitReason::GuardShape(shape))); + val +} + +fn gen_load_ivar_embedded(asm: &mut Assembler, self_val: Opnd, id: ID, index: u16) -> Opnd { + // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h + + asm_comment!(asm, "Load embedded ivar id={} index={}", id.contents_lossy(), index); + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (SIZEOF_VALUE * index as usize) as i32; + let self_val = asm.load(self_val); + let ivar_opnd = Opnd::mem(64, self_val, offs); + asm.load(ivar_opnd) +} + +fn gen_load_ivar_extended(asm: &mut Assembler, self_val: Opnd, id: ID, index: u16) -> Opnd { + asm_comment!(asm, "Load extended ivar id={} index={}", id.contents_lossy(), index); + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let self_val = asm.load(self_val); + let tbl_opnd = asm.load(Opnd::mem(64, self_val, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); + + // Read the ivar from the extended table + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * index as usize) as i32); + asm.load(ivar_opnd) +} + /// Compile an interpreter entry block to be inserted into an ISEQ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0)); @@ -1270,6 +1305,12 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard asm.cmp(klass, Opnd::Value(expected_class)); asm.jne(side_exit); + } else if guard_type.bit_equal(types::HeapObject) { + let side_exit = side_exit(jit, state, GuardType(guard_type)); + asm.cmp(val, Opnd::Value(Qfalse)); + asm.je(side_exit.clone()); + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(side_exit); } else { unimplemented!("unsupported type: {guard_type}"); } diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 015dd7f912..5a34e5a8de 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -271,6 +271,16 @@ pub struct ShapeId(pub u32); pub const INVALID_SHAPE_ID: ShapeId = ShapeId(RB_INVALID_SHAPE_ID); +impl ShapeId { + pub fn is_valid(self) -> bool { + self != INVALID_SHAPE_ID + } + + pub fn is_too_complex(self) -> bool { + unsafe { rb_jit_shape_too_complex_p(self.0) } + } +} + // Given an ISEQ pointer, convert PC to insn_idx pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; @@ -489,10 +499,6 @@ impl VALUE { unsafe { rb_obj_frozen_p(self) != VALUE(0) } } - pub fn shape_too_complex(self) -> bool { - unsafe { rb_zjit_shape_obj_too_complex_p(self) } - } - pub fn shape_id_of(self) -> ShapeId { ShapeId(unsafe { rb_obj_shape_id(self) }) } diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 4975308f68..c2f4b37a7a 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -681,24 +681,25 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 234; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 235; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 236; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -724,6 +725,9 @@ pub const DEFINED_CONST_FROM: defined_type = 17; pub type defined_type = u32; pub const RB_INVALID_SHAPE_ID: _bindgen_ty_38 = 4294967295; pub type _bindgen_ty_38 = u32; +pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16; +pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16; +pub type robject_offsets = u32; pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; unsafe extern "C" { pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); @@ -943,7 +947,6 @@ unsafe extern "C" { pub fn rb_iseq_get_zjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; pub fn rb_iseq_set_zjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void); pub fn rb_zjit_print_exception(); - pub fn rb_zjit_shape_obj_too_complex_p(obj: VALUE) -> bool; pub fn rb_zjit_singleton_class_p(klass: VALUE) -> bool; pub fn rb_zjit_defined_ivar(obj: VALUE, id: ID, pushval: VALUE) -> VALUE; pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; @@ -1025,4 +1028,5 @@ unsafe extern "C" { pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); + pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool; } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index fd19edaf04..f4e14eccd1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -424,6 +424,16 @@ impl PtrPrintMap { fn map_id(&self, id: u64) -> *const c_void { self.map_ptr(id as *const c_void) } + + /// Map an index into a Ruby object (e.g. for an ivar) for printing + fn map_index(&self, id: u64) -> *const c_void { + self.map_ptr(id as *const c_void) + } + + /// Map shape ID into a pointer for printing + fn map_shape(&self, id: ShapeId) -> *const c_void { + self.map_ptr(id.0 as *const c_void) + } } #[derive(Debug, Clone, Copy)] @@ -437,6 +447,7 @@ pub enum SideExitReason { FixnumSubOverflow, FixnumMultOverflow, GuardType(Type), + GuardShape(ShapeId), GuardBitEquals(VALUE), PatchPoint(Invariant), CalleeSideExit, @@ -520,6 +531,11 @@ pub enum Insn { /// Check whether an instance variable exists on `self_val` DefinedIvar { self_val: InsnId, id: ID, pushval: VALUE, state: InsnId }, + /// Read an instance variable at the given index, embedded in the object + LoadIvarEmbedded { self_val: InsnId, id: ID, index: u16 }, + /// Read an instance variable at the given index, from the extended table + LoadIvarExtended { self_val: InsnId, id: ID, index: u16 }, + /// Get a local variable from a higher scope or the heap GetLocal { level: u32, ep_offset: u32 }, /// Set a local variable in a higher scope or the heap @@ -592,6 +608,8 @@ pub enum Insn { GuardType { val: InsnId, guard_type: Type, state: InsnId }, /// Side-exit if val is not the expected VALUE. GuardBitEquals { val: InsnId, expected: VALUE, state: InsnId }, + /// Side-exit if val doesn't have the expected shape. + GuardShape { val: InsnId, shape: ShapeId, state: InsnId }, /// Generate no code (or padding if necessary) and insert a patch point /// that can be rewritten to a side exit when the Invariant is broken. @@ -665,6 +683,8 @@ impl Insn { Insn::FixnumOr { .. } => false, Insn::GetLocal { .. } => false, Insn::IsNil { .. } => false, + Insn::LoadIvarEmbedded { .. } => false, + Insn::LoadIvarExtended { .. } => false, Insn::CCall { elidable, .. } => !elidable, // TODO: NewRange is effects free if we can prove the two ends to be Fixnum, // but we don't have type information here in `impl Insn`. See rb_range_new(). @@ -810,6 +830,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumOr { left, right, .. } => { write!(f, "FixnumOr {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, + &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, Insn::CCall { cfun, args, name, return_type: _, elidable: _ } => { @@ -838,6 +859,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy()), Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), + &Insn::LoadIvarEmbedded { self_val, id, index } => write!(f, "LoadIvarEmbedded {self_val}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_index(index as u64)), + &Insn::LoadIvarExtended { self_val, id, index } => write!(f, "LoadIvarExtended {self_val}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_index(index as u64)), Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy()), Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy()), Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), @@ -1230,6 +1253,7 @@ impl Function { &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type: guard_type, state }, &GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(val), expected: expected, state }, + &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state }, &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, @@ -1292,6 +1316,8 @@ impl Function { &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, + &LoadIvarEmbedded { self_val, id, index } => LoadIvarEmbedded { self_val: find!(self_val), id, index }, + &LoadIvarExtended { self_val, id, index } => LoadIvarExtended { self_val: find!(self_val), id, index }, &SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state }, &SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level }, &GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state }, @@ -1360,6 +1386,7 @@ impl Function { Insn::CCall { return_type, .. } => *return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_value(*expected)), + Insn::GuardShape { val, .. } => self.type_of(*val), Insn::FixnumAdd { .. } => types::Fixnum, Insn::FixnumSub { .. } => types::Fixnum, Insn::FixnumMult { .. } => types::Fixnum, @@ -1384,6 +1411,8 @@ impl Function { Insn::ArrayMax { .. } => types::BasicObject, Insn::GetGlobal { .. } => types::BasicObject, Insn::GetIvar { .. } => types::BasicObject, + Insn::LoadIvarEmbedded { .. } => types::BasicObject, + Insn::LoadIvarExtended { .. } => types::BasicObject, Insn::GetSpecialSymbol { .. } => types::BasicObject, Insn::GetSpecialNumber { .. } => types::BasicObject, Insn::ToNewArray { .. } => types::ArrayExact, @@ -1468,14 +1497,25 @@ impl Function { } } + fn chase_insn(&self, insn: InsnId) -> InsnId { + let id = self.union_find.borrow().find_const(insn); + match self.insns[id.0] { + Insn::GuardType { val, .. } => self.chase_insn(val), + Insn::GuardShape { val, .. } => self.chase_insn(val), + Insn::GuardBitEquals { val, .. } => self.chase_insn(val), + _ => id, + } + } + /// Return the interpreter-profiled type of the HIR instruction at the given ISEQ instruction /// index, if it is known. This historical type record is not a guarantee and must be checked /// with a GuardType or similar. fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<ProfiledType> { let Some(ref profiles) = self.profiles else { return None }; let Some(entries) = profiles.types.get(&iseq_insn_idx) else { return None }; + let insn = self.chase_insn(insn); for (entry_insn, entry_type_summary) in entries { - if self.union_find.borrow().find_const(*entry_insn) == self.union_find.borrow().find_const(insn) { + if self.union_find.borrow().find_const(*entry_insn) == insn { if entry_type_summary.is_monomorphic() || entry_type_summary.is_skewed_polymorphic() { return Some(entry_type_summary.bucket(0)); } else { @@ -1727,6 +1767,56 @@ impl Function { self.infer_types(); } + fn optimize_getivar(&mut self) { + for block in self.rpo() { + let old_insns = std::mem::take(&mut self.blocks[block.0].insns); + assert!(self.blocks[block.0].insns.is_empty()); + for insn_id in old_insns { + match self.find(insn_id) { + Insn::GetIvar { self_val, id, state } => { + let frame_state = self.frame_state(state); + let Some(recv_type) = self.profiled_type_of_at(self_val, frame_state.insn_idx) else { + // No (monomorphic) profile info + self.push_insn_id(block, insn_id); continue; + }; + if recv_type.flags().is_immediate() { + // Instance variable lookups on immediate values are always nil + self.push_insn_id(block, insn_id); continue; + } + assert!(recv_type.shape().is_valid()); + if !recv_type.flags().is_t_object() { + // Check if the receiver is a T_OBJECT + self.push_insn_id(block, insn_id); continue; + } + if recv_type.shape().is_too_complex() { + // too-complex shapes can't use index access + self.push_insn_id(block, insn_id); continue; + } + let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapObject, state }); + let self_val = self.push_insn(block, Insn::GuardShape { val: self_val, shape: recv_type.shape(), state }); + let mut ivar_index: u16 = 0; + let replacement = if ! unsafe { rb_shape_get_iv_index(recv_type.shape().0, id, &mut ivar_index) } { + // If there is no IVAR index, then the ivar was undefined when we + // entered the compiler. That means we can just return nil for this + // shape + iv name + Insn::Const { val: Const::Value(Qnil) } + } else { + if recv_type.flags().is_embedded() { + Insn::LoadIvarEmbedded { self_val, id, index: ivar_index } + } else { + Insn::LoadIvarExtended { self_val, id, index: ivar_index } + } + }; + let replacement = self.push_insn(block, replacement); + self.make_equal_to(insn_id, replacement); + } + _ => { self.push_insn_id(block, insn_id); } + } + } + } + self.infer_types(); + } + /// Optimize SendWithoutBlock that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { @@ -2011,6 +2101,7 @@ impl Function { | &Insn::StringCopy { val, state, .. } | &Insn::GuardType { val, state, .. } | &Insn::GuardBitEquals { val, state, .. } + | &Insn::GuardShape { val, state, .. } | &Insn::ToArray { val, state } | &Insn::ToNewArray { val, state } => { worklist.push_back(val); @@ -2089,6 +2180,10 @@ impl Function { worklist.push_back(str); worklist.push_back(state); } + &Insn::LoadIvarEmbedded { self_val, .. } + | &Insn::LoadIvarExtended { self_val, .. } => { + worklist.push_back(self_val); + } &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | @@ -2232,6 +2327,8 @@ impl Function { // Function is assumed to have types inferred already self.optimize_direct_sends(); #[cfg(debug_assertions)] self.assert_validates(); + self.optimize_getivar(); + #[cfg(debug_assertions)] self.assert_validates(); self.optimize_c_calls(); #[cfg(debug_assertions)] self.assert_validates(); self.fold_constants(); @@ -2794,6 +2891,18 @@ impl ProfileOracle { entry.push((insn, TypeDistributionSummary::new(insn_type_distribution))) } } + + /// Map the interpreter-recorded types of self onto the HIR self + fn profile_self(&mut self, state: &FrameState, self_param: InsnId) { + let iseq_insn_idx = state.insn_idx; + let Some(operand_types) = self.payload.profile.get_operand_types(iseq_insn_idx) else { return }; + let entry = self.types.entry(iseq_insn_idx).or_insert_with(|| vec![]); + if operand_types.is_empty() { + return; + } + let self_type_distribution = &operand_types[0]; + entry.push((self_param, TypeDistributionSummary::new(self_type_distribution))) + } } /// The index of the self parameter in the HIR function @@ -2891,17 +3000,22 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; state.pc = pc; let exit_state = state.clone(); - profiles.profile_stack(&exit_state); - - // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled. - if get_option!(stats) { - fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count)); - } // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. let opcode: u32 = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() .unwrap(); + + if opcode == YARVINSN_getinstancevariable { + profiles.profile_self(&exit_state, self_param); + } else { + profiles.profile_stack(&exit_state); + } + + // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled. + if get_option!(stats) { + fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count)); + } // Move to the next instruction to compile insn_idx += insn_len(opcode as usize); @@ -8314,6 +8428,96 @@ mod opt_tests { } #[test] + fn test_optimize_getivar_embedded() { + eval(" + class C + attr_reader :foo + def initialize + @foo = 42 + end + end + + O = C.new + def test(o) = o.foo + test O + test O + "); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:10: + bb0(v0:BasicObject, v1:BasicObject): + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C] + v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038 + v13:BasicObject = LoadIvarEmbedded v12, :@foo@0x1039 + CheckInterrupts + Return v13 + "); + } + + #[test] + fn test_optimize_getivar_extended() { + eval(" + class C + attr_reader :foo + def initialize + @foo = 42 + end + end + + O = C.new + def test(o) = o.foo + test O + test O + "); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:10: + bb0(v0:BasicObject, v1:BasicObject): + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C] + v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038 + v13:BasicObject = LoadIvarEmbedded v12, :@foo@0x1039 + CheckInterrupts + Return v13 + "); + } + + #[test] + fn test_dont_optimize_getivar_polymorphic() { + crate::options::rb_zjit_prepare_options(); + crate::options::internal_set_num_profiles(3); + eval(" + class C + attr_reader :foo, :bar + + def foo_then_bar + @foo = 1 + @bar = 2 + end + + def bar_then_foo + @bar = 3 + @foo = 4 + end + end + + O1 = C.new + O1.foo_then_bar + O2 = C.new + O2.bar_then_foo + def test(o) = o.foo + test O1 + test O2 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:20: + bb0(v0:BasicObject, v1:BasicObject): + v4:BasicObject = SendWithoutBlock v1, :foo + CheckInterrupts + Return v4 + "); + } + + #[test] fn test_inline_attr_reader_constant() { eval(" class C @@ -8332,9 +8536,11 @@ mod opt_tests { PatchPoint StableConstantNames(0x1000, O) v11:BasicObject[VALUE(0x1008)] = Const Value(VALUE(0x1008)) PatchPoint MethodRedefined(C@0x1010, foo@0x1018, cme:0x1020) - v13:BasicObject = GetIvar v11, :@foo + v14:HeapObject[VALUE(0x1008)] = GuardType v11, HeapObject + v15:HeapObject[VALUE(0x1008)] = GuardShape v14, 0x1048 + v16:NilClass = Const Value(nil) CheckInterrupts - Return v13 + Return v16 "); } @@ -8357,9 +8563,11 @@ mod opt_tests { PatchPoint StableConstantNames(0x1000, O) v11:BasicObject[VALUE(0x1008)] = Const Value(VALUE(0x1008)) PatchPoint MethodRedefined(C@0x1010, foo@0x1018, cme:0x1020) - v13:BasicObject = GetIvar v11, :@foo + v14:HeapObject[VALUE(0x1008)] = GuardType v11, HeapObject + v15:HeapObject[VALUE(0x1008)] = GuardShape v14, 0x1048 + v16:NilClass = Const Value(nil) CheckInterrupts - Return v13 + Return v16 "); } @@ -8379,9 +8587,10 @@ mod opt_tests { bb0(v0:BasicObject, v1:BasicObject): PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C] - v10:BasicObject = GetIvar v9, :@foo + v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038 + v13:NilClass = Const Value(nil) CheckInterrupts - Return v10 + Return v13 "); } @@ -8401,9 +8610,10 @@ mod opt_tests { bb0(v0:BasicObject, v1:BasicObject): PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C] - v10:BasicObject = GetIvar v9, :@foo + v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038 + v13:NilClass = Const Value(nil) CheckInterrupts - Return v10 + Return v13 "); } } diff --git a/zjit/src/options.rs b/zjit/src/options.rs index c2c9eea085..155c3805ba 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -279,6 +279,14 @@ fn update_profile_threshold() { } } +pub fn internal_set_num_profiles(n: u8) { + let options = unsafe { OPTIONS.as_mut().unwrap() }; + options.num_profiles = n; + let call_threshold = n.saturating_add(1); + unsafe { rb_zjit_call_threshold = call_threshold as u64; } + update_profile_threshold(); +} + /// Print YJIT options for `ruby --help`. `width` is width of option parts, and /// `columns` is indent width of descriptions. #[unsafe(no_mangle)] diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 9da48a0705..f2b12516c7 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -37,6 +37,10 @@ impl Profiler { *(sp.offset(-1 - n)) } } + + fn peek_at_self(&self) -> VALUE { + unsafe { rb_get_cfp_self(self.cfp) } + } } /// API called from zjit_* instruction. opcode is the bare (non-zjit_*) instruction. @@ -68,6 +72,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_or => profile_operands(profiler, profile, 2), YARVINSN_opt_empty_p => profile_operands(profiler, profile, 1), YARVINSN_opt_not => profile_operands(profiler, profile, 1), + YARVINSN_getinstancevariable => profile_self(profiler, profile), YARVINSN_opt_send_without_block => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; @@ -106,8 +111,21 @@ fn profile_operands(profiler: &mut Profiler, profile: &mut IseqProfile, n: usize } } +fn profile_self(profiler: &mut Profiler, profile: &mut IseqProfile) { + let types = &mut profile.opnd_types[profiler.insn_idx]; + if types.is_empty() { + types.resize(1, TypeDistribution::new()); + } + let obj = profiler.peek_at_self(); + // TODO(max): Handle GC-hidden classes like Array, Hash, etc and make them look normal or + // drop them or something + let ty = ProfiledType::new(obj); + unsafe { rb_gc_writebarrier(profiler.iseq.into(), ty.class()) }; + types[0].observe(ty); +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] -struct Flags(u32); +pub struct Flags(u32); impl Flags { const NONE: u32 = 0; @@ -206,6 +224,10 @@ impl ProfiledType { self.shape } + pub fn flags(&self) -> Flags { + self.flags + } + pub fn is_fixnum(&self) -> bool { self.class == unsafe { rb_cInteger } && self.flags.is_immediate() } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 64bef8ca6a..cde2d4e80b 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -94,6 +94,7 @@ make_counters! { exit_fixnum_mult_overflow, exit_guard_type_failure, exit_guard_bit_equals_failure, + exit_guard_shape_failure, exit_patchpoint, exit_callee_side_exit, exit_obj_to_string_fallback, @@ -151,6 +152,7 @@ pub fn exit_counter_ptr(reason: crate::hir::SideExitReason) -> *mut u64 { FixnumMultOverflow => exit_fixnum_mult_overflow, GuardType(_) => exit_guard_type_failure, GuardBitEquals(_) => exit_guard_bit_equals_failure, + GuardShape(_) => exit_guard_shape_failure, PatchPoint(_) => exit_patchpoint, CalleeSideExit => exit_callee_side_exit, ObjToStringFallback => exit_obj_to_string_fallback, |
