author     Max Bernstein <rubybugs@bernsteinbear.com>  2025-08-29 09:46:08 -0700
committer  GitHub <noreply@github.com>                 2025-08-29 12:46:08 -0400
commit     b6f4b5399d22aab18fa3d163b9cc1764ad0df7dd (patch)
tree       ba185e1881644efe94c7987396e4b24af58c8c34
parent     fc4f8c879a362968d87d36d6f93d30ae9abd0a5b (diff)
ZJIT: Specialize monomorphic GetIvar (#14388)
Specialize monomorphic `GetIvar` into:

* `GuardType(HeapObject)`
* `GuardShape`
* `LoadIvarEmbedded` or `LoadIvarExtended`

This requires profiling self for `getinstancevariable` (it's not on the
operand stack).

This also optimizes `GetIvar`s that happen as a result of inlining
`attr_reader` and `attr_accessor`.

Also move some (newly) shared JIT helpers into jit.c.
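For illustration only (not part of the commit), the hypothetical `Point` class below is the kind of code this specialization targets: every `Point` instance ends up with the same shape, so the profiled `GetIvar` in `Point#x` can be compiled to the guard-plus-load sequence above instead of calling `vm_getinstancevariable` each time:

    class Point
      def initialize(x)
        @x = x   # every Point instance takes the same shape
      end

      def x
        @x       # monomorphic GetIvar: only one receiver shape is observed here
      end
    end

    p = Point.new(1)
    p.x   # after profiling, roughly: GuardType(HeapObject); GuardShape; LoadIvarEmbedded
    p.x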
-rw-r--r--  insns.def                          1
-rw-r--r--  jit.c                             12
-rw-r--r--  test/ruby/test_zjit.rb            30
-rw-r--r--  yjit.c                            12
-rw-r--r--  yjit/bindgen/src/main.rs           4
-rw-r--r--  yjit/src/codegen.rs                2
-rw-r--r--  yjit/src/cruby_bindings.inc.rs     8
-rw-r--r--  zjit.c                             6
-rw-r--r--  zjit/bindgen/src/main.rs           3
-rw-r--r--  zjit/src/codegen.rs               41
-rw-r--r--  zjit/src/cruby.rs                 14
-rw-r--r--  zjit/src/cruby_bindings.inc.rs    42
-rw-r--r--  zjit/src/hir.rs                  240
-rw-r--r--  zjit/src/options.rs                8
-rw-r--r--  zjit/src/profile.rs               24
-rw-r--r--  zjit/src/stats.rs                  2
16 files changed, 384 insertions, 65 deletions
diff --git a/insns.def b/insns.def
index c9de612912..953f6bb87c 100644
--- a/insns.def
+++ b/insns.def
@@ -212,6 +212,7 @@ getinstancevariable
(VALUE val)
/* Ractor crashes when it accesses class/module-level instances variables. */
// attr bool leaf = false; /* has IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR() */
+// attr bool zjit_profile = true;
{
val = vm_getinstancevariable(GET_ISEQ(), GET_SELF(), id, ic);
}
diff --git a/jit.c b/jit.c
index e68758368a..7d7b5fd4fd 100644
--- a/jit.c
+++ b/jit.c
@@ -14,6 +14,12 @@
#include "iseq.h"
#include "internal/gc.h"
+// Field offsets for the RObject struct
+enum robject_offsets {
+ ROBJECT_OFFSET_AS_HEAP_FIELDS = offsetof(struct RObject, as.heap.fields),
+ ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary),
+};
+
unsigned int
rb_iseq_encoded_size(const rb_iseq_t *iseq)
{
@@ -454,3 +460,9 @@ rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp)
{
cfp->sp = sp;
}
+
+bool
+rb_jit_shape_too_complex_p(shape_id_t shape_id)
+{
+ return rb_shape_too_complex_p(shape_id);
+}
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 98f747c4d0..8e450458dd 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -1162,6 +1162,36 @@ class TestZJIT < Test::Unit::TestCase
}
end
+ def test_getinstancevariable_miss
+ assert_compiles '[1, 1, 4]', %q{
+ class C
+ def foo
+ @foo
+ end
+
+ def foo_then_bar
+ @foo = 1
+ @bar = 2
+ end
+
+ def bar_then_foo
+ @bar = 3
+ @foo = 4
+ end
+ end
+
+ o1 = C.new
+ o1.foo_then_bar
+ result = []
+ result << o1.foo
+ result << o1.foo
+ o2 = C.new
+ o2.bar_then_foo
+ result << o2.foo
+ result
+ }
+ end
+
def test_setinstancevariable
assert_compiles '1', %q{
def test() = @foo = 1
diff --git a/yjit.c b/yjit.c
index 44788eaf2c..cadae3a5df 100644
--- a/yjit.c
+++ b/yjit.c
@@ -39,12 +39,6 @@
#include <errno.h>
-// Field offsets for the RObject struct
-enum robject_offsets {
- ROBJECT_OFFSET_AS_HEAP_FIELDS = offsetof(struct RObject, as.heap.fields),
- ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary),
-};
-
// Field offsets for the RString struct
enum rstring_offsets {
RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len)
@@ -759,12 +753,6 @@ rb_object_shape_count(void)
}
bool
-rb_yjit_shape_too_complex_p(shape_id_t shape_id)
-{
- return rb_shape_too_complex_p(shape_id);
-}
-
-bool
rb_yjit_shape_obj_too_complex_p(VALUE obj)
{
return rb_shape_obj_too_complex_p(obj);
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 2993e05e2f..5cf5caeb9a 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -99,7 +99,6 @@ fn main() {
.allowlist_function("rb_shape_get_iv_index")
.allowlist_function("rb_shape_transition_add_ivar_no_warnings")
.allowlist_function("rb_yjit_shape_obj_too_complex_p")
- .allowlist_function("rb_yjit_shape_too_complex_p")
.allowlist_function("rb_yjit_shape_capacity")
.allowlist_function("rb_yjit_shape_index")
.allowlist_var("SHAPE_ID_NUM_BITS")
@@ -351,11 +350,12 @@ fn main() {
.allowlist_function("rb_yjit_invokeblock_sp_pops")
.allowlist_function("rb_yjit_set_exception_return")
.allowlist_function("rb_yjit_str_concat_codepoint")
- .allowlist_type("robject_offsets")
.allowlist_type("rstring_offsets")
// From jit.c
.allowlist_function("rb_assert_holding_vm_lock")
+ .allowlist_function("rb_jit_shape_too_complex_p")
+ .allowlist_type("robject_offsets")
// from vm_sync.h
.allowlist_function("rb_vm_barrier")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 2dae72280f..9e9759e781 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -3103,7 +3103,7 @@ fn gen_set_ivar(
// If the VM ran out of shapes, or this class generated too many leaf,
// it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table).
- new_shape_too_complex = unsafe { rb_yjit_shape_too_complex_p(next_shape_id) };
+ new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id) };
if new_shape_too_complex {
Some((next_shape_id, None, 0_usize))
} else {
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index ca459ceafb..c22f4490db 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -994,12 +994,12 @@ pub const DEFINED_REF: defined_type = 15;
pub const DEFINED_FUNC: defined_type = 16;
pub const DEFINED_CONST_FROM: defined_type = 17;
pub type defined_type = u32;
-pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16;
-pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16;
-pub type robject_offsets = u32;
pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
pub type rstring_offsets = u32;
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
+pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16;
+pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16;
+pub type robject_offsets = u32;
pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
@@ -1238,7 +1238,6 @@ extern "C" {
line: ::std::os::raw::c_int,
);
pub fn rb_object_shape_count() -> VALUE;
- pub fn rb_yjit_shape_too_complex_p(shape_id: shape_id_t) -> bool;
pub fn rb_yjit_shape_obj_too_complex_p(obj: VALUE) -> bool;
pub fn rb_yjit_shape_capacity(shape_id: shape_id_t) -> attr_index_t;
pub fn rb_yjit_shape_index(shape_id: shape_id_t) -> attr_index_t;
@@ -1328,4 +1327,5 @@ extern "C" {
pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE;
pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
+ pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool;
}
diff --git a/zjit.c b/zjit.c
index 311bc1fe80..8a7314cb7f 100644
--- a/zjit.c
+++ b/zjit.c
@@ -337,12 +337,6 @@ rb_zjit_print_exception(void)
rb_warn("Ruby error: %"PRIsVALUE"", rb_funcall(exception, rb_intern("full_message"), 0));
}
-bool
-rb_zjit_shape_obj_too_complex_p(VALUE obj)
-{
- return rb_shape_obj_too_complex_p(obj);
-}
-
enum {
RB_INVALID_SHAPE_ID = INVALID_SHAPE_ID,
};
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index 0eb800e1f9..ef8537cbbb 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -111,7 +111,6 @@ fn main() {
.allowlist_function("rb_shape_id_offset")
.allowlist_function("rb_shape_get_iv_index")
.allowlist_function("rb_shape_transition_add_ivar_no_warnings")
- .allowlist_function("rb_zjit_shape_obj_too_complex_p")
.allowlist_var("SHAPE_ID_NUM_BITS")
// From ruby/internal/intern/object.h
@@ -367,6 +366,8 @@ fn main() {
// From jit.c
.allowlist_function("rb_assert_holding_vm_lock")
+ .allowlist_function("rb_jit_shape_too_complex_p")
+ .allowlist_type("robject_offsets")
// from vm_sync.h
.allowlist_function("rb_vm_barrier")
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 36a899438c..441bb9e2f7 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -405,6 +405,9 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::ToArray { val, state } => { gen_to_array(jit, asm, opnd!(val), &function.frame_state(state)) },
&Insn::DefinedIvar { self_val, id, pushval, .. } => { gen_defined_ivar(asm, opnd!(self_val), id, pushval) },
&Insn::ArrayExtend { left, right, state } => { no_output!(gen_array_extend(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state))) },
+ &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)),
+ &Insn::LoadIvarEmbedded { self_val, id, index } => gen_load_ivar_embedded(asm, opnd!(self_val), id, index),
+ &Insn::LoadIvarExtended { self_val, id, index } => gen_load_ivar_extended(asm, opnd!(self_val), id, index),
&Insn::ArrayMax { state, .. }
| &Insn::FixnumDiv { state, .. }
| &Insn::FixnumMod { state, .. }
@@ -716,6 +719,38 @@ fn gen_array_extend(jit: &mut JITState, asm: &mut Assembler, left: Opnd, right:
asm_ccall!(asm, rb_ary_concat, left, right);
}
+fn gen_guard_shape(jit: &mut JITState, asm: &mut Assembler, val: Opnd, shape: ShapeId, state: &FrameState) -> Opnd {
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let val = asm.load(val);
+ let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, val, shape_id_offset);
+ asm.cmp(shape_opnd, Opnd::UImm(shape.0 as u64));
+ asm.jne(side_exit(jit, state, SideExitReason::GuardShape(shape)));
+ val
+}
+
+fn gen_load_ivar_embedded(asm: &mut Assembler, self_val: Opnd, id: ID, index: u16) -> Opnd {
+ // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h
+
+ asm_comment!(asm, "Load embedded ivar id={} index={}", id.contents_lossy(), index);
+ let offs = ROBJECT_OFFSET_AS_ARY as i32 + (SIZEOF_VALUE * index as usize) as i32;
+ let self_val = asm.load(self_val);
+ let ivar_opnd = Opnd::mem(64, self_val, offs);
+ asm.load(ivar_opnd)
+}
+
+fn gen_load_ivar_extended(asm: &mut Assembler, self_val: Opnd, id: ID, index: u16) -> Opnd {
+ asm_comment!(asm, "Load extended ivar id={} index={}", id.contents_lossy(), index);
+ // Compile time value is *not* embedded.
+
+ // Get a pointer to the extended table
+ let self_val = asm.load(self_val);
+ let tbl_opnd = asm.load(Opnd::mem(64, self_val, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32));
+
+ // Read the ivar from the extended table
+ let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * index as usize) as i32);
+ asm.load(ivar_opnd)
+}
+
/// Compile an interpreter entry block to be inserted into an ISEQ
fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));
@@ -1270,6 +1305,12 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
asm.cmp(klass, Opnd::Value(expected_class));
asm.jne(side_exit);
+ } else if guard_type.bit_equal(types::HeapObject) {
+ let side_exit = side_exit(jit, state, GuardType(guard_type));
+ asm.cmp(val, Opnd::Value(Qfalse));
+ asm.je(side_exit.clone());
+ asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
+ asm.jnz(side_exit);
} else {
unimplemented!("unsupported type: {guard_type}");
}
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index 015dd7f912..5a34e5a8de 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -271,6 +271,16 @@ pub struct ShapeId(pub u32);
pub const INVALID_SHAPE_ID: ShapeId = ShapeId(RB_INVALID_SHAPE_ID);
+impl ShapeId {
+ pub fn is_valid(self) -> bool {
+ self != INVALID_SHAPE_ID
+ }
+
+ pub fn is_too_complex(self) -> bool {
+ unsafe { rb_jit_shape_too_complex_p(self.0) }
+ }
+}
+
// Given an ISEQ pointer, convert PC to insn_idx
pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> {
let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
@@ -489,10 +499,6 @@ impl VALUE {
unsafe { rb_obj_frozen_p(self) != VALUE(0) }
}
- pub fn shape_too_complex(self) -> bool {
- unsafe { rb_zjit_shape_obj_too_complex_p(self) }
- }
-
pub fn shape_id_of(self) -> ShapeId {
ShapeId(unsafe { rb_obj_shape_id(self) })
}
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 4975308f68..c2f4b37a7a 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -681,24 +681,25 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214;
pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215;
pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216;
pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217;
-pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 218;
-pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 219;
-pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 220;
-pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 221;
-pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 222;
-pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 223;
-pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 224;
-pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 225;
-pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 226;
-pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 227;
-pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 228;
-pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 229;
-pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 230;
-pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 231;
-pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 232;
-pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 233;
-pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 234;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 235;
+pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218;
+pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219;
+pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 220;
+pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 221;
+pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 222;
+pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 223;
+pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 224;
+pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 225;
+pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 226;
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 227;
+pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 228;
+pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 229;
+pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 230;
+pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 231;
+pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 232;
+pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 233;
+pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 234;
+pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 235;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 236;
pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
@@ -724,6 +725,9 @@ pub const DEFINED_CONST_FROM: defined_type = 17;
pub type defined_type = u32;
pub const RB_INVALID_SHAPE_ID: _bindgen_ty_38 = 4294967295;
pub type _bindgen_ty_38 = u32;
+pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: robject_offsets = 16;
+pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16;
+pub type robject_offsets = u32;
pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
unsafe extern "C" {
pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
@@ -943,7 +947,6 @@ unsafe extern "C" {
pub fn rb_iseq_get_zjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
pub fn rb_iseq_set_zjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
pub fn rb_zjit_print_exception();
- pub fn rb_zjit_shape_obj_too_complex_p(obj: VALUE) -> bool;
pub fn rb_zjit_singleton_class_p(klass: VALUE) -> bool;
pub fn rb_zjit_defined_ivar(obj: VALUE, id: ID, pushval: VALUE) -> VALUE;
pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
@@ -1025,4 +1028,5 @@ unsafe extern "C" {
pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE;
pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
+ pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool;
}
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index fd19edaf04..f4e14eccd1 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -424,6 +424,16 @@ impl PtrPrintMap {
fn map_id(&self, id: u64) -> *const c_void {
self.map_ptr(id as *const c_void)
}
+
+ /// Map an index into a Ruby object (e.g. for an ivar) for printing
+ fn map_index(&self, id: u64) -> *const c_void {
+ self.map_ptr(id as *const c_void)
+ }
+
+ /// Map shape ID into a pointer for printing
+ fn map_shape(&self, id: ShapeId) -> *const c_void {
+ self.map_ptr(id.0 as *const c_void)
+ }
}
#[derive(Debug, Clone, Copy)]
@@ -437,6 +447,7 @@ pub enum SideExitReason {
FixnumSubOverflow,
FixnumMultOverflow,
GuardType(Type),
+ GuardShape(ShapeId),
GuardBitEquals(VALUE),
PatchPoint(Invariant),
CalleeSideExit,
@@ -520,6 +531,11 @@ pub enum Insn {
/// Check whether an instance variable exists on `self_val`
DefinedIvar { self_val: InsnId, id: ID, pushval: VALUE, state: InsnId },
+ /// Read an instance variable at the given index, embedded in the object
+ LoadIvarEmbedded { self_val: InsnId, id: ID, index: u16 },
+ /// Read an instance variable at the given index, from the extended table
+ LoadIvarExtended { self_val: InsnId, id: ID, index: u16 },
+
/// Get a local variable from a higher scope or the heap
GetLocal { level: u32, ep_offset: u32 },
/// Set a local variable in a higher scope or the heap
@@ -592,6 +608,8 @@ pub enum Insn {
GuardType { val: InsnId, guard_type: Type, state: InsnId },
/// Side-exit if val is not the expected VALUE.
GuardBitEquals { val: InsnId, expected: VALUE, state: InsnId },
+ /// Side-exit if val doesn't have the expected shape.
+ GuardShape { val: InsnId, shape: ShapeId, state: InsnId },
/// Generate no code (or padding if necessary) and insert a patch point
/// that can be rewritten to a side exit when the Invariant is broken.
@@ -665,6 +683,8 @@ impl Insn {
Insn::FixnumOr { .. } => false,
Insn::GetLocal { .. } => false,
Insn::IsNil { .. } => false,
+ Insn::LoadIvarEmbedded { .. } => false,
+ Insn::LoadIvarExtended { .. } => false,
Insn::CCall { elidable, .. } => !elidable,
// TODO: NewRange is effects free if we can prove the two ends to be Fixnum,
// but we don't have type information here in `impl Insn`. See rb_range_new().
@@ -810,6 +830,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Insn::FixnumOr { left, right, .. } => { write!(f, "FixnumOr {left}, {right}") },
Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) },
Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) },
+ &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) },
Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) },
Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) },
Insn::CCall { cfun, args, name, return_type: _, elidable: _ } => {
@@ -838,6 +859,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
}
Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy()),
Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()),
+ &Insn::LoadIvarEmbedded { self_val, id, index } => write!(f, "LoadIvarEmbedded {self_val}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_index(index as u64)),
+ &Insn::LoadIvarExtended { self_val, id, index } => write!(f, "LoadIvarExtended {self_val}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_index(index as u64)),
Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy()),
Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy()),
Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()),
@@ -1230,6 +1253,7 @@ impl Function {
&IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) },
&GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type: guard_type, state },
&GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(val), expected: expected, state },
+ &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state },
&FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state },
&FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state },
&FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state },
@@ -1292,6 +1316,8 @@ impl Function {
&ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) },
&SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state },
&GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state },
+ &LoadIvarEmbedded { self_val, id, index } => LoadIvarEmbedded { self_val: find!(self_val), id, index },
+ &LoadIvarExtended { self_val, id, index } => LoadIvarExtended { self_val: find!(self_val), id, index },
&SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state },
&SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level },
&GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state },
@@ -1360,6 +1386,7 @@ impl Function {
Insn::CCall { return_type, .. } => *return_type,
Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type),
Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_value(*expected)),
+ Insn::GuardShape { val, .. } => self.type_of(*val),
Insn::FixnumAdd { .. } => types::Fixnum,
Insn::FixnumSub { .. } => types::Fixnum,
Insn::FixnumMult { .. } => types::Fixnum,
@@ -1384,6 +1411,8 @@ impl Function {
Insn::ArrayMax { .. } => types::BasicObject,
Insn::GetGlobal { .. } => types::BasicObject,
Insn::GetIvar { .. } => types::BasicObject,
+ Insn::LoadIvarEmbedded { .. } => types::BasicObject,
+ Insn::LoadIvarExtended { .. } => types::BasicObject,
Insn::GetSpecialSymbol { .. } => types::BasicObject,
Insn::GetSpecialNumber { .. } => types::BasicObject,
Insn::ToNewArray { .. } => types::ArrayExact,
@@ -1468,14 +1497,25 @@ impl Function {
}
}
+ fn chase_insn(&self, insn: InsnId) -> InsnId {
+ let id = self.union_find.borrow().find_const(insn);
+ match self.insns[id.0] {
+ Insn::GuardType { val, .. } => self.chase_insn(val),
+ Insn::GuardShape { val, .. } => self.chase_insn(val),
+ Insn::GuardBitEquals { val, .. } => self.chase_insn(val),
+ _ => id,
+ }
+ }
+
/// Return the interpreter-profiled type of the HIR instruction at the given ISEQ instruction
/// index, if it is known. This historical type record is not a guarantee and must be checked
/// with a GuardType or similar.
fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<ProfiledType> {
let Some(ref profiles) = self.profiles else { return None };
let Some(entries) = profiles.types.get(&iseq_insn_idx) else { return None };
+ let insn = self.chase_insn(insn);
for (entry_insn, entry_type_summary) in entries {
- if self.union_find.borrow().find_const(*entry_insn) == self.union_find.borrow().find_const(insn) {
+ if self.union_find.borrow().find_const(*entry_insn) == insn {
if entry_type_summary.is_monomorphic() || entry_type_summary.is_skewed_polymorphic() {
return Some(entry_type_summary.bucket(0));
} else {
@@ -1727,6 +1767,56 @@ impl Function {
self.infer_types();
}
+ fn optimize_getivar(&mut self) {
+ for block in self.rpo() {
+ let old_insns = std::mem::take(&mut self.blocks[block.0].insns);
+ assert!(self.blocks[block.0].insns.is_empty());
+ for insn_id in old_insns {
+ match self.find(insn_id) {
+ Insn::GetIvar { self_val, id, state } => {
+ let frame_state = self.frame_state(state);
+ let Some(recv_type) = self.profiled_type_of_at(self_val, frame_state.insn_idx) else {
+ // No (monomorphic) profile info
+ self.push_insn_id(block, insn_id); continue;
+ };
+ if recv_type.flags().is_immediate() {
+ // Instance variable lookups on immediate values are always nil
+ self.push_insn_id(block, insn_id); continue;
+ }
+ assert!(recv_type.shape().is_valid());
+ if !recv_type.flags().is_t_object() {
+ // Check if the receiver is a T_OBJECT
+ self.push_insn_id(block, insn_id); continue;
+ }
+ if recv_type.shape().is_too_complex() {
+ // too-complex shapes can't use index access
+ self.push_insn_id(block, insn_id); continue;
+ }
+ let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapObject, state });
+ let self_val = self.push_insn(block, Insn::GuardShape { val: self_val, shape: recv_type.shape(), state });
+ let mut ivar_index: u16 = 0;
+ let replacement = if ! unsafe { rb_shape_get_iv_index(recv_type.shape().0, id, &mut ivar_index) } {
+ // If there is no IVAR index, then the ivar was undefined when we
+ // entered the compiler. That means we can just return nil for this
+ // shape + iv name
+ Insn::Const { val: Const::Value(Qnil) }
+ } else {
+ if recv_type.flags().is_embedded() {
+ Insn::LoadIvarEmbedded { self_val, id, index: ivar_index }
+ } else {
+ Insn::LoadIvarExtended { self_val, id, index: ivar_index }
+ }
+ };
+ let replacement = self.push_insn(block, replacement);
+ self.make_equal_to(insn_id, replacement);
+ }
+ _ => { self.push_insn_id(block, insn_id); }
+ }
+ }
+ }
+ self.infer_types();
+ }
+
/// Optimize SendWithoutBlock that land in a C method to a direct CCall without
/// runtime lookup.
fn optimize_c_calls(&mut self) {
@@ -2011,6 +2101,7 @@ impl Function {
| &Insn::StringCopy { val, state, .. }
| &Insn::GuardType { val, state, .. }
| &Insn::GuardBitEquals { val, state, .. }
+ | &Insn::GuardShape { val, state, .. }
| &Insn::ToArray { val, state }
| &Insn::ToNewArray { val, state } => {
worklist.push_back(val);
@@ -2089,6 +2180,10 @@ impl Function {
worklist.push_back(str);
worklist.push_back(state);
}
+ &Insn::LoadIvarEmbedded { self_val, .. }
+ | &Insn::LoadIvarExtended { self_val, .. } => {
+ worklist.push_back(self_val);
+ }
&Insn::GetGlobal { state, .. } |
&Insn::GetSpecialSymbol { state, .. } |
&Insn::GetSpecialNumber { state, .. } |
@@ -2232,6 +2327,8 @@ impl Function {
// Function is assumed to have types inferred already
self.optimize_direct_sends();
#[cfg(debug_assertions)] self.assert_validates();
+ self.optimize_getivar();
+ #[cfg(debug_assertions)] self.assert_validates();
self.optimize_c_calls();
#[cfg(debug_assertions)] self.assert_validates();
self.fold_constants();
@@ -2794,6 +2891,18 @@ impl ProfileOracle {
entry.push((insn, TypeDistributionSummary::new(insn_type_distribution)))
}
}
+
+ /// Map the interpreter-recorded types of self onto the HIR self
+ fn profile_self(&mut self, state: &FrameState, self_param: InsnId) {
+ let iseq_insn_idx = state.insn_idx;
+ let Some(operand_types) = self.payload.profile.get_operand_types(iseq_insn_idx) else { return };
+ let entry = self.types.entry(iseq_insn_idx).or_insert_with(|| vec![]);
+ if operand_types.is_empty() {
+ return;
+ }
+ let self_type_distribution = &operand_types[0];
+ entry.push((self_param, TypeDistributionSummary::new(self_type_distribution)))
+ }
}
/// The index of the self parameter in the HIR function
@@ -2891,17 +3000,22 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
state.pc = pc;
let exit_state = state.clone();
- profiles.profile_stack(&exit_state);
-
- // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled.
- if get_option!(stats) {
- fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count));
- }
// try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
let opcode: u32 = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
.try_into()
.unwrap();
+
+ if opcode == YARVINSN_getinstancevariable {
+ profiles.profile_self(&exit_state, self_param);
+ } else {
+ profiles.profile_stack(&exit_state);
+ }
+
+ // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled.
+ if get_option!(stats) {
+ fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count));
+ }
// Move to the next instruction to compile
insn_idx += insn_len(opcode as usize);
@@ -8314,6 +8428,96 @@ mod opt_tests {
}
#[test]
+ fn test_optimize_getivar_embedded() {
+ eval("
+ class C
+ attr_reader :foo
+ def initialize
+ @foo = 42
+ end
+ end
+
+ O = C.new
+ def test(o) = o.foo
+ test O
+ test O
+ ");
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:10:
+ bb0(v0:BasicObject, v1:BasicObject):
+ PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
+ v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C]
+ v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038
+ v13:BasicObject = LoadIvarEmbedded v12, :@foo@0x1039
+ CheckInterrupts
+ Return v13
+ ");
+ }
+
+ #[test]
+ fn test_optimize_getivar_extended() {
+ eval("
+ class C
+ attr_reader :foo
+ def initialize
+ @foo = 42
+ end
+ end
+
+ O = C.new
+ def test(o) = o.foo
+ test O
+ test O
+ ");
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:10:
+ bb0(v0:BasicObject, v1:BasicObject):
+ PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
+ v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C]
+ v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038
+ v13:BasicObject = LoadIvarEmbedded v12, :@foo@0x1039
+ CheckInterrupts
+ Return v13
+ ");
+ }
+
+ #[test]
+ fn test_dont_optimize_getivar_polymorphic() {
+ crate::options::rb_zjit_prepare_options();
+ crate::options::internal_set_num_profiles(3);
+ eval("
+ class C
+ attr_reader :foo, :bar
+
+ def foo_then_bar
+ @foo = 1
+ @bar = 2
+ end
+
+ def bar_then_foo
+ @bar = 3
+ @foo = 4
+ end
+ end
+
+ O1 = C.new
+ O1.foo_then_bar
+ O2 = C.new
+ O2.bar_then_foo
+ def test(o) = o.foo
+ test O1
+ test O2
+ ");
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:20:
+ bb0(v0:BasicObject, v1:BasicObject):
+ v4:BasicObject = SendWithoutBlock v1, :foo
+ CheckInterrupts
+ Return v4
+ ");
+ }
+
+ #[test]
fn test_inline_attr_reader_constant() {
eval("
class C
@@ -8332,9 +8536,11 @@ mod opt_tests {
PatchPoint StableConstantNames(0x1000, O)
v11:BasicObject[VALUE(0x1008)] = Const Value(VALUE(0x1008))
PatchPoint MethodRedefined(C@0x1010, foo@0x1018, cme:0x1020)
- v13:BasicObject = GetIvar v11, :@foo
+ v14:HeapObject[VALUE(0x1008)] = GuardType v11, HeapObject
+ v15:HeapObject[VALUE(0x1008)] = GuardShape v14, 0x1048
+ v16:NilClass = Const Value(nil)
CheckInterrupts
- Return v13
+ Return v16
");
}
@@ -8357,9 +8563,11 @@ mod opt_tests {
PatchPoint StableConstantNames(0x1000, O)
v11:BasicObject[VALUE(0x1008)] = Const Value(VALUE(0x1008))
PatchPoint MethodRedefined(C@0x1010, foo@0x1018, cme:0x1020)
- v13:BasicObject = GetIvar v11, :@foo
+ v14:HeapObject[VALUE(0x1008)] = GuardType v11, HeapObject
+ v15:HeapObject[VALUE(0x1008)] = GuardShape v14, 0x1048
+ v16:NilClass = Const Value(nil)
CheckInterrupts
- Return v13
+ Return v16
");
}
@@ -8379,9 +8587,10 @@ mod opt_tests {
bb0(v0:BasicObject, v1:BasicObject):
PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C]
- v10:BasicObject = GetIvar v9, :@foo
+ v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038
+ v13:NilClass = Const Value(nil)
CheckInterrupts
- Return v10
+ Return v13
");
}
@@ -8401,9 +8610,10 @@ mod opt_tests {
bb0(v0:BasicObject, v1:BasicObject):
PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
v9:HeapObject[class_exact:C] = GuardType v1, HeapObject[class_exact:C]
- v10:BasicObject = GetIvar v9, :@foo
+ v12:HeapObject[class_exact:C] = GuardShape v9, 0x1038
+ v13:NilClass = Const Value(nil)
CheckInterrupts
- Return v10
+ Return v13
");
}
}
diff --git a/zjit/src/options.rs b/zjit/src/options.rs
index c2c9eea085..155c3805ba 100644
--- a/zjit/src/options.rs
+++ b/zjit/src/options.rs
@@ -279,6 +279,14 @@ fn update_profile_threshold() {
}
}
+pub fn internal_set_num_profiles(n: u8) {
+ let options = unsafe { OPTIONS.as_mut().unwrap() };
+ options.num_profiles = n;
+ let call_threshold = n.saturating_add(1);
+ unsafe { rb_zjit_call_threshold = call_threshold as u64; }
+ update_profile_threshold();
+}
+
/// Print YJIT options for `ruby --help`. `width` is width of option parts, and
/// `columns` is indent width of descriptions.
#[unsafe(no_mangle)]
diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs
index 9da48a0705..f2b12516c7 100644
--- a/zjit/src/profile.rs
+++ b/zjit/src/profile.rs
@@ -37,6 +37,10 @@ impl Profiler {
*(sp.offset(-1 - n))
}
}
+
+ fn peek_at_self(&self) -> VALUE {
+ unsafe { rb_get_cfp_self(self.cfp) }
+ }
}
/// API called from zjit_* instruction. opcode is the bare (non-zjit_*) instruction.
@@ -68,6 +72,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) {
YARVINSN_opt_or => profile_operands(profiler, profile, 2),
YARVINSN_opt_empty_p => profile_operands(profiler, profile, 1),
YARVINSN_opt_not => profile_operands(profiler, profile, 1),
+ YARVINSN_getinstancevariable => profile_self(profiler, profile),
YARVINSN_opt_send_without_block => {
let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };
@@ -106,8 +111,21 @@ fn profile_operands(profiler: &mut Profiler, profile: &mut IseqProfile, n: usize
}
}
+fn profile_self(profiler: &mut Profiler, profile: &mut IseqProfile) {
+ let types = &mut profile.opnd_types[profiler.insn_idx];
+ if types.is_empty() {
+ types.resize(1, TypeDistribution::new());
+ }
+ let obj = profiler.peek_at_self();
+ // TODO(max): Handle GC-hidden classes like Array, Hash, etc and make them look normal or
+ // drop them or something
+ let ty = ProfiledType::new(obj);
+ unsafe { rb_gc_writebarrier(profiler.iseq.into(), ty.class()) };
+ types[0].observe(ty);
+}
+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-struct Flags(u32);
+pub struct Flags(u32);
impl Flags {
const NONE: u32 = 0;
@@ -206,6 +224,10 @@ impl ProfiledType {
self.shape
}
+ pub fn flags(&self) -> Flags {
+ self.flags
+ }
+
pub fn is_fixnum(&self) -> bool {
self.class == unsafe { rb_cInteger } && self.flags.is_immediate()
}
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index 64bef8ca6a..cde2d4e80b 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -94,6 +94,7 @@ make_counters! {
exit_fixnum_mult_overflow,
exit_guard_type_failure,
exit_guard_bit_equals_failure,
+ exit_guard_shape_failure,
exit_patchpoint,
exit_callee_side_exit,
exit_obj_to_string_fallback,
@@ -151,6 +152,7 @@ pub fn exit_counter_ptr(reason: crate::hir::SideExitReason) -> *mut u64 {
FixnumMultOverflow => exit_fixnum_mult_overflow,
GuardType(_) => exit_guard_type_failure,
GuardBitEquals(_) => exit_guard_bit_equals_failure,
+ GuardShape(_) => exit_guard_shape_failure,
PatchPoint(_) => exit_patchpoint,
CalleeSideExit => exit_callee_side_exit,
ObjToStringFallback => exit_obj_to_string_fallback,