summaryrefslogtreecommitdiff
path: root/yjit/src/codegen.rs
diff options
context:
space:
mode:
Diffstat (limited to 'yjit/src/codegen.rs')
-rw-r--r--yjit/src/codegen.rs715
1 files changed, 415 insertions, 300 deletions
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 97ea3877aa..743a10e15a 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -234,20 +234,36 @@ impl<'a> JITState<'a> {
result
}
- /// Return true if the current ISEQ could escape an environment.
+ /// Return true if the JIT code can use [`Self::assume_no_ep_escape`]
+ /// and will run with an on-stack (`!VM_ENV_ESCAPED_P`) environment.
///
- /// As of vm_push_frame(), EP is always equal to BP. However, after pushing
- /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP.
- /// Also, some method calls escape the environment to the heap.
- fn escapes_ep(&self) -> bool {
+ /// ## Reasoning about ISEQs that are not currently running
+ ///
+ /// As of vm_push_frame() and its JIT code equivalent, EP is always equal to BP (the
+ /// environment is on-stack and has not escaped). We can usually assume this is the starting
+ /// condition upon entry into JIT code. However, after pushing a frame and before entry into
+ /// JIT code, some ISEQ setups call vm_bind_update_env(), which redirects EP.
+ ///
+ /// ## After making the assumption
+ ///
+ /// After JIT code entry, many ruby operations can have the environment escape to heap. These
+ /// are handled by [`crate::invariants`].
+ ///
+ /// Exceptional entry through jit_exec_exception() is an extreme case of the environment state
+ /// changing between vm_push_frame() and entry into JIT code. The frame could have been pushed
+ /// before YJIT is enabled. The exception entry point refuses entry with an escaped environment.
+ fn can_assume_on_stack_env(&self) -> bool {
match unsafe { get_iseq_body_type(self.iseq) } {
// <main> frame is always associated to TOPLEVEL_BINDING.
ISEQ_TYPE_MAIN |
// Kernel#eval uses a heap EP when a Binding argument is not nil.
- ISEQ_TYPE_EVAL => true,
- // If this ISEQ has previously escaped EP, give up the optimization.
- _ if iseq_escapes_ep(self.iseq) => true,
- _ => false,
+ ISEQ_TYPE_EVAL => false,
+ // Check the running environment if compiling for it
+ _ if unsafe { self.iseq == get_cfp_iseq(self.get_cfp()) && cfp_env_has_escaped(self.get_cfp()) } => false,
+ // If we've seen this ISEQ run with an escaped environment, give up the optimization
+ // to avoid excessive invalidations (even though it may be fine for soundness).
+ _ if seen_escaped_env(self.iseq) => false,
+ _ => true,
}
}
@@ -376,8 +392,8 @@ impl<'a> JITState<'a> {
if jit_ensure_block_entry_exit(self, asm).is_none() {
return false; // out of space, give up
}
- if self.escapes_ep() {
- return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop.
+ if !self.can_assume_on_stack_env() {
+ return false; // Unsound or unprofitable to make the assumption
}
self.no_ep_escape = true;
true
@@ -438,7 +454,7 @@ impl<'a> JITState<'a> {
fn flush_perf_symbols(&self, cb: &CodeBlock) {
assert_eq!(0, self.perf_stack.len());
let path = format!("/tmp/perf-{}.map", std::process::id());
- let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap();
+ let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap());
for sym in self.perf_map.borrow().iter() {
if let (start, Some(end), name) = sym {
// In case the code straddles two pages, part of it belongs to the symbol.
@@ -821,11 +837,11 @@ fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
/// Generate an exit to return to the interpreter
fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
- #[cfg(all(feature = "disasm", not(test)))]
- {
+ #[cfg(not(test))]
+ asm_comment!(asm, "exit to interpreter on {}", {
let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
- asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize));
- }
+ insn_name(opcode as usize)
+ });
if asm.ctx.is_return_landing() {
asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
@@ -1094,11 +1110,7 @@ pub fn gen_entry_prologue(
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
- if get_option_ref!(dump_disasm).is_some() {
- asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
- } else {
- asm_comment!(asm, "YJIT entry");
- }
+ asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
asm.frame_setup();
@@ -1212,7 +1224,7 @@ fn gen_check_ints(
// Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages,
// signal_exec, or rb_postponed_job_flush.
- let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG));
+ let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32));
asm.test(interrupt_flag, interrupt_flag);
asm.jnz(Target::side_exit(counter));
@@ -1296,7 +1308,6 @@ pub fn gen_single_block(
let mut asm = Assembler::new(jit.num_locals());
asm.ctx = ctx;
- #[cfg(feature = "disasm")]
if get_option_ref!(dump_disasm).is_some() {
let blockid_idx = blockid.idx;
let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() };
@@ -2263,7 +2274,8 @@ fn gen_expandarray(
let comptime_recv = jit.peek_at_stack(&asm.ctx, 0);
- // If the comptime receiver is not an array
+ // If the comptime receiver is not an array, speculate for when the `rb_check_array_type()`
+ // conversion returns nil and without side-effects (e.g. arbitrary method calls).
if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } {
// at compile time, ensure to_ary is not defined
let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) };
@@ -2275,13 +2287,19 @@ fn gen_expandarray(
return None;
}
+ // Bail when method_missing is defined to avoid generating code to call it.
+ // Also, for simplicity, bail when BasicObject#method_missing has been removed.
+ if !assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(method_missing)) {
+ gen_counter_incr(jit, asm, Counter::expandarray_method_missing);
+ return None;
+ }
+
// invalidate compile block if to_ary is later defined
jit.assume_method_lookup_stable(asm, target_cme);
jit_guard_known_klass(
jit,
asm,
- comptime_recv.class_of(),
array_opnd,
array_opnd.into(),
comptime_recv,
@@ -2311,7 +2329,7 @@ fn gen_expandarray(
}
// Get the compile-time array length
- let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 };
+ let comptime_len = unsafe { rb_jit_array_len(comptime_recv) as u32 };
// Move the array from the stack and check that it's an array.
guard_object_is_array(
@@ -2423,28 +2441,94 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
ep_opnd
}
-// Gets the EP of the ISeq of the containing method, or "local level".
-// Equivalent of GET_LEP() macro.
+/// Get the EP of the ISeq of the containing method, or "local level EP".
+/// Equivalent to `GET_LEP()` with a constraint:
+/// `ISEQ_TYPE_METHOD == iseq_under_compilation->body->local_iseq->body->type`.
+/// No bad memory access happens when this condition is not met, just that the
+/// EP returned may not be `VM_ENV_FLAG_LOCAL`. Practically, ISeqs that don't
+/// meet this condition, such as `ISEQ_TYPE_TOP`, also don't need to use this operation.
fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
- // Equivalent of get_lvar_level() in compile.c
- fn get_lvar_level(iseq: IseqPtr) -> u32 {
- if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
- 0
- } else {
- 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) })
- }
- }
-
- let level = get_lvar_level(jit.get_iseq());
+ // GET_LEP() chases the parent environment pointer to reach the local environment. Inside
+ // a descendant of ISEQ_TYPE_METHOD, it chases the same number of times as chasing
+ // the parent iseq pointer to reach the local iseq. See get_lvar_level() in compile.c
+ let mut iseq = jit.get_iseq();
+ let local_iseq = unsafe { rb_get_iseq_body_local_iseq(iseq) };
+ let mut level = 0;
+ while iseq != local_iseq {
+ iseq = unsafe { rb_get_iseq_body_parent_iseq(iseq) };
+ level += 1;
+ }
+ asm_comment!(asm, "get_lep(level: {level})");
gen_get_ep(asm, level)
}
+/// Load the value in the ME_CREF slot of the EP that contains the running CME.
+/// Returns the slot value directly, which is only useful for guarding that the
+/// CME has not changed. Unlike rb_vm_frame_method_entry(), we never dereference
+/// `ep[VM_ENV_DATA_INDEX_ME_CREF]` but rather rely on `ep[VM_ENV_DATA_INDEX_FLAGS]`
+/// to terminate the search, since we load the flags anyways for EP hopping.
+/// When `ep[VM_ENV_DATA_INDEX_ME_CREF]` is not a CME, it can't match the expected
+/// CME, and the guard fails.
+fn gen_get_running_cme_or_sentinal(jit: &JITState, asm: &mut Assembler) -> Opnd {
+ // When not in a block, the running CME is at `cfp->ep`.
+ if jit.iseq == unsafe { rb_get_iseq_body_local_iseq(jit.iseq) } {
+ asm_comment!(asm, "cfp->ep[VM_ENV_DATA_INDEX_ME_CREF]");
+ let lep_opnd = gen_get_ep(asm, 0);
+ Opnd::mem(
+ VALUE_BITS,
+ lep_opnd,
+ SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF,
+ )
+ } else {
+ // Loop through EPs to find the one containing the CME.
+ // Stop when we find an EP with VM_FRAME_FLAG_BMETHOD (bmethod frame with CME)
+ // or VM_ENV_FLAG_LOCAL (local frame which is METHOD/CFUNC/IFUNC with CME).
+ //
+ // We cannot unroll to a static hop count like gen_get_lep() because bmethods
+ // defined inside methods may have a CME that lives at a EP cloers to the
+ // starting EP than at the local and final EP level. Each level of nesting can
+ // dynamically run with and without VM_FRAME_FLAG_BMETHOD set.
+ asm_comment!(asm, "search for running cme");
+ let loop_label = asm.new_label("cme_loop");
+ let done_label = asm.new_label("cme_done");
+
+ let ep_opnd = Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_EP);
+ let ep_opnd = asm.load(ep_opnd);
+
+ asm.write_label(loop_label);
+ // Load flags from ep[VM_ENV_DATA_INDEX_FLAGS]
+ let flags = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)));
+ // Check if VM_FRAME_FLAG_BMETHOD or VM_ENV_FLAG_LOCAL is set.
+ // If either is set, this EP contains the CME.
+ let check_flags = (VM_FRAME_FLAG_BMETHOD | VM_ENV_FLAG_LOCAL).as_usize();
+ asm.test(flags, check_flags.into());
+ asm.jnz(done_label);
+ // Get the previous EP from the current EP
+ // See GET_PREV_EP(ep) macro
+ // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
+ let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL;
+ let next_ep_opnd = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, offs));
+ let next_ep_opnd = asm.and(next_ep_opnd, (!0x03_i64).into());
+ asm.load_into(ep_opnd, next_ep_opnd);
+ asm.jmp(loop_label);
+ asm.write_label(done_label);
+
+ // Load and return the CME from ep[VM_ENV_DATA_INDEX_ME_CREF]
+ asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF))
+ }
+}
+
fn gen_getlocal_generic(
jit: &mut JITState,
asm: &mut Assembler,
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
+ // Split the block if we need to invalidate this instruction when EP escapes
+ if level == 0 && jit.can_assume_on_stack_env() && !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) {
// Load the local using SP register
asm.local_opnd(ep_offset)
@@ -2511,6 +2595,7 @@ fn gen_setlocal_generic(
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
+ // Post condition: The type of of the set local is updated in the Context.
let value_type = asm.ctx.get_opnd_type(StackOpnd(0));
// Fallback because of write barrier
@@ -2532,9 +2617,19 @@ fn gen_setlocal_generic(
);
asm.stack_pop(1);
+ // Set local type in the context
+ if level == 0 {
+ let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize();
+ asm.ctx.set_local_type(local_idx, value_type);
+ }
return Some(KeepCompiling);
}
+ // Split the block if we need to invalidate this instruction when EP escapes
+ if level == 0 && jit.can_assume_on_stack_env() && !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) {
// Load flags and the local using SP register
let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32);
@@ -2579,6 +2674,7 @@ fn gen_setlocal_generic(
);
}
+ // Set local type in the context
if level == 0 {
let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize();
asm.ctx.set_local_type(local_idx, value_type);
@@ -2668,7 +2764,7 @@ fn gen_newhash(
Some(KeepCompiling)
}
-fn gen_putstring(
+fn gen_dupstring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
@@ -2688,7 +2784,7 @@ fn gen_putstring(
Some(KeepCompiling)
}
-fn gen_putchilledstring(
+fn gen_dupchilledstring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
@@ -2743,7 +2839,7 @@ fn gen_checkkeyword(
) -> Option<CodegenStatus> {
// When a keyword is unspecified past index 32, a hash will be used
// instead. This can only happen in iseqs taking more than 32 keywords.
- if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } {
+ if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= VM_KW_SPECIFIED_BITS_MAX.try_into().unwrap() } {
return None;
}
@@ -2838,24 +2934,12 @@ fn gen_get_ivar(
recv: Opnd,
recv_opnd: YARVOpnd,
) -> Option<CodegenStatus> {
- let comptime_val_klass = comptime_receiver.class_of();
-
// If recv isn't already a register, load it.
let recv = match recv {
Opnd::InsnOut { .. } => recv,
_ => asm.load(recv),
};
- // Check if the comptime class uses a custom allocator
- let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
- let uses_custom_allocator = match custom_allocator {
- Some(alloc_fun) => {
- let allocate_instance = rb_class_allocate_instance as *const u8;
- alloc_fun as *const u8 != allocate_instance
- }
- None => false,
- };
-
// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
// Use a general C call at the last chain to avoid exits on megamorphic shapes
@@ -2864,12 +2948,9 @@ fn gen_get_ivar(
gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic);
}
- // If the class uses the default allocator, instances should all be T_OBJECT
- // NOTE: This assumes nobody changes the allocator of the class after allocation.
- // Eventually, we can encode whether an object is T_OBJECT or not
- // inside object shapes.
+ // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type.
// too-complex shapes can't use index access, so we use rb_ivar_get for them too.
- if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic {
+ if !comptime_receiver.heap_object_p() || comptime_receiver.shape_complex() || megamorphic {
// General case. Call rb_ivar_get().
// VALUE rb_ivar_get(VALUE obj, ID id)
asm_comment!(asm, "call rb_ivar_get()");
@@ -2894,9 +2975,8 @@ fn gen_get_ivar(
let ivar_index = unsafe {
let shape_id = comptime_receiver.shape_id_of();
- let shape = rb_shape_get_shape_by_id(shape_id);
- let mut ivar_index: u32 = 0;
- if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) {
+ let mut ivar_index: attr_index_t = 0;
+ if rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) {
Some(ivar_index as usize)
} else {
None
@@ -2906,10 +2986,7 @@ fn gen_get_ivar(
// Guard heap object (recv_opnd must be used before stack_pop)
guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap);
- // Compile time self is embedded and the ivar index lands within the object
- let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) };
-
- let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) };
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
@@ -2937,28 +3014,37 @@ fn gen_get_ivar(
asm.mov(out_opnd, Qnil.into());
}
Some(ivar_index) => {
- if embed_test_result {
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
-
- // Load the variable
- let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
- let ivar_opnd = Opnd::mem(64, recv, offs);
-
- // Push the ivar on the stack
- let out_opnd = asm.stack_push(Type::Unknown);
- asm.mov(out_opnd, ivar_opnd);
+ let ivar_opnd = if receiver_t_object {
+ if comptime_receiver.embedded_p() {
+ // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h
+
+ // Load the variable
+ let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
+ Opnd::mem(64, recv, offs)
+ } else {
+ // Compile time value is *not* embedded.
+
+ // Get a pointer to the extended table
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32));
+
+ // Read the ivar from the extended table
+ Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32)
+ }
} else {
- // Compile time value is *not* embedded.
+ asm_comment!(asm, "call rb_ivar_get_at()");
- // Get a pointer to the extended table
- let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
-
- // Read the ivar from the extended table
- let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
+ if assume_single_ractor_mode(jit, asm) {
+ asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())])
+ } else {
+ // The function could raise RactorIsolationError.
+ jit_prepare_non_leaf_call(jit, asm);
+ asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)])
+ }
+ };
- let out_opnd = asm.stack_push(Type::Unknown);
- asm.mov(out_opnd, ivar_opnd);
- }
+ // Push the ivar on the stack
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ivar_opnd);
}
}
@@ -3020,7 +3106,7 @@ fn gen_write_iv(
// Compile time value is *not* embedded.
// Get a pointer to the extended table
- let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32));
// Write the ivar in to the extended table
let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
@@ -3064,8 +3150,6 @@ fn gen_set_ivar(
recv_opnd: YARVOpnd,
ic: Option<*const iseq_inline_iv_cache_entry>,
) -> Option<CodegenStatus> {
- let comptime_val_klass = comptime_receiver.class_of();
-
// If the comptime receiver is frozen, writing an IV will raise an exception
// and we don't want to JIT code to deal with that situation.
if comptime_receiver.is_frozen() {
@@ -3075,16 +3159,6 @@ fn gen_set_ivar(
let stack_type = asm.ctx.get_opnd_type(StackOpnd(0));
- // Check if the comptime class uses a custom allocator
- let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
- let uses_custom_allocator = match custom_allocator {
- Some(alloc_fun) => {
- let allocate_instance = rb_class_allocate_instance as *const u8;
- alloc_fun as *const u8 != allocate_instance
- }
- None => false,
- };
-
// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
// Use a general C call at the last chain to avoid exits on megamorphic shapes
@@ -3094,12 +3168,11 @@ fn gen_set_ivar(
}
// Get the iv index
- let shape_too_complex = comptime_receiver.shape_too_complex();
- let ivar_index = if !shape_too_complex {
+ let shape_complex = comptime_receiver.shape_complex();
+ let ivar_index = if !comptime_receiver.special_const_p() && !shape_complex {
let shape_id = comptime_receiver.shape_id_of();
- let shape = unsafe { rb_shape_get_shape_by_id(shape_id) };
- let mut ivar_index: u32 = 0;
- if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } {
+ let mut ivar_index: attr_index_t = 0;
+ if unsafe { rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) } {
Some(ivar_index as usize)
} else {
None
@@ -3109,27 +3182,31 @@ fn gen_set_ivar(
};
// The current shape doesn't contain this iv, we need to transition to another shape.
- let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() {
- let current_shape = comptime_receiver.shape_of();
- let next_shape = unsafe { rb_shape_get_next_no_warnings(current_shape, comptime_receiver, ivar_name) };
- let next_shape_id = unsafe { rb_shape_id(next_shape) };
+ let mut new_shape_complex = false;
+ let new_shape = if !shape_complex && receiver_t_object && ivar_index.is_none() {
+ let current_shape_id = comptime_receiver.shape_id_of();
+ // We don't need to check about imemo_fields here because we're definitely looking at a T_OBJECT.
+ let klass = unsafe { rb_obj_class(comptime_receiver) };
+ let next_shape_id = unsafe { rb_shape_transition_add_ivar_no_warnings(current_shape_id, ivar_name, klass) };
// If the VM ran out of shapes, or this class generated too many leaf,
- // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table).
- if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID {
+ // it may be de-optimized into OBJ_COMPLEX_SHAPE (hash-table).
+ new_shape_complex = unsafe { rb_jit_shape_complex_p(next_shape_id) };
+ if new_shape_complex {
Some((next_shape_id, None, 0_usize))
} else {
- let current_capacity = unsafe { (*current_shape).capacity };
+ let current_capacity = unsafe { rb_yjit_shape_capacity(current_shape_id) };
+ let next_capacity = unsafe { rb_yjit_shape_capacity(next_shape_id) };
- // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to
+ // If the new shape has a different capacity, or is COMPLEX, we'll have to
// reallocate it.
- let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity };
+ let needs_extension = next_capacity != current_capacity;
// We can write to the object, but we need to transition the shape
- let ivar_index = unsafe { (*current_shape).next_iv_index } as usize;
+ let ivar_index = unsafe { rb_yjit_shape_index(next_shape_id) } as usize;
let needs_extension = if needs_extension {
- Some((current_capacity, unsafe { (*next_shape).capacity }))
+ Some((current_capacity, next_capacity))
} else {
None
};
@@ -3138,12 +3215,10 @@ fn gen_set_ivar(
} else {
None
};
- let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _)));
- // If the receiver isn't a T_OBJECT, or uses a custom allocator,
- // then just write out the IV write as a function call.
+ // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call.
// too-complex shapes can't use index access, so we use rb_ivar_get for them too.
- if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic {
+ if !receiver_t_object || shape_complex || new_shape_complex || megamorphic {
// The function could raise FrozenError.
// Note that this modifies REG_SP, which is why we do it first
jit_prepare_non_leaf_call(jit, asm);
@@ -3167,7 +3242,7 @@ fn gen_set_ivar(
asm.ccall(
rb_vm_setinstancevariable as *const u8,
vec![
- Opnd::const_ptr(jit.iseq as *const u8),
+ VALUE(jit.iseq as usize).into(),
Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
ivar_name.into(),
val_opnd,
@@ -3186,7 +3261,7 @@ fn gen_set_ivar(
// Upgrade type
guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap);
- let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) };
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
@@ -3360,7 +3435,7 @@ fn gen_definedivar(
// Specialize base on compile time values
let comptime_receiver = jit.peek_at_self();
- if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH {
+ if comptime_receiver.special_const_p() || comptime_receiver.shape_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH {
// Fall back to calling rb_ivar_defined
// Save the PC and SP because the callee may allocate
@@ -3386,9 +3461,8 @@ fn gen_definedivar(
let shape_id = comptime_receiver.shape_id_of();
let ivar_exists = unsafe {
- let shape = rb_shape_get_shape_by_id(shape_id);
- let mut ivar_index: u32 = 0;
- rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index)
+ let mut ivar_index: attr_index_t = 0;
+ rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index)
};
// Guard heap object (recv_opnd must be used before stack_pop)
@@ -3686,7 +3760,6 @@ fn gen_equality_specialized(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cString },
a_opnd,
a_opnd.into(),
comptime_a,
@@ -3712,7 +3785,6 @@ fn gen_equality_specialized(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cString },
b_opnd,
b_opnd.into(),
comptime_b,
@@ -3809,7 +3881,6 @@ fn gen_opt_aref(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cArray },
recv_opnd,
recv_opnd.into(),
comptime_recv,
@@ -3849,7 +3920,6 @@ fn gen_opt_aref(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cHash },
recv_opnd,
recv_opnd.into(),
comptime_recv,
@@ -3902,7 +3972,6 @@ fn gen_opt_aset(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cArray },
recv,
recv.into(),
comptime_recv,
@@ -3914,7 +3983,6 @@ fn gen_opt_aset(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cInteger },
key,
key.into(),
comptime_key,
@@ -3947,7 +4015,6 @@ fn gen_opt_aset(
jit_guard_known_klass(
jit,
asm,
- unsafe { rb_cHash },
recv,
recv.into(),
comptime_recv,
@@ -3975,38 +4042,6 @@ fn gen_opt_aset(
}
}
-fn gen_opt_aref_with(
- jit: &mut JITState,
- asm: &mut Assembler,
-) -> Option<CodegenStatus>{
- // We might allocate or raise
- jit_prepare_non_leaf_call(jit, asm);
-
- let key_opnd = Opnd::Value(jit.get_arg(0));
- let recv_opnd = asm.stack_opnd(0);
-
- extern "C" {
- fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE;
- }
-
- let val_opnd = asm.ccall(
- rb_vm_opt_aref_with as *const u8,
- vec![
- recv_opnd,
- key_opnd
- ],
- );
- asm.stack_pop(1); // Keep it on stack during GC
-
- asm.cmp(val_opnd, Qundef.into());
- asm.je(Target::side_exit(Counter::opt_aref_with_qundef));
-
- let top = asm.stack_push(Type::Unknown);
- asm.mov(top, val_opnd);
-
- return Some(KeepCompiling);
-}
-
fn gen_opt_and(
jit: &mut JITState,
asm: &mut Assembler,
@@ -4273,11 +4308,11 @@ fn gen_opt_ary_freeze(
return None;
}
- let str = jit.get_arg(0);
+ let ary = jit.get_arg(0);
// Push the return value onto the stack
let stack_ret = asm.stack_push(Type::CArray);
- asm.mov(stack_ret, str.into());
+ asm.mov(stack_ret, ary.into());
Some(KeepCompiling)
}
@@ -4290,11 +4325,11 @@ fn gen_opt_hash_freeze(
return None;
}
- let str = jit.get_arg(0);
+ let hash = jit.get_arg(0);
// Push the return value onto the stack
let stack_ret = asm.stack_push(Type::CHash);
- asm.mov(stack_ret, str.into());
+ asm.mov(stack_ret, hash.into());
Some(KeepCompiling)
}
@@ -4616,21 +4651,8 @@ fn gen_opt_case_dispatch(
// Check that all cases are fixnums to avoid having to register BOP assumptions on
// all the types that case hashes support. This spends compile time to save memory.
- fn case_hash_all_fixnum_p(hash: VALUE) -> bool {
- let mut all_fixnum = true;
- unsafe {
- unsafe extern "C" fn per_case(key: st_data_t, _value: st_data_t, data: st_data_t) -> c_int {
- (if VALUE(key as usize).fixnum_p() {
- ST_CONTINUE
- } else {
- (data as *mut bool).write(false);
- ST_STOP
- }) as c_int
- }
- rb_hash_stlike_foreach(hash, Some(per_case), (&mut all_fixnum) as *mut _ as st_data_t);
- }
-
- all_fixnum
+ fn case_hash_all_fixnum_p(cdhash: VALUE) -> bool {
+ unsafe { rb_yjit_cdhash_all_fixnum_p(cdhash) }
}
// If megamorphic, fallback to compiling branch instructions after opt_case_dispatch
@@ -4657,12 +4679,11 @@ fn gen_opt_case_dispatch(
// Get the offset for the compile-time key
let mut offset = 0;
- unsafe { rb_hash_stlike_lookup(case_hash, comptime_key.0 as _, &mut offset) };
- let jump_offset = if offset == 0 {
+ let jump_offset = if unsafe { rb_yjit_cdhash_lookup(case_hash, comptime_key.0 as _, &mut offset) } == 0 {
// NOTE: If we hit the else branch with various values, it could negatively impact the performance.
else_offset
} else {
- (offset as u32) >> 1 // FIX2LONG
+ offset as u32
};
// Jump to the offset of case or else
@@ -4683,7 +4704,7 @@ fn gen_branchif(
let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
- if jump_offset < 0 {
+ if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchif_without_ints as usize {
gen_check_ints(asm, Counter::branchif_interrupted);
}
@@ -4735,7 +4756,7 @@ fn gen_branchunless(
let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
- if jump_offset < 0 {
+ if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchunless_without_ints as usize {
gen_check_ints(asm, Counter::branchunless_interrupted);
}
@@ -4788,7 +4809,7 @@ fn gen_branchnil(
let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
- if jump_offset < 0 {
+ if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchnil_without_ints as usize {
gen_check_ints(asm, Counter::branchnil_interrupted);
}
@@ -4873,6 +4894,69 @@ fn gen_throw(
Some(EndBlock)
}
+fn gen_opt_new(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let cd = jit.get_arg(0).as_ptr();
+ let jump_offset = jit.get_arg(1).as_i32();
+
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
+ let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
+ let mid = unsafe { vm_ci_mid(ci) };
+ let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
+
+ let recv_idx = argc;
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize);
+
+ // This is a singleton class
+ let comptime_recv_klass = comptime_recv.class_of();
+
+ let recv = asm.stack_opnd(recv_idx);
+
+ perf_call!("opt_new: ", jit_guard_known_klass(
+ jit,
+ asm,
+ recv,
+ recv.into(),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_klass_megamorphic,
+ ));
+
+ // We now know that it's always comptime_recv_klass
+ if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) {
+ // Fast path
+ // call rb_class_alloc to actually allocate
+ jit_prepare_non_leaf_call(jit, asm);
+ let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]);
+
+ // Get a reference to the stack location where we need to save the
+ // return instance.
+ let result = asm.stack_opnd(recv_idx + 1);
+ let recv = asm.stack_opnd(recv_idx);
+
+ // Replace the receiver for the upcoming initialize call
+ asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap));
+ asm.mov(recv, obj);
+
+ // Save the allocated object for return
+ asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap));
+ asm.mov(result, obj);
+
+ jump_to_next_insn(jit, asm)
+ } else {
+ // general case
+
+ // Get the branch target instruction offsets
+ let jump_idx = jit.next_insn_idx() as i32 + jump_offset;
+ return end_block_with_jump(jit, asm, jump_idx as u16);
+ }
+}
+
fn gen_jump(
jit: &mut JITState,
asm: &mut Assembler,
@@ -4880,7 +4964,7 @@ fn gen_jump(
let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
- if jump_offset < 0 {
+ if jump_offset < 0 && jit.get_opcode() != YARVINSN_jump_without_ints as usize {
gen_check_ints(asm, Counter::jump_interrupted);
}
@@ -4906,13 +4990,13 @@ fn gen_jump(
fn jit_guard_known_klass(
jit: &mut JITState,
asm: &mut Assembler,
- known_klass: VALUE,
obj_opnd: Opnd,
insn_opnd: YARVOpnd,
sample_instance: VALUE,
max_chain_depth: u8,
counter: Counter,
) {
+ let known_klass = sample_instance.class_of();
let val_type = asm.ctx.get_opnd_type(insn_opnd);
if val_type.known_class() == Some(known_klass) {
@@ -5018,7 +5102,7 @@ fn jit_guard_known_klass(
assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String")
};
} else {
- assert!(!val_type.is_imm());
+ assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}");
// Check that the receiver is a heap object
// Note: if we get here, the class doesn't have immediate instances.
@@ -5662,7 +5746,6 @@ fn jit_rb_float_plus(
jit_guard_known_klass(
jit,
asm,
- comptime_obj.class_of(),
obj,
obj.into(),
comptime_obj,
@@ -5704,7 +5787,6 @@ fn jit_rb_float_minus(
jit_guard_known_klass(
jit,
asm,
- comptime_obj.class_of(),
obj,
obj.into(),
comptime_obj,
@@ -5746,7 +5828,6 @@ fn jit_rb_float_mul(
jit_guard_known_klass(
jit,
asm,
- comptime_obj.class_of(),
obj,
obj.into(),
comptime_obj,
@@ -5788,7 +5869,6 @@ fn jit_rb_float_div(
jit_guard_known_klass(
jit,
asm,
- comptime_obj.class_of(),
obj,
obj.into(),
comptime_obj,
@@ -6052,7 +6132,6 @@ fn jit_rb_str_getbyte(
jit_guard_known_klass(
jit,
asm,
- comptime_idx.class_of(),
idx,
idx.into(),
comptime_idx,
@@ -6169,16 +6248,19 @@ fn jit_rb_str_dup(
jit_prepare_call_with_gc(jit, asm);
- // Check !FL_ANY_RAW(str, FL_EXIVAR), which is part of BARE_STRING_P.
- let recv_opnd = asm.stack_pop(1);
+ let recv_opnd = asm.stack_opnd(0);
let recv_opnd = asm.load(recv_opnd);
- let flags_opnd = Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS);
- asm.test(flags_opnd, Opnd::Imm(RUBY_FL_EXIVAR as i64));
+
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(64, recv_opnd, shape_id_offset);
+ asm.test(shape_opnd, Opnd::UImm(SHAPE_ID_HAS_IVAR_MASK as u64));
asm.jnz(Target::side_exit(Counter::send_str_dup_exivar));
// Call rb_str_dup
- let stack_ret = asm.stack_push(Type::CString);
let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
+
+ asm.stack_pop(1);
+ let stack_ret = asm.stack_push(Type::CString);
asm.mov(stack_ret, ret_opnd);
true
@@ -6234,7 +6316,7 @@ fn jit_rb_str_concat_codepoint(
guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0));
- asm.ccall(rb_yjit_str_concat_codepoint as *const u8, vec![recv, codepoint]);
+ asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]);
// The receiver is the return value, so we only need to pop the codepoint argument off the stack.
// We can reuse the receiver slot in the stack as the return value.
@@ -6540,6 +6622,7 @@ fn jit_rb_f_block_given_p(
true
}
+/// Codegen for `block_given?` and `defined?(yield)`
fn gen_block_given(
jit: &mut JITState,
asm: &mut Assembler,
@@ -6549,16 +6632,24 @@ fn gen_block_given(
) {
asm_comment!(asm, "block_given?");
- // Same as rb_vm_frame_block_handler
- let ep_opnd = gen_get_lep(jit, asm);
- let block_handler = asm.load(
- Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
- );
+ // `yield` goes to the block handler stowed in the "local" iseq which is
+ // the current iseq or a parent. Only the "method" iseq type can be passed a
+ // block handler. (e.g. `yield` in the top level script is a syntax error.)
+ let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) };
+ if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD {
+ // Same as rb_vm_frame_block_handler
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
- // Return `block_handler != VM_BLOCK_HANDLER_NONE`
- asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
- let block_given = asm.csel_ne(true_opnd, false_opnd);
- asm.mov(out_opnd, block_given);
+ // Return `block_handler != VM_BLOCK_HANDLER_NONE`
+ asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
+ let block_given = asm.csel_ne(true_opnd, false_opnd);
+ asm.mov(out_opnd, block_given);
+ } else {
+ asm.mov(out_opnd, false_opnd);
+ }
}
// Codegen for rb_class_superclass()
@@ -6631,7 +6722,7 @@ fn jit_thread_s_current(
asm.stack_pop(1);
// ec->thread_ptr
- let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR));
+ let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR as i32));
// thread->self
let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF);
@@ -7096,7 +7187,7 @@ fn gen_send_cfunc(
asm_comment!(asm, "set ec->cfp");
let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32)));
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), new_cfp);
if !kw_arg.is_null() {
// Build a hash from all kwargs passed
@@ -7192,7 +7283,7 @@ fn gen_send_cfunc(
// Pop the stack frame (ec->cfp++)
// Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved
// register
- let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP);
+ let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32);
asm.store(ec_cfp_opnd, CFP);
// cfunc calls may corrupt types
@@ -7358,7 +7449,7 @@ fn gen_send_bmethod(
) -> Option<CodegenStatus> {
let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) };
- let proc = unsafe { rb_yjit_get_proc_ptr(procv) };
+ let proc = unsafe { rb_jit_get_proc_ptr(procv) };
let proc_block = unsafe { &(*proc).block };
if proc_block.type_ != block_type_iseq {
@@ -7368,11 +7459,12 @@ fn gen_send_bmethod(
let capture = unsafe { proc_block.as_.captured.as_ref() };
let iseq = unsafe { *capture.code.iseq.as_ref() };
- // Optimize for single ractor mode and avoid runtime check for
- // "defined with an un-shareable Proc in a different Ractor"
- if !assume_single_ractor_mode(jit, asm) {
- gen_counter_incr(jit, asm, Counter::send_bmethod_ractor);
- return None;
+ if !procv.shareable_p() {
+ let ractor_serial = unsafe { rb_yjit_cme_ractor_serial(cme) };
+ asm_comment!(asm, "guard current ractor == {}", ractor_serial);
+ let current_ractor_serial = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_RACTOR_ID as i32));
+ asm.cmp(current_ractor_serial, ractor_serial.into());
+ asm.jne(Target::side_exit(Counter::send_bmethod_ractor));
}
// Passing a block to a block needs logic different from passing
@@ -7438,6 +7530,12 @@ fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, block: Opti
let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32();
let local_idx = ep_offset_to_local_idx(iseq, ep_offset);
+ // Only inline getlocal on a parameter. DCE in the IESQ builder can
+ // make a two-instruction ISEQ that does not return a parameter.
+ if local_idx >= unsafe { get_iseq_body_param_size(iseq) } {
+ return None;
+ }
+
if unsafe { rb_simple_iseq_p(iseq) } {
return Some(IseqReturn::LocalVariable(local_idx));
} else if unsafe { rb_iseq_only_kwparam_p(iseq) } {
@@ -7594,7 +7692,7 @@ fn gen_send_iseq(
gen_counter_incr(jit, asm, Counter::send_iseq_splat_not_array);
return None;
} else {
- unsafe { rb_yjit_array_len(array) as u32}
+ unsafe { rb_jit_array_len(array) as u32}
};
// Arity check accounting for size of the splat. When callee has rest parameters, we insert
@@ -7685,7 +7783,7 @@ fn gen_send_iseq(
gen_counter_incr(jit, asm, Counter::num_send_iseq);
// Shortcut for special `Primitive.attr! :leaf` builtins
- let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) };
+ let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) };
let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) };
let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) };
let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins
@@ -7862,6 +7960,11 @@ fn gen_send_iseq(
gen_counter_incr(jit, asm, Counter::send_iseq_clobbering_block_arg);
return None;
}
+ if iseq_has_rest || has_kwrest {
+ // The proc would be stored above the current stack top, where GC can't see it
+ gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_gc_unsafe);
+ return None;
+ }
let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg
let callee_specval = asm.ctx.sp_opnd(callee_specval);
asm.store(callee_specval, proc);
@@ -8274,7 +8377,7 @@ fn gen_send_iseq(
// We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall().
if get_option!(gen_stats) {
// Protect caller-saved registers in case they're used for arguments
- asm.cpush_all();
+ let mapping = asm.cpush_all();
// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);
@@ -8284,7 +8387,7 @@ fn gen_send_iseq(
// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
- asm.cpop_all();
+ asm.cpop_all(mapping);
}
// The callee might change locals through Kernel#binding and other means.
@@ -8319,7 +8422,7 @@ fn gen_send_iseq(
asm_comment!(asm, "switch to new CFP");
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
// Directly jump to the entry point of the callee
gen_direct_jump(
@@ -8870,9 +8973,9 @@ fn gen_struct_aref(
handle_opt_send_shift_stack(asm, argc);
}
- // All structs from the same Struct class should have the same
+ // All structs from the same Struct class and shape_id should have the same
// length. So if our comptime_recv is embedded all runtime
- // structs of the same class should be as well, and the same is
+ // structs of the same class and shape_id should be as well, and the same is
// true of the converse.
let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) };
@@ -8907,6 +9010,12 @@ fn gen_struct_aset(
return None;
}
+ // If the comptime receiver is frozen, writing a struct member will raise an exception
+ // and we don't want to JIT code to deal with that situation.
+ if comptime_recv.is_frozen() {
+ return None;
+ }
+
if c_method_tracing_currently_enabled(jit) {
// Struct accesses need fire c_call and c_return events, which we can't support
// See :attr-tracing:
@@ -8927,6 +9036,17 @@ fn gen_struct_aset(
assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+ // Even if the comptime recv was not frozen, future recv may be. So we need to emit a guard
+ // that the recv is not frozen.
+ // We know all structs are heap objects, so we can check the flag directly.
+ let recv = asm.stack_opnd(1);
+ let recv = asm.load(recv);
+ let flags = asm.load(Opnd::mem(VALUE_BITS, recv, RUBY_OFFSET_RBASIC_FLAGS));
+ asm.test(flags, (RUBY_FL_FREEZE as u64).into());
+ asm.jnz(Target::side_exit(Counter::opt_aset_frozen));
+
+ // Not frozen, so we can proceed.
+
asm_comment!(asm, "struct aset");
let val = asm.stack_pop(1);
@@ -9038,7 +9158,6 @@ fn gen_send_general(
let recv_opnd: YARVOpnd = recv.into();
// Log the name of the method we're calling to
- #[cfg(feature = "disasm")]
asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid));
// Gather some statistics about sends
@@ -9058,7 +9177,6 @@ fn gen_send_general(
perf_call!("gen_send_general: ", jit_guard_known_klass(
jit,
asm,
- comptime_recv_klass,
recv,
recv_opnd,
comptime_recv,
@@ -9311,7 +9429,6 @@ fn gen_send_general(
}
OPTIMIZED_METHOD_TYPE_CALL => {
-
if block.is_some() {
gen_counter_incr(jit, asm, Counter::send_call_block);
return None;
@@ -9327,13 +9444,6 @@ fn gen_send_general(
return None;
}
- // Optimize for single ractor mode and avoid runtime check for
- // "defined with an un-shareable Proc in a different Ractor"
- if !assume_single_ractor_mode(jit, asm) {
- gen_counter_incr(jit, asm, Counter::send_call_multi_ractor);
- return None;
- }
-
// If this is a .send call we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc);
@@ -9363,8 +9473,9 @@ fn gen_send_general(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, ret);
- return Some(KeepCompiling);
+ // End the block to allow invalidating the next instruction
+ return jump_to_next_insn(jit, asm);
}
OPTIMIZED_METHOD_TYPE_BLOCK_CALL => {
gen_counter_incr(jit, asm, Counter::send_optimized_method_block_call);
@@ -9537,7 +9648,24 @@ fn gen_sendforward(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
- return gen_send(jit, asm);
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq));
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } {
+ return Some(status);
+ }
+
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of sendforward
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_sendforward as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
}
fn gen_invokeblock(
@@ -9611,7 +9739,7 @@ fn gen_invokeblock_specialized(
// If the current ISEQ is annotated to be inlined but it's not being inlined here,
// generate a dynamic dispatch to avoid making this yield megamorphic.
- if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
+ if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined);
return None;
}
@@ -9702,7 +9830,7 @@ fn gen_invokesuper(
return Some(status);
}
- // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuper
let blockiseq = jit.get_arg(1).as_iseq();
gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
extern "C" {
@@ -9719,7 +9847,23 @@ fn gen_invokesuperforward(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
- return gen_invokesuper(jit, asm);
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) {
+ return Some(status);
+ }
+
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuperforward
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_invokesuperforward as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
}
fn gen_invokesuper_specialized(
@@ -9751,6 +9895,15 @@ fn gen_invokesuper_specialized(
return None;
}
+ let ci = unsafe { get_call_data_ci(cd) };
+ let ci_flags = unsafe { vm_ci_flag(ci) };
+
+ // Bail on ZSUPER inside a block method. They always raise.
+ if ci_flags & VM_CALL_ZSUPER != 0 && VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(me) } {
+ gen_counter_incr(jit, asm, Counter::invokesuper_bmethod_zsuper);
+ return None;
+ }
+
// FIXME: We should track and invalidate this block when this cme is invalidated
let current_defined_class = unsafe { (*me).defined_class };
let mid = unsafe { get_def_original_id((*me).def) };
@@ -9766,14 +9919,8 @@ fn gen_invokesuper_specialized(
let comptime_superclass =
unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) };
- let ci = unsafe { get_call_data_ci(cd) };
- let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
-
- let ci_flags = unsafe { vm_ci_flag(ci) };
-
// Don't JIT calls that aren't simple
// Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
-
if ci_flags & VM_CALL_KWARG != 0 {
gen_counter_incr(jit, asm, Counter::invokesuper_kwarg);
return None;
@@ -9792,6 +9939,7 @@ fn gen_invokesuper_specialized(
// cheaper calculations first, but since we specialize on the method entry
// and so only have to do this once at compile time this is fine to always
// check and side exit.
+ let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize);
if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) {
gen_counter_incr(jit, asm, Counter::invokesuper_defined_class_mismatch);
@@ -9819,16 +9967,8 @@ fn gen_invokesuper_specialized(
return None;
}
- asm_comment!(asm, "guard known me");
- let lep_opnd = gen_get_lep(jit, asm);
- let ep_me_opnd = Opnd::mem(
- 64,
- lep_opnd,
- SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF,
- );
-
- let me_as_value = VALUE(me as usize);
- asm.cmp(ep_me_opnd, me_as_value.into());
+ let cme_opnd = gen_get_running_cme_or_sentinal(jit, asm);
+ asm.cmp(cme_opnd, VALUE::from(me).into());
jit_chain_guard(
JCC_JNE,
jit,
@@ -9873,7 +10013,7 @@ fn gen_leave(
asm_comment!(asm, "pop stack frame");
let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, incr_cfp);
- asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
+ asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
// Load the return value
let retval_opnd = asm.stack_pop(1);
@@ -9972,7 +10112,6 @@ fn gen_objtostring(
jit_guard_known_klass(
jit,
asm,
- comptime_recv.class_of(),
recv,
recv.into(),
comptime_recv,
@@ -9986,7 +10125,6 @@ fn gen_objtostring(
jit_guard_known_klass(
jit,
asm,
- comptime_recv.class_of(),
recv,
recv.into(),
comptime_recv,
@@ -10042,45 +10180,18 @@ fn gen_toregexp(
let opt = jit.get_arg(0).as_i64();
let cnt = jit.get_arg(1).as_usize();
- // Save the PC and SP because this allocates an object and could
- // raise an exception.
+ // Allocates objects and could raise an exception.
jit_prepare_non_leaf_call(jit, asm);
let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32)));
- let ary = asm.ccall(
- rb_ary_tmp_new_from_values as *const u8,
- vec![
- Opnd::Imm(0),
- cnt.into(),
- values_ptr,
- ]
+ let regexp = asm.ccall(
+ rb_reg_new_from_values as _,
+ vec![cnt.into(), values_ptr, opt.into()],
);
asm.stack_pop(cnt); // Let ccall spill them
-
- // Save the array so we can clear it later
- asm.cpush(ary);
- asm.cpush(ary); // Alignment
-
- let val = asm.ccall(
- rb_reg_new_ary as *const u8,
- vec![
- ary,
- Opnd::Imm(opt),
- ]
- );
-
- // The actual regex is in RAX now. Pop the temp array from
- // rb_ary_tmp_new_from_values into C arg regs so we can clear it
- let ary = asm.cpop(); // Alignment
- asm.cpop_into(ary);
-
- // The value we want to push on the stack is in RAX right now
let stack_ret = asm.stack_push(Type::UnknownHeap);
- asm.mov(stack_ret, val);
-
- // Clear the temp array.
- asm.ccall(rb_ary_clear as *const u8, vec![ary]);
+ asm.mov(stack_ret, regexp);
Some(KeepCompiling)
}
@@ -10170,7 +10281,7 @@ fn gen_getclassvariable(
let val_opnd = asm.ccall(
rb_vm_getclassvariable as *const u8,
vec![
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ VALUE(jit.iseq as usize).into(),
CFP,
Opnd::UImm(jit.get_arg(0).as_u64()),
Opnd::UImm(jit.get_arg(1).as_u64()),
@@ -10194,7 +10305,7 @@ fn gen_setclassvariable(
asm.ccall(
rb_vm_setclassvariable as *const u8,
vec![
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ VALUE(jit.iseq as usize).into(),
CFP,
Opnd::UImm(jit.get_arg(0).as_u64()),
val,
@@ -10662,8 +10773,8 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_concattoarray => Some(gen_concattoarray),
YARVINSN_pushtoarray => Some(gen_pushtoarray),
YARVINSN_newrange => Some(gen_newrange),
- YARVINSN_putstring => Some(gen_putstring),
- YARVINSN_putchilledstring => Some(gen_putchilledstring),
+ YARVINSN_dupstring => Some(gen_dupstring),
+ YARVINSN_dupchilledstring => Some(gen_dupchilledstring),
YARVINSN_expandarray => Some(gen_expandarray),
YARVINSN_defined => Some(gen_defined),
YARVINSN_definedivar => Some(gen_definedivar),
@@ -10677,7 +10788,6 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_neq => Some(gen_opt_neq),
YARVINSN_opt_aref => Some(gen_opt_aref),
YARVINSN_opt_aset => Some(gen_opt_aset),
- YARVINSN_opt_aref_with => Some(gen_opt_aref_with),
YARVINSN_opt_mult => Some(gen_opt_mult),
YARVINSN_opt_div => Some(gen_opt_div),
YARVINSN_opt_ltlt => Some(gen_opt_ltlt),
@@ -10699,6 +10809,11 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_branchnil => Some(gen_branchnil),
YARVINSN_throw => Some(gen_throw),
YARVINSN_jump => Some(gen_jump),
+ YARVINSN_branchif_without_ints => Some(gen_branchif),
+ YARVINSN_branchunless_without_ints => Some(gen_branchunless),
+ YARVINSN_branchnil_without_ints => Some(gen_branchnil),
+ YARVINSN_jump_without_ints => Some(gen_jump),
+ YARVINSN_opt_new => Some(gen_opt_new),
YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy),
YARVINSN_getblockparam => Some(gen_getblockparam),
@@ -10908,7 +11023,7 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(exec_mem_size as u32) };
+ let virt_block: *mut u8 = unsafe { rb_jit_reserve_addr_space(exec_mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -10917,7 +11032,7 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
- let page_size = unsafe { rb_yjit_get_page_size() };
+ let page_size = unsafe { rb_jit_get_page_size() };
assert_eq!(
virt_block as usize % page_size.as_usize(), 0,
"Start of virtual address block should be page-aligned",
@@ -10933,7 +11048,7 @@ impl CodegenGlobals {
exec_mem_size,
get_option!(mem_size),
);
- let mem_block = Rc::new(RefCell::new(mem_block));
+ let mem_block = Rc::new(mem_block);
let freed_pages = Rc::new(None);