diff options
Diffstat (limited to 'yjit/src/codegen.rs')
| -rw-r--r-- | yjit/src/codegen.rs | 715 |
1 files changed, 415 insertions, 300 deletions
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 97ea3877aa..743a10e15a 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -234,20 +234,36 @@ impl<'a> JITState<'a> { result } - /// Return true if the current ISEQ could escape an environment. + /// Return true if the JIT code can use [`Self::assume_no_ep_escape`] + /// and will run with an on-stack (`!VM_ENV_ESCAPED_P`) environment. /// - /// As of vm_push_frame(), EP is always equal to BP. However, after pushing - /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP. - /// Also, some method calls escape the environment to the heap. - fn escapes_ep(&self) -> bool { + /// ## Reasoning about ISEQs that are not currently running + /// + /// As of vm_push_frame() and its JIT code equivalent, EP is always equal to BP (the + /// environment is on-stack and has not escaped). We can usually assume this is the starting + /// condition upon entry into JIT code. However, after pushing a frame and before entry into + /// JIT code, some ISEQ setups call vm_bind_update_env(), which redirects EP. + /// + /// ## After making the assumption + /// + /// After JIT code entry, many ruby operations can have the environment escape to heap. These + /// are handled by [`crate::invariants`]. + /// + /// Exceptional entry through jit_exec_exception() is an extreme case of the environment state + /// changing between vm_push_frame() and entry into JIT code. The frame could have been pushed + /// before YJIT is enabled. The exception entry point refuses entry with an escaped environment. + fn can_assume_on_stack_env(&self) -> bool { match unsafe { get_iseq_body_type(self.iseq) } { // <main> frame is always associated to TOPLEVEL_BINDING. ISEQ_TYPE_MAIN | // Kernel#eval uses a heap EP when a Binding argument is not nil. - ISEQ_TYPE_EVAL => true, - // If this ISEQ has previously escaped EP, give up the optimization. - _ if iseq_escapes_ep(self.iseq) => true, - _ => false, + ISEQ_TYPE_EVAL => false, + // Check the running environment if compiling for it + _ if unsafe { self.iseq == get_cfp_iseq(self.get_cfp()) && cfp_env_has_escaped(self.get_cfp()) } => false, + // If we've seen this ISEQ run with an escaped environment, give up the optimization + // to avoid excessive invalidations (even though it may be fine for soundness). + _ if seen_escaped_env(self.iseq) => false, + _ => true, } } @@ -376,8 +392,8 @@ impl<'a> JITState<'a> { if jit_ensure_block_entry_exit(self, asm).is_none() { return false; // out of space, give up } - if self.escapes_ep() { - return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop. + if !self.can_assume_on_stack_env() { + return false; // Unsound or unprofitable to make the assumption } self.no_ep_escape = true; true @@ -438,7 +454,7 @@ impl<'a> JITState<'a> { fn flush_perf_symbols(&self, cb: &CodeBlock) { assert_eq!(0, self.perf_stack.len()); let path = format!("/tmp/perf-{}.map", std::process::id()); - let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap(); + let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap()); for sym in self.perf_map.borrow().iter() { if let (start, Some(end), name) = sym { // In case the code straddles two pages, part of it belongs to the symbol. @@ -821,11 +837,11 @@ fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { /// Generate an exit to return to the interpreter fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { - #[cfg(all(feature = "disasm", not(test)))] - { + #[cfg(not(test))] + asm_comment!(asm, "exit to interpreter on {}", { let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; - asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize)); - } + insn_name(opcode as usize) + }); if asm.ctx.is_return_landing() { asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); @@ -1094,11 +1110,7 @@ pub fn gen_entry_prologue( let code_ptr = cb.get_write_ptr(); let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); - if get_option_ref!(dump_disasm).is_some() { - asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); - } else { - asm_comment!(asm, "YJIT entry"); - } + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); asm.frame_setup(); @@ -1212,7 +1224,7 @@ fn gen_check_ints( // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, // signal_exec, or rb_postponed_job_flush. - let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); + let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32)); asm.test(interrupt_flag, interrupt_flag); asm.jnz(Target::side_exit(counter)); @@ -1296,7 +1308,6 @@ pub fn gen_single_block( let mut asm = Assembler::new(jit.num_locals()); asm.ctx = ctx; - #[cfg(feature = "disasm")] if get_option_ref!(dump_disasm).is_some() { let blockid_idx = blockid.idx; let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; @@ -2263,7 +2274,8 @@ fn gen_expandarray( let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); - // If the comptime receiver is not an array + // If the comptime receiver is not an array, speculate for when the `rb_check_array_type()` + // conversion returns nil and without side-effects (e.g. arbitrary method calls). if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } { // at compile time, ensure to_ary is not defined let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) }; @@ -2275,13 +2287,19 @@ fn gen_expandarray( return None; } + // Bail when method_missing is defined to avoid generating code to call it. + // Also, for simplicity, bail when BasicObject#method_missing has been removed. + if !assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(method_missing)) { + gen_counter_incr(jit, asm, Counter::expandarray_method_missing); + return None; + } + // invalidate compile block if to_ary is later defined jit.assume_method_lookup_stable(asm, target_cme); jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), array_opnd, array_opnd.into(), comptime_recv, @@ -2311,7 +2329,7 @@ fn gen_expandarray( } // Get the compile-time array length - let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 }; + let comptime_len = unsafe { rb_jit_array_len(comptime_recv) as u32 }; // Move the array from the stack and check that it's an array. guard_object_is_array( @@ -2423,28 +2441,94 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { ep_opnd } -// Gets the EP of the ISeq of the containing method, or "local level". -// Equivalent of GET_LEP() macro. +/// Get the EP of the ISeq of the containing method, or "local level EP". +/// Equivalent to `GET_LEP()` with a constraint: +/// `ISEQ_TYPE_METHOD == iseq_under_compilation->body->local_iseq->body->type`. +/// No bad memory access happens when this condition is not met, just that the +/// EP returned may not be `VM_ENV_FLAG_LOCAL`. Practically, ISeqs that don't +/// meet this condition, such as `ISEQ_TYPE_TOP`, also don't need to use this operation. fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { - // Equivalent of get_lvar_level() in compile.c - fn get_lvar_level(iseq: IseqPtr) -> u32 { - if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } { - 0 - } else { - 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) }) - } - } - - let level = get_lvar_level(jit.get_iseq()); + // GET_LEP() chases the parent environment pointer to reach the local environment. Inside + // a descendant of ISEQ_TYPE_METHOD, it chases the same number of times as chasing + // the parent iseq pointer to reach the local iseq. See get_lvar_level() in compile.c + let mut iseq = jit.get_iseq(); + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(iseq) }; + let mut level = 0; + while iseq != local_iseq { + iseq = unsafe { rb_get_iseq_body_parent_iseq(iseq) }; + level += 1; + } + asm_comment!(asm, "get_lep(level: {level})"); gen_get_ep(asm, level) } +/// Load the value in the ME_CREF slot of the EP that contains the running CME. +/// Returns the slot value directly, which is only useful for guarding that the +/// CME has not changed. Unlike rb_vm_frame_method_entry(), we never dereference +/// `ep[VM_ENV_DATA_INDEX_ME_CREF]` but rather rely on `ep[VM_ENV_DATA_INDEX_FLAGS]` +/// to terminate the search, since we load the flags anyways for EP hopping. +/// When `ep[VM_ENV_DATA_INDEX_ME_CREF]` is not a CME, it can't match the expected +/// CME, and the guard fails. +fn gen_get_running_cme_or_sentinal(jit: &JITState, asm: &mut Assembler) -> Opnd { + // When not in a block, the running CME is at `cfp->ep`. + if jit.iseq == unsafe { rb_get_iseq_body_local_iseq(jit.iseq) } { + asm_comment!(asm, "cfp->ep[VM_ENV_DATA_INDEX_ME_CREF]"); + let lep_opnd = gen_get_ep(asm, 0); + Opnd::mem( + VALUE_BITS, + lep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, + ) + } else { + // Loop through EPs to find the one containing the CME. + // Stop when we find an EP with VM_FRAME_FLAG_BMETHOD (bmethod frame with CME) + // or VM_ENV_FLAG_LOCAL (local frame which is METHOD/CFUNC/IFUNC with CME). + // + // We cannot unroll to a static hop count like gen_get_lep() because bmethods + // defined inside methods may have a CME that lives at a EP cloers to the + // starting EP than at the local and final EP level. Each level of nesting can + // dynamically run with and without VM_FRAME_FLAG_BMETHOD set. + asm_comment!(asm, "search for running cme"); + let loop_label = asm.new_label("cme_loop"); + let done_label = asm.new_label("cme_done"); + + let ep_opnd = Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_EP); + let ep_opnd = asm.load(ep_opnd); + + asm.write_label(loop_label); + // Load flags from ep[VM_ENV_DATA_INDEX_FLAGS] + let flags = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32))); + // Check if VM_FRAME_FLAG_BMETHOD or VM_ENV_FLAG_LOCAL is set. + // If either is set, this EP contains the CME. + let check_flags = (VM_FRAME_FLAG_BMETHOD | VM_ENV_FLAG_LOCAL).as_usize(); + asm.test(flags, check_flags.into()); + asm.jnz(done_label); + // Get the previous EP from the current EP + // See GET_PREV_EP(ep) macro + // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) + let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL; + let next_ep_opnd = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, offs)); + let next_ep_opnd = asm.and(next_ep_opnd, (!0x03_i64).into()); + asm.load_into(ep_opnd, next_ep_opnd); + asm.jmp(loop_label); + asm.write_label(done_label); + + // Load and return the CME from ep[VM_ENV_DATA_INDEX_ME_CREF] + asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF)) + } +} + fn gen_getlocal_generic( jit: &mut JITState, asm: &mut Assembler, ep_offset: u32, level: u32, ) -> Option<CodegenStatus> { + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && jit.can_assume_on_stack_env() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) { // Load the local using SP register asm.local_opnd(ep_offset) @@ -2511,6 +2595,7 @@ fn gen_setlocal_generic( ep_offset: u32, level: u32, ) -> Option<CodegenStatus> { + // Post condition: The type of of the set local is updated in the Context. let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); // Fallback because of write barrier @@ -2532,9 +2617,19 @@ fn gen_setlocal_generic( ); asm.stack_pop(1); + // Set local type in the context + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); + } return Some(KeepCompiling); } + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && jit.can_assume_on_stack_env() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) { // Load flags and the local using SP register let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); @@ -2579,6 +2674,7 @@ fn gen_setlocal_generic( ); } + // Set local type in the context if level == 0 { let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); asm.ctx.set_local_type(local_idx, value_type); @@ -2668,7 +2764,7 @@ fn gen_newhash( Some(KeepCompiling) } -fn gen_putstring( +fn gen_dupstring( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { @@ -2688,7 +2784,7 @@ fn gen_putstring( Some(KeepCompiling) } -fn gen_putchilledstring( +fn gen_dupchilledstring( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { @@ -2743,7 +2839,7 @@ fn gen_checkkeyword( ) -> Option<CodegenStatus> { // When a keyword is unspecified past index 32, a hash will be used // instead. This can only happen in iseqs taking more than 32 keywords. - if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } { + if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= VM_KW_SPECIFIED_BITS_MAX.try_into().unwrap() } { return None; } @@ -2838,24 +2934,12 @@ fn gen_get_ivar( recv: Opnd, recv_opnd: YARVOpnd, ) -> Option<CodegenStatus> { - let comptime_val_klass = comptime_receiver.class_of(); - // If recv isn't already a register, load it. let recv = match recv { Opnd::InsnOut { .. } => recv, _ => asm.load(recv), }; - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; - // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; // Use a general C call at the last chain to avoid exits on megamorphic shapes @@ -2864,12 +2948,9 @@ fn gen_get_ivar( gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic); } - // If the class uses the default allocator, instances should all be T_OBJECT - // NOTE: This assumes nobody changes the allocator of the class after allocation. - // Eventually, we can encode whether an object is T_OBJECT or not - // inside object shapes. + // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type. // too-complex shapes can't use index access, so we use rb_ivar_get for them too. - if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic { + if !comptime_receiver.heap_object_p() || comptime_receiver.shape_complex() || megamorphic { // General case. Call rb_ivar_get(). // VALUE rb_ivar_get(VALUE obj, ID id) asm_comment!(asm, "call rb_ivar_get()"); @@ -2894,9 +2975,8 @@ fn gen_get_ivar( let ivar_index = unsafe { let shape_id = comptime_receiver.shape_id_of(); - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { + let mut ivar_index: attr_index_t = 0; + if rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) { Some(ivar_index as usize) } else { None @@ -2906,10 +2986,7 @@ fn gen_get_ivar( // Guard heap object (recv_opnd must be used before stack_pop) guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); - // Compile time self is embedded and the ivar index lands within the object - let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; - - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -2937,28 +3014,37 @@ fn gen_get_ivar( asm.mov(out_opnd, Qnil.into()); } Some(ivar_index) => { - if embed_test_result { - // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - - // Load the variable - let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = Opnd::mem(64, recv, offs); - - // Push the ivar on the stack - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); + let ivar_opnd = if receiver_t_object { + if comptime_receiver.embedded_p() { + // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h + + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + Opnd::mem(64, recv, offs) + } else { + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); + + // Read the ivar from the extended table + Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32) + } } else { - // Compile time value is *not* embedded. + asm_comment!(asm, "call rb_ivar_get_at()"); - // Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); - - // Read the ivar from the extended table - let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + if assume_single_ractor_mode(jit, asm) { + asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())]) + } else { + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) + } + }; - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); - } + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); } } @@ -3020,7 +3106,7 @@ fn gen_write_iv( // Compile time value is *not* embedded. // Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); // Write the ivar in to the extended table let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); @@ -3064,8 +3150,6 @@ fn gen_set_ivar( recv_opnd: YARVOpnd, ic: Option<*const iseq_inline_iv_cache_entry>, ) -> Option<CodegenStatus> { - let comptime_val_klass = comptime_receiver.class_of(); - // If the comptime receiver is frozen, writing an IV will raise an exception // and we don't want to JIT code to deal with that situation. if comptime_receiver.is_frozen() { @@ -3075,16 +3159,6 @@ fn gen_set_ivar( let stack_type = asm.ctx.get_opnd_type(StackOpnd(0)); - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; - // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; // Use a general C call at the last chain to avoid exits on megamorphic shapes @@ -3094,12 +3168,11 @@ fn gen_set_ivar( } // Get the iv index - let shape_too_complex = comptime_receiver.shape_too_complex(); - let ivar_index = if !shape_too_complex { + let shape_complex = comptime_receiver.shape_complex(); + let ivar_index = if !comptime_receiver.special_const_p() && !shape_complex { let shape_id = comptime_receiver.shape_id_of(); - let shape = unsafe { rb_shape_get_shape_by_id(shape_id) }; - let mut ivar_index: u32 = 0; - if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } { + let mut ivar_index: attr_index_t = 0; + if unsafe { rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) } { Some(ivar_index as usize) } else { None @@ -3109,27 +3182,31 @@ fn gen_set_ivar( }; // The current shape doesn't contain this iv, we need to transition to another shape. - let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() { - let current_shape = comptime_receiver.shape_of(); - let next_shape = unsafe { rb_shape_get_next_no_warnings(current_shape, comptime_receiver, ivar_name) }; - let next_shape_id = unsafe { rb_shape_id(next_shape) }; + let mut new_shape_complex = false; + let new_shape = if !shape_complex && receiver_t_object && ivar_index.is_none() { + let current_shape_id = comptime_receiver.shape_id_of(); + // We don't need to check about imemo_fields here because we're definitely looking at a T_OBJECT. + let klass = unsafe { rb_obj_class(comptime_receiver) }; + let next_shape_id = unsafe { rb_shape_transition_add_ivar_no_warnings(current_shape_id, ivar_name, klass) }; // If the VM ran out of shapes, or this class generated too many leaf, - // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). - if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { + // it may be de-optimized into OBJ_COMPLEX_SHAPE (hash-table). + new_shape_complex = unsafe { rb_jit_shape_complex_p(next_shape_id) }; + if new_shape_complex { Some((next_shape_id, None, 0_usize)) } else { - let current_capacity = unsafe { (*current_shape).capacity }; + let current_capacity = unsafe { rb_yjit_shape_capacity(current_shape_id) }; + let next_capacity = unsafe { rb_yjit_shape_capacity(next_shape_id) }; - // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to + // If the new shape has a different capacity, or is COMPLEX, we'll have to // reallocate it. - let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity }; + let needs_extension = next_capacity != current_capacity; // We can write to the object, but we need to transition the shape - let ivar_index = unsafe { (*current_shape).next_iv_index } as usize; + let ivar_index = unsafe { rb_yjit_shape_index(next_shape_id) } as usize; let needs_extension = if needs_extension { - Some((current_capacity, unsafe { (*next_shape).capacity })) + Some((current_capacity, next_capacity)) } else { None }; @@ -3138,12 +3215,10 @@ fn gen_set_ivar( } else { None }; - let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _))); - // If the receiver isn't a T_OBJECT, or uses a custom allocator, - // then just write out the IV write as a function call. + // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call. // too-complex shapes can't use index access, so we use rb_ivar_get for them too. - if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic { + if !receiver_t_object || shape_complex || new_shape_complex || megamorphic { // The function could raise FrozenError. // Note that this modifies REG_SP, which is why we do it first jit_prepare_non_leaf_call(jit, asm); @@ -3167,7 +3242,7 @@ fn gen_set_ivar( asm.ccall( rb_vm_setinstancevariable as *const u8, vec![ - Opnd::const_ptr(jit.iseq as *const u8), + VALUE(jit.iseq as usize).into(), Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), ivar_name.into(), val_opnd, @@ -3186,7 +3261,7 @@ fn gen_set_ivar( // Upgrade type guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap); - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -3360,7 +3435,7 @@ fn gen_definedivar( // Specialize base on compile time values let comptime_receiver = jit.peek_at_self(); - if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { + if comptime_receiver.special_const_p() || comptime_receiver.shape_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { // Fall back to calling rb_ivar_defined // Save the PC and SP because the callee may allocate @@ -3386,9 +3461,8 @@ fn gen_definedivar( let shape_id = comptime_receiver.shape_id_of(); let ivar_exists = unsafe { - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) + let mut ivar_index: attr_index_t = 0; + rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) }; // Guard heap object (recv_opnd must be used before stack_pop) @@ -3686,7 +3760,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, a_opnd, a_opnd.into(), comptime_a, @@ -3712,7 +3785,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, b_opnd, b_opnd.into(), comptime_b, @@ -3809,7 +3881,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3849,7 +3920,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3902,7 +3972,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv, recv.into(), comptime_recv, @@ -3914,7 +3983,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cInteger }, key, key.into(), comptime_key, @@ -3947,7 +4015,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv, recv.into(), comptime_recv, @@ -3975,38 +4042,6 @@ fn gen_opt_aset( } } -fn gen_opt_aref_with( - jit: &mut JITState, - asm: &mut Assembler, -) -> Option<CodegenStatus>{ - // We might allocate or raise - jit_prepare_non_leaf_call(jit, asm); - - let key_opnd = Opnd::Value(jit.get_arg(0)); - let recv_opnd = asm.stack_opnd(0); - - extern "C" { - fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE; - } - - let val_opnd = asm.ccall( - rb_vm_opt_aref_with as *const u8, - vec![ - recv_opnd, - key_opnd - ], - ); - asm.stack_pop(1); // Keep it on stack during GC - - asm.cmp(val_opnd, Qundef.into()); - asm.je(Target::side_exit(Counter::opt_aref_with_qundef)); - - let top = asm.stack_push(Type::Unknown); - asm.mov(top, val_opnd); - - return Some(KeepCompiling); -} - fn gen_opt_and( jit: &mut JITState, asm: &mut Assembler, @@ -4273,11 +4308,11 @@ fn gen_opt_ary_freeze( return None; } - let str = jit.get_arg(0); + let ary = jit.get_arg(0); // Push the return value onto the stack let stack_ret = asm.stack_push(Type::CArray); - asm.mov(stack_ret, str.into()); + asm.mov(stack_ret, ary.into()); Some(KeepCompiling) } @@ -4290,11 +4325,11 @@ fn gen_opt_hash_freeze( return None; } - let str = jit.get_arg(0); + let hash = jit.get_arg(0); // Push the return value onto the stack let stack_ret = asm.stack_push(Type::CHash); - asm.mov(stack_ret, str.into()); + asm.mov(stack_ret, hash.into()); Some(KeepCompiling) } @@ -4616,21 +4651,8 @@ fn gen_opt_case_dispatch( // Check that all cases are fixnums to avoid having to register BOP assumptions on // all the types that case hashes support. This spends compile time to save memory. - fn case_hash_all_fixnum_p(hash: VALUE) -> bool { - let mut all_fixnum = true; - unsafe { - unsafe extern "C" fn per_case(key: st_data_t, _value: st_data_t, data: st_data_t) -> c_int { - (if VALUE(key as usize).fixnum_p() { - ST_CONTINUE - } else { - (data as *mut bool).write(false); - ST_STOP - }) as c_int - } - rb_hash_stlike_foreach(hash, Some(per_case), (&mut all_fixnum) as *mut _ as st_data_t); - } - - all_fixnum + fn case_hash_all_fixnum_p(cdhash: VALUE) -> bool { + unsafe { rb_yjit_cdhash_all_fixnum_p(cdhash) } } // If megamorphic, fallback to compiling branch instructions after opt_case_dispatch @@ -4657,12 +4679,11 @@ fn gen_opt_case_dispatch( // Get the offset for the compile-time key let mut offset = 0; - unsafe { rb_hash_stlike_lookup(case_hash, comptime_key.0 as _, &mut offset) }; - let jump_offset = if offset == 0 { + let jump_offset = if unsafe { rb_yjit_cdhash_lookup(case_hash, comptime_key.0 as _, &mut offset) } == 0 { // NOTE: If we hit the else branch with various values, it could negatively impact the performance. else_offset } else { - (offset as u32) >> 1 // FIX2LONG + offset as u32 }; // Jump to the offset of case or else @@ -4683,7 +4704,7 @@ fn gen_branchif( let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops - if jump_offset < 0 { + if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchif_without_ints as usize { gen_check_ints(asm, Counter::branchif_interrupted); } @@ -4735,7 +4756,7 @@ fn gen_branchunless( let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops - if jump_offset < 0 { + if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchunless_without_ints as usize { gen_check_ints(asm, Counter::branchunless_interrupted); } @@ -4788,7 +4809,7 @@ fn gen_branchnil( let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops - if jump_offset < 0 { + if jump_offset < 0 && jit.get_opcode() != YARVINSN_branchnil_without_ints as usize { gen_check_ints(asm, Counter::branchnil_interrupted); } @@ -4873,6 +4894,69 @@ fn gen_throw( Some(EndBlock) } +fn gen_opt_new( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let cd = jit.get_arg(0).as_ptr(); + let jump_offset = jit.get_arg(1).as_i32(); + + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + let mid = unsafe { vm_ci_mid(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + + let recv_idx = argc; + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); + + // This is a singleton class + let comptime_recv_klass = comptime_recv.class_of(); + + let recv = asm.stack_opnd(recv_idx); + + perf_call!("opt_new: ", jit_guard_known_klass( + jit, + asm, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::guard_send_klass_megamorphic, + )); + + // We now know that it's always comptime_recv_klass + if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) { + // Fast path + // call rb_class_alloc to actually allocate + jit_prepare_non_leaf_call(jit, asm); + let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]); + + // Get a reference to the stack location where we need to save the + // return instance. + let result = asm.stack_opnd(recv_idx + 1); + let recv = asm.stack_opnd(recv_idx); + + // Replace the receiver for the upcoming initialize call + asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(recv, obj); + + // Save the allocated object for return + asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(result, obj); + + jump_to_next_insn(jit, asm) + } else { + // general case + + // Get the branch target instruction offsets + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; + return end_block_with_jump(jit, asm, jump_idx as u16); + } +} + fn gen_jump( jit: &mut JITState, asm: &mut Assembler, @@ -4880,7 +4964,7 @@ fn gen_jump( let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops - if jump_offset < 0 { + if jump_offset < 0 && jit.get_opcode() != YARVINSN_jump_without_ints as usize { gen_check_ints(asm, Counter::jump_interrupted); } @@ -4906,13 +4990,13 @@ fn gen_jump( fn jit_guard_known_klass( jit: &mut JITState, asm: &mut Assembler, - known_klass: VALUE, obj_opnd: Opnd, insn_opnd: YARVOpnd, sample_instance: VALUE, max_chain_depth: u8, counter: Counter, ) { + let known_klass = sample_instance.class_of(); let val_type = asm.ctx.get_opnd_type(insn_opnd); if val_type.known_class() == Some(known_klass) { @@ -5018,7 +5102,7 @@ fn jit_guard_known_klass( assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") }; } else { - assert!(!val_type.is_imm()); + assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}"); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. @@ -5662,7 +5746,6 @@ fn jit_rb_float_plus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5704,7 +5787,6 @@ fn jit_rb_float_minus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5746,7 +5828,6 @@ fn jit_rb_float_mul( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5788,7 +5869,6 @@ fn jit_rb_float_div( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -6052,7 +6132,6 @@ fn jit_rb_str_getbyte( jit_guard_known_klass( jit, asm, - comptime_idx.class_of(), idx, idx.into(), comptime_idx, @@ -6169,16 +6248,19 @@ fn jit_rb_str_dup( jit_prepare_call_with_gc(jit, asm); - // Check !FL_ANY_RAW(str, FL_EXIVAR), which is part of BARE_STRING_P. - let recv_opnd = asm.stack_pop(1); + let recv_opnd = asm.stack_opnd(0); let recv_opnd = asm.load(recv_opnd); - let flags_opnd = Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS); - asm.test(flags_opnd, Opnd::Imm(RUBY_FL_EXIVAR as i64)); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(64, recv_opnd, shape_id_offset); + asm.test(shape_opnd, Opnd::UImm(SHAPE_ID_HAS_IVAR_MASK as u64)); asm.jnz(Target::side_exit(Counter::send_str_dup_exivar)); // Call rb_str_dup - let stack_ret = asm.stack_push(Type::CString); let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::CString); asm.mov(stack_ret, ret_opnd); true @@ -6234,7 +6316,7 @@ fn jit_rb_str_concat_codepoint( guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0)); - asm.ccall(rb_yjit_str_concat_codepoint as *const u8, vec![recv, codepoint]); + asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]); // The receiver is the return value, so we only need to pop the codepoint argument off the stack. // We can reuse the receiver slot in the stack as the return value. @@ -6540,6 +6622,7 @@ fn jit_rb_f_block_given_p( true } +/// Codegen for `block_given?` and `defined?(yield)` fn gen_block_given( jit: &mut JITState, asm: &mut Assembler, @@ -6549,16 +6632,24 @@ fn gen_block_given( ) { asm_comment!(asm, "block_given?"); - // Same as rb_vm_frame_block_handler - let ep_opnd = gen_get_lep(jit, asm); - let block_handler = asm.load( - Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) - ); + // `yield` goes to the block handler stowed in the "local" iseq which is + // the current iseq or a parent. Only the "method" iseq type can be passed a + // block handler. (e.g. `yield` in the top level script is a syntax error.) + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; + if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { + // Same as rb_vm_frame_block_handler + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); - // Return `block_handler != VM_BLOCK_HANDLER_NONE` - asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); - let block_given = asm.csel_ne(true_opnd, false_opnd); - asm.mov(out_opnd, block_given); + // Return `block_handler != VM_BLOCK_HANDLER_NONE` + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + let block_given = asm.csel_ne(true_opnd, false_opnd); + asm.mov(out_opnd, block_given); + } else { + asm.mov(out_opnd, false_opnd); + } } // Codegen for rb_class_superclass() @@ -6631,7 +6722,7 @@ fn jit_thread_s_current( asm.stack_pop(1); // ec->thread_ptr - let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR as i32)); // thread->self let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); @@ -7096,7 +7187,7 @@ fn gen_send_cfunc( asm_comment!(asm, "set ec->cfp"); let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32))); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), new_cfp); if !kw_arg.is_null() { // Build a hash from all kwargs passed @@ -7192,7 +7283,7 @@ fn gen_send_cfunc( // Pop the stack frame (ec->cfp++) // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved // register - let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32); asm.store(ec_cfp_opnd, CFP); // cfunc calls may corrupt types @@ -7358,7 +7449,7 @@ fn gen_send_bmethod( ) -> Option<CodegenStatus> { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; - let proc = unsafe { rb_yjit_get_proc_ptr(procv) }; + let proc = unsafe { rb_jit_get_proc_ptr(procv) }; let proc_block = unsafe { &(*proc).block }; if proc_block.type_ != block_type_iseq { @@ -7368,11 +7459,12 @@ fn gen_send_bmethod( let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, asm) { - gen_counter_incr(jit, asm, Counter::send_bmethod_ractor); - return None; + if !procv.shareable_p() { + let ractor_serial = unsafe { rb_yjit_cme_ractor_serial(cme) }; + asm_comment!(asm, "guard current ractor == {}", ractor_serial); + let current_ractor_serial = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_RACTOR_ID as i32)); + asm.cmp(current_ractor_serial, ractor_serial.into()); + asm.jne(Target::side_exit(Counter::send_bmethod_ractor)); } // Passing a block to a block needs logic different from passing @@ -7438,6 +7530,12 @@ fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, block: Opti let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + // Only inline getlocal on a parameter. DCE in the IESQ builder can + // make a two-instruction ISEQ that does not return a parameter. + if local_idx >= unsafe { get_iseq_body_param_size(iseq) } { + return None; + } + if unsafe { rb_simple_iseq_p(iseq) } { return Some(IseqReturn::LocalVariable(local_idx)); } else if unsafe { rb_iseq_only_kwparam_p(iseq) } { @@ -7594,7 +7692,7 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::send_iseq_splat_not_array); return None; } else { - unsafe { rb_yjit_array_len(array) as u32} + unsafe { rb_jit_array_len(array) as u32} }; // Arity check accounting for size of the splat. When callee has rest parameters, we insert @@ -7685,7 +7783,7 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::num_send_iseq); // Shortcut for special `Primitive.attr! :leaf` builtins - let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins @@ -7862,6 +7960,11 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::send_iseq_clobbering_block_arg); return None; } + if iseq_has_rest || has_kwrest { + // The proc would be stored above the current stack top, where GC can't see it + gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_gc_unsafe); + return None; + } let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg let callee_specval = asm.ctx.sp_opnd(callee_specval); asm.store(callee_specval, proc); @@ -8274,7 +8377,7 @@ fn gen_send_iseq( // We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall(). if get_option!(gen_stats) { // Protect caller-saved registers in case they're used for arguments - asm.cpush_all(); + let mapping = asm.cpush_all(); // Assemble the ISEQ name string let name_str = get_iseq_name(iseq); @@ -8284,7 +8387,7 @@ fn gen_send_iseq( // Increment the counter for this cfunc asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); - asm.cpop_all(); + asm.cpop_all(mapping); } // The callee might change locals through Kernel#binding and other means. @@ -8319,7 +8422,7 @@ fn gen_send_iseq( asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Directly jump to the entry point of the callee gen_direct_jump( @@ -8870,9 +8973,9 @@ fn gen_struct_aref( handle_opt_send_shift_stack(asm, argc); } - // All structs from the same Struct class should have the same + // All structs from the same Struct class and shape_id should have the same // length. So if our comptime_recv is embedded all runtime - // structs of the same class should be as well, and the same is + // structs of the same class and shape_id should be as well, and the same is // true of the converse. let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; @@ -8907,6 +9010,12 @@ fn gen_struct_aset( return None; } + // If the comptime receiver is frozen, writing a struct member will raise an exception + // and we don't want to JIT code to deal with that situation. + if comptime_recv.is_frozen() { + return None; + } + if c_method_tracing_currently_enabled(jit) { // Struct accesses need fire c_call and c_return events, which we can't support // See :attr-tracing: @@ -8927,6 +9036,17 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); + // Even if the comptime recv was not frozen, future recv may be. So we need to emit a guard + // that the recv is not frozen. + // We know all structs are heap objects, so we can check the flag directly. + let recv = asm.stack_opnd(1); + let recv = asm.load(recv); + let flags = asm.load(Opnd::mem(VALUE_BITS, recv, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags, (RUBY_FL_FREEZE as u64).into()); + asm.jnz(Target::side_exit(Counter::opt_aset_frozen)); + + // Not frozen, so we can proceed. + asm_comment!(asm, "struct aset"); let val = asm.stack_pop(1); @@ -9038,7 +9158,6 @@ fn gen_send_general( let recv_opnd: YARVOpnd = recv.into(); // Log the name of the method we're calling to - #[cfg(feature = "disasm")] asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); // Gather some statistics about sends @@ -9058,7 +9177,6 @@ fn gen_send_general( perf_call!("gen_send_general: ", jit_guard_known_klass( jit, asm, - comptime_recv_klass, recv, recv_opnd, comptime_recv, @@ -9311,7 +9429,6 @@ fn gen_send_general( } OPTIMIZED_METHOD_TYPE_CALL => { - if block.is_some() { gen_counter_incr(jit, asm, Counter::send_call_block); return None; @@ -9327,13 +9444,6 @@ fn gen_send_general( return None; } - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, asm) { - gen_counter_incr(jit, asm, Counter::send_call_multi_ractor); - return None; - } - // If this is a .send call we need to adjust the stack if flags & VM_CALL_OPT_SEND != 0 { handle_opt_send_shift_stack(asm, argc); @@ -9363,8 +9473,9 @@ fn gen_send_general( let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); - return Some(KeepCompiling); + // End the block to allow invalidating the next instruction + return jump_to_next_insn(jit, asm); } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { gen_counter_incr(jit, asm, Counter::send_optimized_method_block_call); @@ -9537,7 +9648,24 @@ fn gen_sendforward( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { - return gen_send(jit, asm); + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of sendforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_sendforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) } fn gen_invokeblock( @@ -9611,7 +9739,7 @@ fn gen_invokeblock_specialized( // If the current ISEQ is annotated to be inlined but it's not being inlined here, // generate a dynamic dispatch to avoid making this yield megamorphic. - if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined); return None; } @@ -9702,7 +9830,7 @@ fn gen_invokesuper( return Some(status); } - // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuper let blockiseq = jit.get_arg(1).as_iseq(); gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { extern "C" { @@ -9719,7 +9847,23 @@ fn gen_invokesuperforward( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { - return gen_invokesuper(jit, asm); + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuperforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuperforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) } fn gen_invokesuper_specialized( @@ -9751,6 +9895,15 @@ fn gen_invokesuper_specialized( return None; } + let ci = unsafe { get_call_data_ci(cd) }; + let ci_flags = unsafe { vm_ci_flag(ci) }; + + // Bail on ZSUPER inside a block method. They always raise. + if ci_flags & VM_CALL_ZSUPER != 0 && VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(me) } { + gen_counter_incr(jit, asm, Counter::invokesuper_bmethod_zsuper); + return None; + } + // FIXME: We should track and invalidate this block when this cme is invalidated let current_defined_class = unsafe { (*me).defined_class }; let mid = unsafe { get_def_original_id((*me).def) }; @@ -9766,14 +9919,8 @@ fn gen_invokesuper_specialized( let comptime_superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; - let ci = unsafe { get_call_data_ci(cd) }; - let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); - - let ci_flags = unsafe { vm_ci_flag(ci) }; - // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. - if ci_flags & VM_CALL_KWARG != 0 { gen_counter_incr(jit, asm, Counter::invokesuper_kwarg); return None; @@ -9792,6 +9939,7 @@ fn gen_invokesuper_specialized( // cheaper calculations first, but since we specialize on the method entry // and so only have to do this once at compile time this is fine to always // check and side exit. + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize); if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) { gen_counter_incr(jit, asm, Counter::invokesuper_defined_class_mismatch); @@ -9819,16 +9967,8 @@ fn gen_invokesuper_specialized( return None; } - asm_comment!(asm, "guard known me"); - let lep_opnd = gen_get_lep(jit, asm); - let ep_me_opnd = Opnd::mem( - 64, - lep_opnd, - SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, - ); - - let me_as_value = VALUE(me as usize); - asm.cmp(ep_me_opnd, me_as_value.into()); + let cme_opnd = gen_get_running_cme_or_sentinal(jit, asm); + asm.cmp(cme_opnd, VALUE::from(me).into()); jit_chain_guard( JCC_JNE, jit, @@ -9873,7 +10013,7 @@ fn gen_leave( asm_comment!(asm, "pop stack frame"); let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, incr_cfp); - asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Load the return value let retval_opnd = asm.stack_pop(1); @@ -9972,7 +10112,6 @@ fn gen_objtostring( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), recv, recv.into(), comptime_recv, @@ -9986,7 +10125,6 @@ fn gen_objtostring( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), recv, recv.into(), comptime_recv, @@ -10042,45 +10180,18 @@ fn gen_toregexp( let opt = jit.get_arg(0).as_i64(); let cnt = jit.get_arg(1).as_usize(); - // Save the PC and SP because this allocates an object and could - // raise an exception. + // Allocates objects and could raise an exception. jit_prepare_non_leaf_call(jit, asm); let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32))); - let ary = asm.ccall( - rb_ary_tmp_new_from_values as *const u8, - vec![ - Opnd::Imm(0), - cnt.into(), - values_ptr, - ] + let regexp = asm.ccall( + rb_reg_new_from_values as _, + vec![cnt.into(), values_ptr, opt.into()], ); asm.stack_pop(cnt); // Let ccall spill them - - // Save the array so we can clear it later - asm.cpush(ary); - asm.cpush(ary); // Alignment - - let val = asm.ccall( - rb_reg_new_ary as *const u8, - vec![ - ary, - Opnd::Imm(opt), - ] - ); - - // The actual regex is in RAX now. Pop the temp array from - // rb_ary_tmp_new_from_values into C arg regs so we can clear it - let ary = asm.cpop(); // Alignment - asm.cpop_into(ary); - - // The value we want to push on the stack is in RAX right now let stack_ret = asm.stack_push(Type::UnknownHeap); - asm.mov(stack_ret, val); - - // Clear the temp array. - asm.ccall(rb_ary_clear as *const u8, vec![ary]); + asm.mov(stack_ret, regexp); Some(KeepCompiling) } @@ -10170,7 +10281,7 @@ fn gen_getclassvariable( let val_opnd = asm.ccall( rb_vm_getclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, Opnd::UImm(jit.get_arg(0).as_u64()), Opnd::UImm(jit.get_arg(1).as_u64()), @@ -10194,7 +10305,7 @@ fn gen_setclassvariable( asm.ccall( rb_vm_setclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, Opnd::UImm(jit.get_arg(0).as_u64()), val, @@ -10662,8 +10773,8 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_concattoarray => Some(gen_concattoarray), YARVINSN_pushtoarray => Some(gen_pushtoarray), YARVINSN_newrange => Some(gen_newrange), - YARVINSN_putstring => Some(gen_putstring), - YARVINSN_putchilledstring => Some(gen_putchilledstring), + YARVINSN_dupstring => Some(gen_dupstring), + YARVINSN_dupchilledstring => Some(gen_dupchilledstring), YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), YARVINSN_definedivar => Some(gen_definedivar), @@ -10677,7 +10788,6 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_opt_neq => Some(gen_opt_neq), YARVINSN_opt_aref => Some(gen_opt_aref), YARVINSN_opt_aset => Some(gen_opt_aset), - YARVINSN_opt_aref_with => Some(gen_opt_aref_with), YARVINSN_opt_mult => Some(gen_opt_mult), YARVINSN_opt_div => Some(gen_opt_div), YARVINSN_opt_ltlt => Some(gen_opt_ltlt), @@ -10699,6 +10809,11 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_branchnil => Some(gen_branchnil), YARVINSN_throw => Some(gen_throw), YARVINSN_jump => Some(gen_jump), + YARVINSN_branchif_without_ints => Some(gen_branchif), + YARVINSN_branchunless_without_ints => Some(gen_branchunless), + YARVINSN_branchnil_without_ints => Some(gen_branchnil), + YARVINSN_jump_without_ints => Some(gen_jump), + YARVINSN_opt_new => Some(gen_opt_new), YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), YARVINSN_getblockparam => Some(gen_getblockparam), @@ -10908,7 +11023,7 @@ impl CodegenGlobals { #[cfg(not(test))] let (mut cb, mut ocb) = { - let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(exec_mem_size as u32) }; + let virt_block: *mut u8 = unsafe { rb_jit_reserve_addr_space(exec_mem_size as u32) }; // Memory protection syscalls need page-aligned addresses, so check it here. Assuming // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the @@ -10917,7 +11032,7 @@ impl CodegenGlobals { // // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. - let page_size = unsafe { rb_yjit_get_page_size() }; + let page_size = unsafe { rb_jit_get_page_size() }; assert_eq!( virt_block as usize % page_size.as_usize(), 0, "Start of virtual address block should be page-aligned", @@ -10933,7 +11048,7 @@ impl CodegenGlobals { exec_mem_size, get_option!(mem_size), ); - let mem_block = Rc::new(RefCell::new(mem_block)); + let mem_block = Rc::new(mem_block); let freed_pages = Rc::new(None); |
