Diffstat (limited to 'yjit/src')
 yjit/src/asm/mod.rs            |   54
 yjit/src/asm/x86_64/mod.rs     |   10
 yjit/src/asm/x86_64/tests.rs   |    1
 yjit/src/backend/arm64/mod.rs  |   30
 yjit/src/backend/ir.rs         |   20
 yjit/src/backend/tests.rs      |    4
 yjit/src/backend/x86_64/mod.rs |   30
 yjit/src/codegen.rs            | 1216
 yjit/src/core.rs               |  258
 yjit/src/cruby.rs              |   49
 yjit/src/cruby_bindings.inc.rs |  475
 yjit/src/disasm.rs             |   46
 yjit/src/invariants.rs         |    8
 yjit/src/lib.rs                |   13
 yjit/src/log.rs                |    2
 yjit/src/options.rs            |   25
 yjit/src/stats.rs              |  104
 yjit/src/utils.rs              |   22
 yjit/src/virtualmem.rs         |   99
 yjit/src/yjit.rs               |   47
 20 files changed, 1604 insertions(+), 909 deletions(-)
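A recurring change in the diff below is that CodeBlock now holds Rc<VirtualMem> rather than Rc<RefCell<VirtualMem>>, so the .borrow()/.borrow_mut() calls at the call sites disappear and writes go through &self methods; the interior mutability presumably moves inside VirtualMem itself. A minimal standalone sketch of that ownership pattern, using illustrative names rather than the real VirtualMem API:

use std::cell::RefCell;
use std::rc::Rc;

// Stand-in for VirtualMem: interior mutability lives inside the type,
// so shared owners only need `Rc<Mem>` and call `&self` methods.
struct Mem {
    bytes: RefCell<Vec<u8>>,
}

impl Mem {
    fn write_byte(&self, pos: usize, byte: u8) {
        self.bytes.borrow_mut()[pos] = byte;
    }

    fn size(&self) -> usize {
        self.bytes.borrow().len()
    }
}

fn main() {
    let mem = Rc::new(Mem { bytes: RefCell::new(vec![0u8; 4]) });
    let code_block = Rc::clone(&mem);
    code_block.write_byte(0, 0xc3); // no outer RefCell, no borrow_mut() at the call site
    assert_eq!(mem.size(), 4);
}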
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index ed6feb3174..9ef675b34d 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -1,4 +1,3 @@ -use std::cell::RefCell; use std::fmt; use std::mem; use std::rc::Rc; @@ -44,7 +43,7 @@ pub struct LabelRef { /// Block of memory into which instructions can be assembled pub struct CodeBlock { // Memory for storing the encoded instructions - mem_block: Rc<RefCell<VirtualMem>>, + mem_block: Rc<VirtualMem>, // Size of a code page in bytes. Each code page is split into an inlined and an outlined portion. // Code GC collects code memory at this granularity. @@ -107,16 +106,16 @@ impl CodeBlock { const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024; /// Make a new CodeBlock - pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self { + pub fn new(mem_block: Rc<VirtualMem>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self { // Pick the code page size - let system_page_size = mem_block.borrow().system_page_size(); + let system_page_size = mem_block.system_page_size(); let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size { Self::PREFERRED_CODE_PAGE_SIZE } else { system_page_size }; - let mem_size = mem_block.borrow().virtual_region_size(); + let mem_size = mem_block.virtual_region_size(); let mut cb = Self { mem_block, mem_size, @@ -145,6 +144,7 @@ impl CodeBlock { /// Move the CodeBlock to the next page. If it's on the furthest page, /// move the other CodeBlock to the next page as well. + #[must_use] pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool { let old_write_ptr = self.get_write_ptr(); self.set_write_ptr(base_ptr); @@ -237,9 +237,9 @@ impl CodeBlock { } // Free the grouped pages at once - let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size); + let start_ptr = self.mem_block.start_ptr().add_bytes(page_idx * self.page_size); let batch_size = self.page_size * batch_idxs.len(); - self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32); + self.mem_block.free_bytes(start_ptr, batch_size as u32); } } @@ -248,13 +248,13 @@ impl CodeBlock { } pub fn mapped_region_size(&self) -> usize { - self.mem_block.borrow().mapped_region_size() + self.mem_block.mapped_region_size() } /// Size of the region in bytes where writes could be attempted. #[cfg(target_arch = "aarch64")] pub fn virtual_region_size(&self) -> usize { - self.mem_block.borrow().virtual_region_size() + self.mem_block.virtual_region_size() } /// Return the number of code pages that have been mapped by the VirtualMemory. @@ -266,7 +266,7 @@ impl CodeBlock { /// Return the number of code pages that have been reserved by the VirtualMemory. 
pub fn num_virtual_pages(&self) -> usize { - let virtual_region_size = self.mem_block.borrow().virtual_region_size(); + let virtual_region_size = self.mem_block.virtual_region_size(); // CodeBlock's page size != VirtualMem's page size on Linux, // so mapped_region_size % self.page_size may not be 0 ((virtual_region_size - 1) / self.page_size) + 1 @@ -408,7 +408,7 @@ impl CodeBlock { } pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { - self.mem_block.borrow_mut().write_byte(write_ptr, byte) + self.mem_block.write_byte(write_ptr, byte) } // Set the current write position @@ -422,31 +422,31 @@ impl CodeBlock { // Set the current write position from a pointer pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { - let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset(); + let pos = code_ptr.as_offset() - self.mem_block.start_ptr().as_offset(); self.set_pos(pos.try_into().unwrap()); } /// Get a (possibly dangling) direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { - self.mem_block.borrow().start_ptr().add_bytes(offset) + self.mem_block.start_ptr().add_bytes(offset) } /// Convert an address range to memory page indexes against a num_pages()-sized array. pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> impl Iterator<Item = usize> { - let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self); - let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self); + let mem_start = self.mem_block.start_ptr().raw_addr(self); + let mem_end = self.mem_block.mapped_end_ptr().raw_addr(self); assert!(mem_start <= start_addr.raw_addr(self)); assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self)); assert!(end_addr.raw_addr(self) <= mem_end); // Ignore empty code ranges if start_addr == end_addr { - return (0..0).into_iter(); + return 0..0; } let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size; let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size; - (start_page..end_page + 1).into_iter() + start_page..end_page + 1 } /// Get a (possibly dangling) direct pointer to the current write position @@ -457,7 +457,7 @@ impl CodeBlock { /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); - if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() { + if self.has_capacity(1) && self.mem_block.write_byte(write_ptr, byte).is_ok() { self.write_pos += 1; } else { self.dropped_bytes = true; @@ -589,8 +589,12 @@ impl CodeBlock { self.label_refs = state.label_refs; } + pub fn mark_all_writeable(&mut self) { + self.mem_block.mark_all_writeable(); + } + pub fn mark_all_executable(&mut self) { - self.mem_block.borrow_mut().mark_all_executable(); + self.mem_block.mark_all_executable(); } /// Code GC. Free code pages that are not on stack and reuse them. @@ -688,7 +692,7 @@ impl CodeBlock { let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024); - Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None), true) + Self::new(Rc::new(virt_mem), false, Rc::new(None), true) } /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code. 
@@ -706,7 +710,7 @@ impl CodeBlock { let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024); - Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)), true) + Self::new(Rc::new(virt_mem), false, Rc::new(Some(freed_pages)), true) } } @@ -714,7 +718,7 @@ impl CodeBlock { impl fmt::LowerHex for CodeBlock { fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { for pos in 0..self.write_pos { - let mem_block = &*self.mem_block.borrow(); + let mem_block = &*self.mem_block; let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() }; fmtr.write_fmt(format_args!("{:02x}", byte))?; } @@ -724,7 +728,7 @@ impl fmt::LowerHex for CodeBlock { impl crate::virtualmem::CodePtrBase for CodeBlock { fn base_ptr(&self) -> std::ptr::NonNull<u8> { - self.mem_block.borrow().base_ptr() + self.mem_block.base_ptr() } } @@ -823,7 +827,7 @@ mod tests assert_eq!(cb.code_size(), 4); // Moving to the next page should not increase code_size - cb.next_page(cb.get_write_ptr(), |_, _| {}); + assert!(cb.next_page(cb.get_write_ptr(), |_, _| {})); assert_eq!(cb.code_size(), 4); // Write 4 bytes in the second page @@ -836,7 +840,7 @@ mod tests cb.write_bytes(&[1, 1, 1, 1]); // Moving from an old page to the next page should not increase code_size - cb.next_page(cb.get_write_ptr(), |_, _| {}); + assert!(cb.next_page(cb.get_write_ptr(), |_, _| {})); cb.set_pos(old_write_pos); assert_eq!(cb.code_size(), 8); } diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index fbbfa714d8..0ef5e92117 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -1027,7 +1027,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(imm_num_bits(imm.value) <= (output_num_bits as u8)); + assert!( + mem.num_bits < 64 || imm_num_bits(imm.value) <= (output_num_bits as u8), + "immediate value should be small enough to survive sign extension" + ); cb.write_int(imm.value as u64, output_num_bits); }, // M + UImm @@ -1042,7 +1045,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8)); + assert!( + mem.num_bits < 64 || imm_num_bits(uimm.value as i64) <= (output_num_bits as u8), + "immediate value should be small enough to survive sign extension" + ); cb.write_int(uimm.value, output_num_bits); }, // * + Imm/UImm diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs index 5ae983270f..eefcbfd52e 100644 --- a/yjit/src/asm/x86_64/tests.rs +++ b/yjit/src/asm/x86_64/tests.rs @@ -193,6 +193,7 @@ fn test_mov() { check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1))); //check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine? 
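// Note on the relaxed mov assertion above and the new 0x80000001 check that follows
// (descriptive comment, not part of the upstream test): `mov m32, imm32` stores the four
// immediate bytes verbatim, so 0x8000_0001 round-trips exactly. Only `mov m64, imm32`
// sign-extends the immediate to 64 bits, where 0x8000_0001 would become
// 0xFFFF_FFFF_8000_0001. That is why the assertion now only rejects wide immediates
// when the destination memory operand is 64 bits.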
check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17))); + check_bytes("c7400401000080", |cb| mov(cb, mem_opnd(32, RAX, 4), uimm_opnd(0x80000001))); check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX)); check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10)); check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12))); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index b695f8da96..0521e09d0b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -98,7 +98,7 @@ fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) { #[cfg(not(test))] { let end = cb.get_write_ptr(); - unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) }; + unsafe { rb_jit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) }; } } @@ -878,14 +878,13 @@ impl Assembler } } - /// Emit a push instruction for the given operand by adding to the stack - /// pointer and then storing the given value. + /// Push a value to the stack by subtracting from the stack pointer then storing, + /// leaving an 8-byte gap for alignment. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); } - /// Emit a pop instruction into the given operand by loading the value - /// and then subtracting from the stack pointer. + /// Pop a value from the stack by loading `[sp]` then adding to the stack pointer. fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } @@ -1155,8 +1154,8 @@ impl Assembler let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register - msr(cb, SystemRegister::NZCV, Self::SCRATCH0); emit_pop(cb, Self::SCRATCH0); + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); @@ -1341,16 +1340,13 @@ impl Assembler Err(EmitError::RetryOnNextPage) => { // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB // range limit. We can easily exceed the limit in case the jump straddles two pages. - // In this case, we retry with a fresh page. + // In this case, we retry with a fresh page once. cb.set_label_state(starting_label_state); - cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation); - let result = asm.arm64_emit(cb, &mut ocb); - assert_ne!( - Err(EmitError::RetryOnNextPage), - result, - "should not fail when writing to a fresh code page" - ); - result + if cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation) { + asm.arm64_emit(cb, &mut ocb) + } else { + Err(EmitError::OutOfMemory) + } } result => result }; @@ -1364,7 +1360,7 @@ impl Assembler #[cfg(not(test))] cb.without_page_end_reserve(|cb| { for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) { - unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + unsafe { rb_jit_icache_invalidate(start as _, end as _) }; } }); @@ -1422,7 +1418,7 @@ mod tests { fn test_emit_cpop_all() { let (mut asm, mut cb) = setup_asm(); - asm.cpop_all(); + asm.cpop_all(crate::core::RegMapping::default()); asm.compile_with_num_regs(&mut cb, 0); } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index eb32dac987..3fb67bc7cc 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -528,13 +528,13 @@ pub enum Insn { impl Insn { /// Create an iterator that will yield a non-mutable reference to each /// operand in turn for this instruction. 
- pub(super) fn opnd_iter(&self) -> InsnOpndIterator { + pub(super) fn opnd_iter(&self) -> InsnOpndIterator<'_> { InsnOpndIterator::new(self) } /// Create an iterator that will yield a mutable reference to each operand /// in turn for this instruction. - pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator { + pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator<'_> { InsnOpndMutIterator::new(self) } @@ -1086,7 +1086,7 @@ impl Assembler } /// Get the list of registers that can be used for stack temps. - pub fn get_temp_regs2() -> &'static [Reg] { + pub fn get_temp_regs() -> &'static [Reg] { let num_regs = get_option!(num_temp_regs); &TEMP_REGS[0..num_regs] } @@ -1204,7 +1204,7 @@ impl Assembler // Convert Opnd::Stack to Opnd::Reg fn reg_opnd(opnd: &Opnd, reg_idx: usize) -> Opnd { - let regs = Assembler::get_temp_regs2(); + let regs = Assembler::get_temp_regs(); if let Opnd::Stack { num_bits, .. } = *opnd { incr_counter!(temp_reg_opnd); Opnd::Reg(regs[reg_idx]).with_num_bits(num_bits).unwrap() @@ -1317,7 +1317,7 @@ impl Assembler } /// Spill a stack temp from a register to the stack - fn spill_reg(&mut self, opnd: Opnd) { + pub fn spill_reg(&mut self, opnd: Opnd) { assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None); // Use different RegMappings for dest and src operands @@ -1602,7 +1602,7 @@ impl Assembler if c_args.len() > 0 { // Resolve C argument dependencies let c_args_len = c_args.len() as isize; - let moves = Self::reorder_reg_moves(&c_args.drain(..).into_iter().collect()); + let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args)); shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len); // Push batched C arguments @@ -1824,12 +1824,12 @@ impl Assembler { out } - pub fn cpop_all(&mut self) { + pub fn cpop_all(&mut self, reg_mapping: RegMapping) { self.push_insn(Insn::CPopAll); // Re-enable ccall's RegMappings assertion disabled by cpush_all. // cpush_all + cpop_all preserve all stack temp registers, so it's safe. - self.set_reg_mapping(self.ctx.get_reg_mapping()); + self.set_reg_mapping(reg_mapping); } pub fn cpop_into(&mut self, opnd: Opnd) { @@ -1840,14 +1840,16 @@ impl Assembler { self.push_insn(Insn::CPush(opnd)); } - pub fn cpush_all(&mut self) { + pub fn cpush_all(&mut self) -> RegMapping { self.push_insn(Insn::CPushAll); // Mark all temps as not being in registers. // Temps will be marked back as being in registers by cpop_all. // We assume that cpush_all + cpop_all are used for C functions in utils.rs // that don't require spill_regs for GC. 
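// Illustrative caller pattern for the new cpush_all/cpop_all signatures
// (sketch only, not code from this diff; `func_ptr` is a placeholder):
//
//     let saved_regs = asm.cpush_all();   // saves C registers and clears the reg mapping
//     asm.ccall(func_ptr, vec![]);
//     asm.cpop_all(saved_regs);           // restores registers and the saved mapping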
+ let mapping = self.ctx.get_reg_mapping(); self.set_reg_mapping(RegMapping::default()); + mapping } pub fn cret(&mut self, opnd: Opnd) { diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index ac2f35b3d9..bfeea5163a 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -232,9 +232,9 @@ fn test_jcc_ptr() let (mut asm, mut cb) = setup_asm(); let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); - let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK as i32)); asm.test( - Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32), not_mask, ); asm.jnz(side_exit); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index c0d42e79e6..ef435bca7e 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -315,19 +315,24 @@ impl Assembler let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); }, - (Opnd::Mem(_), Opnd::UImm(value)) => { - // 32-bit values will be sign-extended - if imm_num_bits(*value as i64) > 32 { + (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if *num_bits == 64 && imm_num_bits(*value as i64) > 32 { let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); } else { asm.mov(*dest, *src); } }, - (Opnd::Mem(_), Opnd::Imm(value)) => { - if imm_num_bits(*value) > 32 { + (Opnd::Mem(Mem { num_bits, .. }), Opnd::Imm(value)) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if *num_bits == 64 && imm_num_bits(*value) > 32 { let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); + } else if uimm_num_bits(*value as u64) <= *num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + asm.mov(*dest, Opnd::UImm(*value as u64)); } else { asm.mov(*dest, *src); } @@ -1317,4 +1322,19 @@ mod tests { 0x13: mov qword ptr [rbx], rax "}); } + + #[test] + fn test_mov_m32_imm32() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, C_RET_OPND, 0); + asm.mov(shape_opnd, Opnd::UImm(0x8000_0001)); + asm.mov(shape_opnd, Opnd::Imm(0x8000_0001)); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "c70001000080c70001000080", {" + 0x0: mov dword ptr [rax], 0x80000001 + 0x6: mov dword ptr [rax], 0x80000001 + "}); + } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ded89457c6..0fbca85716 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -195,6 +195,45 @@ impl<'a> JITState<'a> { self.outlined_code_block } + /// Leave a code stub to re-enter the compiler at runtime when the compiling program point is + /// reached. Should always be used in tail position like `return jit.defer_compilation(asm);`. + #[must_use] + fn defer_compilation(&mut self, asm: &mut Assembler) -> Option<CodegenStatus> { + if crate::core::defer_compilation(self, asm).is_err() { + // If we can't leave a stub, the block isn't usable and we have to bail. 
+ self.block_abandoned = true; + } + Some(EndBlock) + } + + /// Generate a branch with either end possibly stubbed out + fn gen_branch( + &mut self, + asm: &mut Assembler, + target0: BlockId, + ctx0: &Context, + target1: Option<BlockId>, + ctx1: Option<&Context>, + gen_fn: BranchGenFn, + ) { + if crate::core::gen_branch(self, asm, target0, ctx0, target1, ctx1, gen_fn).is_none() { + // If we can't meet the request for a branch, the code is + // essentially corrupt and we have to discard the block. + self.block_abandoned = true; + } + } + + /// Wrapper for [self::gen_outlined_exit] with error handling. + fn gen_outlined_exit(&mut self, exit_pc: *mut VALUE, ctx: &Context) -> Option<CodePtr> { + let result = gen_outlined_exit(exit_pc, self.num_locals(), ctx, self.get_ocb()); + if result.is_none() { + // When we can't have the exits, the code is incomplete and we have to bail. + self.block_abandoned = true; + } + + result + } + /// Return true if the current ISEQ could escape an environment. /// /// As of vm_push_frame(), EP is always equal to BP. However, after pushing @@ -399,7 +438,7 @@ impl<'a> JITState<'a> { fn flush_perf_symbols(&self, cb: &CodeBlock) { assert_eq!(0, self.perf_stack.len()); let path = format!("/tmp/perf-{}.map", std::process::id()); - let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap(); + let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap()); for sym in self.perf_map.borrow().iter() { if let (start, Some(end), name) = sym { // In case the code straddles two pages, part of it belongs to the symbol. @@ -782,11 +821,11 @@ fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { /// Generate an exit to return to the interpreter fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { - #[cfg(all(feature = "disasm", not(test)))] - { + #[cfg(not(test))] + asm_comment!(asm, "exit to interpreter on {}", { let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; - asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize)); - } + insn_name(opcode as usize) + }); if asm.ctx.is_return_landing() { asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); @@ -851,6 +890,10 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { /// moment, so there is one unique side exit for each context. Note that /// it's incorrect to jump to the side exit after any ctx stack push operations /// since they change the logic required for reconstructing interpreter state. +/// +/// If you're in [the codegen module][self], use [JITState::gen_outlined_exit] +/// instead of calling this directly. 
+#[must_use] pub fn gen_outlined_exit(exit_pc: *mut VALUE, num_locals: u32, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> { let mut cb = ocb.unwrap(); let mut asm = Assembler::new(num_locals); @@ -915,7 +958,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> O jit.block_entry_exit = Some(entry_exit?); } else { let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) }; - jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, jit.num_locals(), block_starting_context, jit.get_ocb())?); + jit.block_entry_exit = Some(jit.gen_outlined_exit(block_entry_pc, block_starting_context)?); } Some(()) @@ -1018,14 +1061,13 @@ fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> { pub fn gen_entry_chain_guard( asm: &mut Assembler, ocb: &mut OutlinedCb, - iseq: IseqPtr, - insn_idx: u16, + blockid: BlockId, ) -> Option<PendingEntryRef> { let entry = new_pending_entry(); let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?; let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); - let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + let expected_pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx.into()) }; let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); asm_comment!(asm, "guard expected PC"); @@ -1044,18 +1086,15 @@ pub fn gen_entry_chain_guard( pub fn gen_entry_prologue( cb: &mut CodeBlock, ocb: &mut OutlinedCb, - iseq: IseqPtr, - insn_idx: u16, + blockid: BlockId, + stack_size: u8, jit_exception: bool, -) -> Option<CodePtr> { +) -> Option<(CodePtr, RegMapping)> { + let iseq = blockid.iseq; let code_ptr = cb.get_write_ptr(); let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); - if get_option_ref!(dump_disasm).is_some() { - asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); - } else { - asm_comment!(asm, "YJIT entry"); - } + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); asm.frame_setup(); @@ -1102,10 +1141,11 @@ pub fn gen_entry_prologue( // If they don't match, then we'll jump to an entry stub and generate // another PC check and entry there. let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception { - Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?) + Some(gen_entry_chain_guard(&mut asm, ocb, blockid)?) } else { None }; + let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size); asm.compile(cb, Some(ocb))?; @@ -1123,8 +1163,37 @@ pub fn gen_entry_prologue( .ok().expect("PendingEntry should be unique"); iseq_payload.entries.push(pending_entry.into_entry()); } - Some(code_ptr) + Some((code_ptr, reg_mapping)) + } +} + +/// Generate code to load registers for a JIT entry. When the entry block is compiled for +/// the first time, it loads no register. When it has been already compiled as a callee +/// block, it loads some registers to reuse the block. +pub fn gen_entry_reg_mapping(asm: &mut Assembler, blockid: BlockId, stack_size: u8) -> RegMapping { + // Find an existing callee block. If it's not found or uses no register, skip loading registers. + let mut ctx = Context::default(); + ctx.set_stack_size(stack_size); + let reg_mapping = find_most_compatible_reg_mapping(blockid, &ctx).unwrap_or(RegMapping::default()); + if reg_mapping == RegMapping::default() { + return reg_mapping; + } + + // If found, load the same registers to reuse the block. 
+ asm_comment!(asm, "reuse maps: {:?}", reg_mapping); + let local_table_size: u32 = unsafe { get_iseq_body_local_table_size(blockid.iseq) }.try_into().unwrap(); + for ®_opnd in reg_mapping.get_reg_opnds().iter() { + match reg_opnd { + RegOpnd::Local(local_idx) => { + let loaded_reg = TEMP_REGS[reg_mapping.get_reg(reg_opnd).unwrap()]; + let loaded_temp = asm.local_opnd(local_table_size - local_idx as u32 + VM_ENV_DATA_SIZE - 1); + asm.load_into(Opnd::Reg(loaded_reg), loaded_temp); + } + RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd), + } } + + reg_mapping } // Generate code to check for interrupts and take a side-exit. @@ -1139,7 +1208,7 @@ fn gen_check_ints( // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, // signal_exec, or rb_postponed_job_flush. - let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); + let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32)); asm.test(interrupt_flag, interrupt_flag); asm.jnz(Target::side_exit(counter)); @@ -1173,7 +1242,7 @@ fn end_block_with_jump( if jit.record_boundary_patch_point { jit.record_boundary_patch_point = false; let exit_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, continuation_insn_idx.into())}; - let exit_pos = gen_outlined_exit(exit_pc, jit.num_locals(), &reset_depth, jit.get_ocb()); + let exit_pos = jit.gen_outlined_exit(exit_pc, &reset_depth); record_global_inval_patch(asm, exit_pos?); } @@ -1223,7 +1292,6 @@ pub fn gen_single_block( let mut asm = Assembler::new(jit.num_locals()); asm.ctx = ctx; - #[cfg(feature = "disasm")] if get_option_ref!(dump_disasm).is_some() { let blockid_idx = blockid.idx; let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; @@ -1282,7 +1350,7 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { // Generate an exit to this instruction and record it - let exit_pos = gen_outlined_exit(jit.pc, jit.num_locals(), &asm.ctx, jit.get_ocb()).ok_or(())?; + let exit_pos = jit.gen_outlined_exit(jit.pc, &asm.ctx).ok_or(())?; record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } @@ -1446,6 +1514,18 @@ fn gen_dupn( Some(KeepCompiling) } +// Reverse top X stack entries +fn gen_opt_reverse( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let count = jit.get_arg(0).as_i32(); + for n in 0..(count/2) { + stack_swap(asm, n, count - 1 - n); + } + Some(KeepCompiling) +} + // Swap top 2 stack entries fn gen_swap( _jit: &mut JITState, @@ -1538,8 +1618,7 @@ fn fuse_putobject_opt_ltlt( return None; } if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let lhs = jit.peek_at_stack(&asm.ctx, 0); @@ -1661,8 +1740,7 @@ fn gen_opt_plus( let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { Some(two_fixnums) => two_fixnums, None => { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -1802,8 +1880,7 @@ fn gen_splatkw( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime hash operand if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let comptime_hash = jit.peek_at_stack(&asm.ctx, 1); @@ -2176,13 +2253,13 @@ fn gen_expandarray( // Defer compilation so we can 
specialize on a runtime `self` if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); - // If the comptime receiver is not an array + // If the comptime receiver is not an array, speculate for when the `rb_check_array_type()` + // conversion returns nil and without side-effects (e.g. arbitrary method calls). if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } { // at compile time, ensure to_ary is not defined let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) }; @@ -2194,13 +2271,19 @@ fn gen_expandarray( return None; } + // Bail when method_missing is defined to avoid generating code to call it. + // Also, for simplicity, bail when BasicObject#method_missing has been removed. + if !assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(method_missing)) { + gen_counter_incr(jit, asm, Counter::expandarray_method_missing); + return None; + } + // invalidate compile block if to_ary is later defined jit.assume_method_lookup_stable(asm, target_cme); jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), array_opnd, array_opnd.into(), comptime_recv, @@ -2230,7 +2313,7 @@ fn gen_expandarray( } // Get the compile-time array length - let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 }; + let comptime_len = unsafe { rb_jit_array_len(comptime_recv) as u32 }; // Move the array from the stack and check that it's an array. guard_object_is_array( @@ -2364,6 +2447,11 @@ fn gen_getlocal_generic( ep_offset: u32, level: u32, ) -> Option<CodegenStatus> { + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) { // Load the local using SP register asm.local_opnd(ep_offset) @@ -2430,6 +2518,7 @@ fn gen_setlocal_generic( ep_offset: u32, level: u32, ) -> Option<CodegenStatus> { + // Post condition: The type of of the set local is updated in the Context. let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); // Fallback because of write barrier @@ -2451,9 +2540,19 @@ fn gen_setlocal_generic( ); asm.stack_pop(1); + // Set local type in the context + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); + } return Some(KeepCompiling); } + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) { // Load flags and the local using SP register let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); @@ -2498,6 +2597,7 @@ fn gen_setlocal_generic( ); } + // Set local type in the context if level == 0 { let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); asm.ctx.set_local_type(local_idx, value_type); @@ -2662,7 +2762,7 @@ fn gen_checkkeyword( ) -> Option<CodegenStatus> { // When a keyword is unspecified past index 32, a hash will be used // instead. This can only happen in iseqs taking more than 32 keywords. 
- if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } { + if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= VM_KW_SPECIFIED_BITS_MAX.try_into().unwrap() } { return None; } @@ -2718,10 +2818,7 @@ fn jit_chain_guard( idx: jit.insn_idx, }; - // Bail if we can't generate the branch - if gen_branch(jit, asm, bid, &deeper, None, None, target0_gen_fn).is_none() { - jit.block_abandoned = true; - } + jit.gen_branch(asm, bid, &deeper, None, None, target0_gen_fn); } else { target0_gen_fn.call(asm, Target::side_exit(counter), None); } @@ -2760,24 +2857,12 @@ fn gen_get_ivar( recv: Opnd, recv_opnd: YARVOpnd, ) -> Option<CodegenStatus> { - let comptime_val_klass = comptime_receiver.class_of(); - // If recv isn't already a register, load it. let recv = match recv { Opnd::InsnOut { .. } => recv, _ => asm.load(recv), }; - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; - // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; // Use a general C call at the last chain to avoid exits on megamorphic shapes @@ -2786,12 +2871,9 @@ fn gen_get_ivar( gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic); } - // If the class uses the default allocator, instances should all be T_OBJECT - // NOTE: This assumes nobody changes the allocator of the class after allocation. - // Eventually, we can encode whether an object is T_OBJECT or not - // inside object shapes. + // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type. // too-complex shapes can't use index access, so we use rb_ivar_get for them too. - if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic { + if !comptime_receiver.heap_object_p() || comptime_receiver.shape_too_complex() || megamorphic { // General case. Call rb_ivar_get(). 
// VALUE rb_ivar_get(VALUE obj, ID id) asm_comment!(asm, "call rb_ivar_get()"); @@ -2816,9 +2898,8 @@ fn gen_get_ivar( let ivar_index = unsafe { let shape_id = comptime_receiver.shape_id_of(); - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { + let mut ivar_index: u16 = 0; + if rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) { Some(ivar_index as usize) } else { None @@ -2828,10 +2909,7 @@ fn gen_get_ivar( // Guard heap object (recv_opnd must be used before stack_pop) guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); - // Compile time self is embedded and the ivar index lands within the object - let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; - - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -2859,28 +2937,37 @@ fn gen_get_ivar( asm.mov(out_opnd, Qnil.into()); } Some(ivar_index) => { - if embed_test_result { - // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - - // Load the variable - let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = Opnd::mem(64, recv, offs); - - // Push the ivar on the stack - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); + let ivar_opnd = if receiver_t_object { + if comptime_receiver.embedded_p() { + // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h + + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + Opnd::mem(64, recv, offs) + } else { + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); + + // Read the ivar from the extended table + Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32) + } } else { - // Compile time value is *not* embedded. - - // Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); + asm_comment!(asm, "call rb_ivar_get_at()"); - // Read the ivar from the extended table - let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + if assume_single_ractor_mode(jit, asm) { + asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())]) + } else { + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) + } + }; - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); - } + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); } } @@ -2895,8 +2982,7 @@ fn gen_getinstancevariable( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let ivar_name = jit.get_arg(0).as_u64(); @@ -2943,7 +3029,7 @@ fn gen_write_iv( // Compile time value is *not* embedded. 
// Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); // Write the ivar in to the extended table let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); @@ -2959,8 +3045,7 @@ fn gen_setinstancevariable( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let ivar_name = jit.get_arg(0).as_u64(); @@ -2988,8 +3073,6 @@ fn gen_set_ivar( recv_opnd: YARVOpnd, ic: Option<*const iseq_inline_iv_cache_entry>, ) -> Option<CodegenStatus> { - let comptime_val_klass = comptime_receiver.class_of(); - // If the comptime receiver is frozen, writing an IV will raise an exception // and we don't want to JIT code to deal with that situation. if comptime_receiver.is_frozen() { @@ -2999,16 +3082,6 @@ fn gen_set_ivar( let stack_type = asm.ctx.get_opnd_type(StackOpnd(0)); - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; - // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; // Use a general C call at the last chain to avoid exits on megamorphic shapes @@ -3019,11 +3092,10 @@ fn gen_set_ivar( // Get the iv index let shape_too_complex = comptime_receiver.shape_too_complex(); - let ivar_index = if !shape_too_complex { + let ivar_index = if !comptime_receiver.special_const_p() && !shape_too_complex { let shape_id = comptime_receiver.shape_id_of(); - let shape = unsafe { rb_shape_get_shape_by_id(shape_id) }; - let mut ivar_index: u32 = 0; - if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } { + let mut ivar_index: u16 = 0; + if unsafe { rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) } { Some(ivar_index as usize) } else { None @@ -3033,27 +3105,31 @@ fn gen_set_ivar( }; // The current shape doesn't contain this iv, we need to transition to another shape. + let mut new_shape_too_complex = false; let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() { - let current_shape = comptime_receiver.shape_of(); - let next_shape = unsafe { rb_shape_get_next_no_warnings(current_shape, comptime_receiver, ivar_name) }; - let next_shape_id = unsafe { rb_shape_id(next_shape) }; + let current_shape_id = comptime_receiver.shape_id_of(); + // We don't need to check about imemo_fields here because we're definitely looking at a T_OBJECT. + let klass = unsafe { rb_obj_class(comptime_receiver) }; + let next_shape_id = unsafe { rb_shape_transition_add_ivar_no_warnings(klass, current_shape_id, ivar_name) }; // If the VM ran out of shapes, or this class generated too many leaf, // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). 
- if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { + new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id) }; + if new_shape_too_complex { Some((next_shape_id, None, 0_usize)) } else { - let current_capacity = unsafe { (*current_shape).capacity }; + let current_capacity = unsafe { rb_yjit_shape_capacity(current_shape_id) }; + let next_capacity = unsafe { rb_yjit_shape_capacity(next_shape_id) }; // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to // reallocate it. - let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity }; + let needs_extension = next_capacity != current_capacity; // We can write to the object, but we need to transition the shape - let ivar_index = unsafe { (*current_shape).next_iv_index } as usize; + let ivar_index = unsafe { rb_yjit_shape_index(next_shape_id) } as usize; let needs_extension = if needs_extension { - Some((current_capacity, unsafe { (*next_shape).capacity })) + Some((current_capacity, next_capacity)) } else { None }; @@ -3062,12 +3138,10 @@ fn gen_set_ivar( } else { None }; - let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _))); - // If the receiver isn't a T_OBJECT, or uses a custom allocator, - // then just write out the IV write as a function call. + // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call. // too-complex shapes can't use index access, so we use rb_ivar_get for them too. - if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic { + if !receiver_t_object || shape_too_complex || new_shape_too_complex || megamorphic { // The function could raise FrozenError. // Note that this modifies REG_SP, which is why we do it first jit_prepare_non_leaf_call(jit, asm); @@ -3091,7 +3165,7 @@ fn gen_set_ivar( asm.ccall( rb_vm_setinstancevariable as *const u8, vec![ - Opnd::const_ptr(jit.iseq as *const u8), + VALUE(jit.iseq as usize).into(), Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), ivar_name.into(), val_opnd, @@ -3110,7 +3184,7 @@ fn gen_set_ivar( // Upgrade type guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap); - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -3270,8 +3344,7 @@ fn gen_definedivar( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize base on a runtime receiver if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let ivar_name = jit.get_arg(0).as_u64(); @@ -3285,7 +3358,7 @@ fn gen_definedivar( // Specialize base on compile time values let comptime_receiver = jit.peek_at_self(); - if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { + if comptime_receiver.special_const_p() || comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { // Fall back to calling rb_ivar_defined // Save the PC and SP because the callee may allocate @@ -3311,9 +3384,8 @@ fn gen_definedivar( let shape_id = comptime_receiver.shape_id_of(); let ivar_exists = unsafe { - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) + let mut ivar_index: u16 = 0; + rb_shape_get_iv_index(shape_id, 
ivar_name, &mut ivar_index) }; // Guard heap object (recv_opnd must be used before stack_pop) @@ -3336,9 +3408,7 @@ fn gen_definedivar( jit_putobject(asm, result); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, asm); - - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } fn gen_checktype( @@ -3500,8 +3570,7 @@ fn gen_fixnum_cmp( Some(two_fixnums) => two_fixnums, None => { // Defer compilation so we can specialize based on a runtime receiver - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -3614,7 +3683,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, a_opnd, a_opnd.into(), comptime_a, @@ -3640,7 +3708,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, b_opnd, b_opnd.into(), comptime_b, @@ -3680,14 +3747,12 @@ fn gen_opt_eq( Some(specialized) => specialized, None => { // Defer compilation so we can specialize base on a runtime receiver - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; if specialized { - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } else { gen_opt_send_without_block(jit, asm) } @@ -3718,8 +3783,7 @@ fn gen_opt_aref( // Defer compilation so we can specialize base on a runtime receiver if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } // Specialize base on compile time values @@ -3740,7 +3804,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3768,8 +3831,7 @@ fn gen_opt_aref( } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } else if comptime_recv.class_of() == unsafe { rb_cHash } { if !assume_bop_not_redefined(jit, asm, HASH_REDEFINED_OP_FLAG, BOP_AREF) { return None; @@ -3781,7 +3843,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3805,8 +3866,7 @@ fn gen_opt_aref( asm.mov(stack_ret, val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } else { // General case. Call the [] method. 
gen_opt_send_without_block(jit, asm) @@ -3819,8 +3879,7 @@ fn gen_opt_aset( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let comptime_recv = jit.peek_at_stack(&asm.ctx, 2); @@ -3836,7 +3895,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv, recv.into(), comptime_recv, @@ -3848,7 +3906,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cInteger }, key, key.into(), comptime_key, @@ -3875,14 +3932,12 @@ fn gen_opt_aset( let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm) } else if comptime_recv.class_of() == unsafe { rb_cHash } { // Guard receiver is a Hash jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv, recv.into(), comptime_recv, @@ -3904,45 +3959,12 @@ fn gen_opt_aset( let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } else { gen_opt_send_without_block(jit, asm) } } -fn gen_opt_aref_with( - jit: &mut JITState, - asm: &mut Assembler, -) -> Option<CodegenStatus>{ - // We might allocate or raise - jit_prepare_non_leaf_call(jit, asm); - - let key_opnd = Opnd::Value(jit.get_arg(0)); - let recv_opnd = asm.stack_opnd(0); - - extern "C" { - fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE; - } - - let val_opnd = asm.ccall( - rb_vm_opt_aref_with as *const u8, - vec![ - recv_opnd, - key_opnd - ], - ); - asm.stack_pop(1); // Keep it on stack during GC - - asm.cmp(val_opnd, Qundef.into()); - asm.je(Target::side_exit(Counter::opt_aref_with_qundef)); - - let top = asm.stack_push(Type::Unknown); - asm.mov(top, val_opnd); - - return Some(KeepCompiling); -} - fn gen_opt_and( jit: &mut JITState, asm: &mut Assembler, @@ -3951,8 +3973,7 @@ fn gen_opt_and( Some(two_fixnums) => two_fixnums, None => { // Defer compilation so we can specialize on a runtime `self` - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -3990,8 +4011,7 @@ fn gen_opt_or( Some(two_fixnums) => two_fixnums, None => { // Defer compilation so we can specialize on a runtime `self` - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -4029,8 +4049,7 @@ fn gen_opt_minus( Some(two_fixnums) => two_fixnums, None => { // Defer compilation so we can specialize on a runtime `self` - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -4069,8 +4088,7 @@ fn gen_opt_mult( let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { Some(two_fixnums) => two_fixnums, None => { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -4121,8 +4139,7 @@ fn gen_opt_mod( Some(two_fixnums) => two_fixnums, None => { // Defer compilation so we can specialize on a runtime `self` - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } }; @@ -4214,11 +4231,11 @@ fn gen_opt_ary_freeze( return None; } - let str = jit.get_arg(0); + let ary = jit.get_arg(0); // Push the return value onto the stack let stack_ret = asm.stack_push(Type::CArray); - asm.mov(stack_ret, str.into()); + asm.mov(stack_ret, ary.into()); Some(KeepCompiling) } @@ -4231,11 +4248,11 @@ fn gen_opt_hash_freeze( return None; } - 
let str = jit.get_arg(0); + let hash = jit.get_arg(0); // Push the return value onto the stack let stack_ret = asm.stack_push(Type::CHash); - asm.mov(stack_ret, str.into()); + asm.mov(stack_ret, hash.into()); Some(KeepCompiling) } @@ -4289,6 +4306,53 @@ fn gen_opt_newarray_max( Some(KeepCompiling) } +fn gen_opt_duparray_send( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let method = jit.get_arg(1).as_u64(); + + if method == ID!(include_p) { + gen_opt_duparray_send_include_p(jit, asm) + } else { + None + } +} + +fn gen_opt_duparray_send_include_p( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + asm_comment!(asm, "opt_duparray_send include_p"); + + let ary = jit.get_arg(0); + let argc = jit.get_arg(2).as_usize(); + + // Save the PC and SP because we may call #include? + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_duparray_include_p(ec: EcPtr, ary: VALUE, target: VALUE) -> VALUE; + } + + let target = asm.ctx.sp_opnd(-1); + + let val_opnd = asm.ccall( + rb_vm_opt_duparray_include_p as *const u8, + vec![ + EC, + ary.into(), + target, + ], + ); + + asm.stack_pop(argc); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + fn gen_opt_newarray_send( jit: &mut JITState, asm: &mut Assembler, @@ -4301,6 +4365,8 @@ fn gen_opt_newarray_send( gen_opt_newarray_max(jit, asm) } else if method == VM_OPT_NEWARRAY_SEND_HASH { gen_opt_newarray_hash(jit, asm) + } else if method == VM_OPT_NEWARRAY_SEND_INCLUDE_P { + gen_opt_newarray_include_p(jit, asm) } else if method == VM_OPT_NEWARRAY_SEND_PACK { gen_opt_newarray_pack_buffer(jit, asm, 1, None) } else if method == VM_OPT_NEWARRAY_SEND_PACK_BUFFER { @@ -4386,6 +4452,42 @@ fn gen_opt_newarray_hash( Some(KeepCompiling) } +fn gen_opt_newarray_include_p( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + asm_comment!(asm, "opt_newarray_send include?"); + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call customized methods. + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_include_p(ec: EcPtr, num: u32, elts: *const VALUE, target: VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + let target = asm.ctx.sp_opnd(-1); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_include_p as *const u8, + vec![ + EC, + (num - 1).into(), + values_ptr, + target + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + fn gen_opt_newarray_min( jit: &mut JITState, asm: &mut Assembler, @@ -4459,8 +4561,7 @@ fn gen_opt_case_dispatch( // hash lookup, at least for small hashes, but it's worth revisiting this // assumption in the future. 
if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let case_hash = jit.get_arg(0); @@ -4572,15 +4673,14 @@ fn gen_branchif( // Generate the branch instructions let ctx = asm.ctx; - gen_branch( - jit, + jit.gen_branch( asm, jump_block, &ctx, Some(next_block), Some(&ctx), BranchGenFn::BranchIf(Cell::new(BranchShape::Default)), - )?; + ); } Some(EndBlock) @@ -4626,15 +4726,14 @@ fn gen_branchunless( // Generate the branch instructions let ctx = asm.ctx; - gen_branch( - jit, + jit.gen_branch( asm, jump_block, &ctx, Some(next_block), Some(&ctx), BranchGenFn::BranchUnless(Cell::new(BranchShape::Default)), - )?; + ); } Some(EndBlock) @@ -4677,15 +4776,14 @@ fn gen_branchnil( asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); // Generate the branch instructions let ctx = asm.ctx; - gen_branch( - jit, + jit.gen_branch( asm, jump_block, &ctx, Some(next_block), Some(&ctx), BranchGenFn::BranchNil(Cell::new(BranchShape::Default)), - )?; + ); } Some(EndBlock) @@ -4733,6 +4831,69 @@ fn gen_throw( Some(EndBlock) } +fn gen_opt_new( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let cd = jit.get_arg(0).as_ptr(); + let jump_offset = jit.get_arg(1).as_i32(); + + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + let mid = unsafe { vm_ci_mid(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + + let recv_idx = argc; + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); + + // This is a singleton class + let comptime_recv_klass = comptime_recv.class_of(); + + let recv = asm.stack_opnd(recv_idx); + + perf_call!("opt_new: ", jit_guard_known_klass( + jit, + asm, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::guard_send_klass_megamorphic, + )); + + // We now know that it's always comptime_recv_klass + if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) { + // Fast path + // call rb_class_alloc to actually allocate + jit_prepare_non_leaf_call(jit, asm); + let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]); + + // Get a reference to the stack location where we need to save the + // return instance. 
+ let result = asm.stack_opnd(recv_idx + 1); + let recv = asm.stack_opnd(recv_idx); + + // Replace the receiver for the upcoming initialize call + asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(recv, obj); + + // Save the allocated object for return + asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(result, obj); + + jump_to_next_insn(jit, asm) + } else { + // general case + + // Get the branch target instruction offsets + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; + return end_block_with_jump(jit, asm, jump_idx as u16); + } +} + fn gen_jump( jit: &mut JITState, asm: &mut Assembler, @@ -4766,13 +4927,13 @@ fn gen_jump( fn jit_guard_known_klass( jit: &mut JITState, asm: &mut Assembler, - known_klass: VALUE, obj_opnd: Opnd, insn_opnd: YARVOpnd, sample_instance: VALUE, max_chain_depth: u8, counter: Counter, ) { + let known_klass = sample_instance.class_of(); let val_type = asm.ctx.get_opnd_type(insn_opnd); if val_type.known_class() == Some(known_klass) { @@ -4878,7 +5039,7 @@ fn jit_guard_known_klass( assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") }; } else { - assert!(!val_type.is_imm()); + assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}"); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. @@ -5272,6 +5433,35 @@ fn jit_rb_int_succ( true } +fn jit_rb_int_pred( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard the receiver is fixnum + let recv_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let recv = asm.stack_pop(1); + if recv_type != Type::Fixnum { + asm_comment!(asm, "guard object is fixnum"); + asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + asm.jz(Target::side_exit(Counter::send_pred_not_fixnum)); + } + + asm_comment!(asm, "Integer#pred"); + let out_val = asm.sub(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1 + asm.jo(Target::side_exit(Counter::send_pred_underflow)); + + // Push the output onto the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + true +} + fn jit_rb_int_div( jit: &mut JITState, asm: &mut Assembler, @@ -5493,7 +5683,6 @@ fn jit_rb_float_plus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5535,7 +5724,6 @@ fn jit_rb_float_minus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5577,7 +5765,6 @@ fn jit_rb_float_mul( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5619,7 +5806,6 @@ fn jit_rb_float_div( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5787,6 +5973,82 @@ fn jit_rb_str_byteslice( true } +fn jit_rb_str_aref_m( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // In yjit-bench the most common usages by far are single fixnum or two fixnums. 
+ // rb_str_substr should be leaf if indexes are fixnums + if argc == 2 { + match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) { + (Type::Fixnum, Type::Fixnum) => {}, + // There is a two-argument form of (RegExp, Fixnum) which needs a different c func. + // Other types will raise. + _ => { return false }, + } + } else if argc == 1 { + match asm.ctx.get_opnd_type(StackOpnd(0)) { + Type::Fixnum => {}, + // Besides Fixnum this could also be a Range or a RegExp which are handled by separate c funcs. + // Other types will raise. + _ => { + // If the context doesn't have the type info we try a little harder. + let comptime_arg = jit.peek_at_stack(&asm.ctx, 0); + let arg0 = asm.stack_opnd(0); + if comptime_arg.fixnum_p() { + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_str_aref_not_fixnum, + ); + } else { + return false + } + }, + } + } else { + return false + } + + asm_comment!(asm, "String#[]"); + + // rb_str_substr allocates a substring + jit_prepare_call_with_gc(jit, asm); + + // Get stack operands after potential SP change + + // The "empty" arg distinguishes between the normal "one arg" behavior + // and the "two arg" special case that returns an empty string + // when the begin index is the length of the string. + // See the usages of rb_str_substr in string.c for more information. + let (beg_idx, empty, len) = if argc == 2 { + (1, Opnd::Imm(1), asm.stack_opnd(0)) + } else { + // If there is only one arg, the length will be 1. + (0, Opnd::Imm(0), VALUE::fixnum_from_usize(1).into()) + }; + + let beg = asm.stack_opnd(beg_idx); + let recv = asm.stack_opnd(beg_idx + 1); + + let ret_opnd = asm.ccall(rb_str_substr_two_fixnums as *const u8, vec![recv, beg, len, empty]); + asm.stack_pop(beg_idx as usize + 2); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ret_opnd); + + true +} + fn jit_rb_str_getbyte( jit: &mut JITState, asm: &mut Assembler, @@ -5807,7 +6069,6 @@ fn jit_rb_str_getbyte( jit_guard_known_klass( jit, asm, - comptime_idx.class_of(), idx, idx.into(), comptime_idx, @@ -5907,6 +6168,41 @@ fn jit_rb_str_to_s( false } +fn jit_rb_str_dup( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + // We specialize only the BARE_STRING_P case. Otherwise it's not leaf. 
+ if unsafe { known_recv_class != Some(rb_cString) } { + return false; + } + asm_comment!(asm, "String#dup"); + + jit_prepare_call_with_gc(jit, asm); + + let recv_opnd = asm.stack_opnd(0); + let recv_opnd = asm.load(recv_opnd); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(64, recv_opnd, shape_id_offset); + asm.test(shape_opnd, Opnd::UImm(SHAPE_ID_HAS_IVAR_MASK as u64)); + asm.jnz(Target::side_exit(Counter::send_str_dup_exivar)); + + // Call rb_str_dup + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, ret_opnd); + + true +} + // Codegen for rb_str_empty_p() fn jit_rb_str_empty_p( _jit: &mut JITState, @@ -5957,7 +6253,7 @@ fn jit_rb_str_concat_codepoint( guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0)); - asm.ccall(rb_yjit_str_concat_codepoint as *const u8, vec![recv, codepoint]); + asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]); // The receiver is the return value, so we only need to pop the codepoint argument off the stack. // We can reuse the receiver slot in the stack as the return value. @@ -6263,6 +6559,7 @@ fn jit_rb_f_block_given_p( true } +/// Codegen for `block_given?` and `defined?(yield)` fn gen_block_given( jit: &mut JITState, asm: &mut Assembler, @@ -6272,16 +6569,24 @@ fn gen_block_given( ) { asm_comment!(asm, "block_given?"); - // Same as rb_vm_frame_block_handler - let ep_opnd = gen_get_lep(jit, asm); - let block_handler = asm.load( - Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) - ); + // `yield` goes to the block handler stowed in the "local" iseq which is + // the current iseq or a parent. Only the "method" iseq type can be passed a + // block handler. (e.g. `yield` in the top level script is a syntax error.) + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; + if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { + // Same as rb_vm_frame_block_handler + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); - // Return `block_handler != VM_BLOCK_HANDLER_NONE` - asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); - let block_given = asm.csel_ne(true_opnd, false_opnd); - asm.mov(out_opnd, block_given); + // Return `block_handler != VM_BLOCK_HANDLER_NONE` + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + let block_given = asm.csel_ne(true_opnd, false_opnd); + asm.mov(out_opnd, block_given); + } else { + asm.mov(out_opnd, false_opnd); + } } // Codegen for rb_class_superclass() @@ -6298,6 +6603,7 @@ fn jit_rb_class_superclass( fn rb_class_superclass(klass: VALUE) -> VALUE; } + // It may raise "uninitialized class" if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) { return false; } @@ -6353,7 +6659,7 @@ fn jit_thread_s_current( asm.stack_pop(1); // ec->thread_ptr - let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR as i32)); // thread->self let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); @@ -6616,11 +6922,12 @@ fn gen_send_cfunc( // Increment total cfunc send count gen_counter_incr(jit, asm, Counter::num_send_cfunc); - // Delegate to codegen for C methods if we have it. + // Delegate to codegen for C methods if we have it and the callsite is simple enough. 
if kw_arg.is_null() && !kw_splat && flags & VM_CALL_OPT_SEND == 0 && flags & VM_CALL_ARGS_SPLAT == 0 && + flags & VM_CALL_ARGS_BLOCKARG == 0 && (cfunc_argc == -1 || argc == cfunc_argc) { let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc; if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) { @@ -6640,8 +6947,7 @@ fn gen_send_cfunc( gen_counter_incr(jit, asm, Counter::num_send_cfunc_inline); // cfunc codegen generated code. Terminate the block so // there isn't multiple calls in the same block. - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } } } @@ -6702,7 +7008,7 @@ fn gen_send_cfunc( return None; } - let block_arg_type = if block_arg { + let mut block_arg_type = if block_arg { Some(asm.ctx.get_opnd_type(StackOpnd(0))) } else { None @@ -6710,33 +7016,25 @@ fn gen_send_cfunc( match block_arg_type { Some(Type::Nil | Type::BlockParamProxy) => { - // We'll handle this later - } - None => { - // Nothing to do - } - _ => { - gen_counter_incr(jit, asm, Counter::send_cfunc_block_arg); - return None; - } - } - - match block_arg_type { - Some(Type::Nil) => { - // We have a nil block arg, so let's pop it off the args + // We don't need the actual stack value for these asm.stack_pop(1); } - Some(Type::BlockParamProxy) => { - // We don't need the actual stack value + Some(Type::Unknown | Type::UnknownImm) if jit.peek_at_stack(&asm.ctx, 0).nil_p() => { + // The sample blockarg is nil, so speculate that's the case. + asm.cmp(asm.stack_opnd(0), Qnil.into()); + asm.jne(Target::side_exit(Counter::guard_send_cfunc_block_not_nil)); + block_arg_type = Some(Type::Nil); asm.stack_pop(1); } None => { // Nothing to do } _ => { - assert!(false); + gen_counter_incr(jit, asm, Counter::send_cfunc_block_arg); + return None; } } + let block_arg_type = block_arg_type; // drop `mut` // Pop the empty kw_splat hash if kw_splat { @@ -6826,7 +7124,7 @@ fn gen_send_cfunc( asm_comment!(asm, "set ec->cfp"); let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32))); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), new_cfp); if !kw_arg.is_null() { // Build a hash from all kwargs passed @@ -6922,7 +7220,7 @@ fn gen_send_cfunc( // Pop the stack frame (ec->cfp++) // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved // register - let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32); asm.store(ec_cfp_opnd, CFP); // cfunc calls may corrupt types @@ -6933,8 +7231,7 @@ fn gen_send_cfunc( // Jump (fall through) to the call continuation block // We do this to end the current block after the call - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } // Generate RARRAY_LEN. 
For array_opnd, use Opnd::Reg to reduce memory access, @@ -7089,7 +7386,7 @@ fn gen_send_bmethod( ) -> Option<CodegenStatus> { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; - let proc = unsafe { rb_yjit_get_proc_ptr(procv) }; + let proc = unsafe { rb_jit_get_proc_ptr(procv) }; let proc_block = unsafe { &(*proc).block }; if proc_block.type_ != block_type_iseq { @@ -7099,11 +7396,12 @@ fn gen_send_bmethod( let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, asm) { - gen_counter_incr(jit, asm, Counter::send_bmethod_ractor); - return None; + if !procv.shareable_p() { + let ractor_serial = unsafe { rb_yjit_cme_ractor_serial(cme) }; + asm_comment!(asm, "guard current ractor == {}", ractor_serial); + let current_ractor_serial = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_RACTOR_ID as i32)); + asm.cmp(current_ractor_serial, ractor_serial.into()); + asm.jne(Target::side_exit(Counter::send_bmethod_ractor)); } // Passing a block to a block needs logic different from passing @@ -7124,7 +7422,7 @@ enum IseqReturn { Receiver, } -extern { +extern "C" { fn rb_simple_iseq_p(iseq: IseqPtr) -> bool; fn rb_iseq_only_kwparam_p(iseq: IseqPtr) -> bool; } @@ -7169,6 +7467,12 @@ fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, block: Opti let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + // Only inline getlocal on a parameter. DCE in the IESQ builder can + // make a two-instruction ISEQ that does not return a parameter. + if local_idx >= unsafe { get_iseq_body_param_size(iseq) } { + return None; + } + if unsafe { rb_simple_iseq_p(iseq) } { return Some(IseqReturn::LocalVariable(local_idx)); } else if unsafe { rb_iseq_only_kwparam_p(iseq) } { @@ -7325,7 +7629,7 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::send_iseq_splat_not_array); return None; } else { - unsafe { rb_yjit_array_len(array) as u32} + unsafe { rb_jit_array_len(array) as u32} }; // Arity check accounting for size of the splat. When callee has rest parameters, we insert @@ -7416,7 +7720,7 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::num_send_iseq); // Shortcut for special `Primitive.attr! :leaf` builtins - let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins @@ -7473,8 +7777,7 @@ fn gen_send_iseq( // Seems like a safe assumption. 
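// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the recurring rewrite of
//     jump_to_next_insn(jit, asm);
//     Some(EndBlock)
// into a plain
//     jump_to_next_insn(jit, asm)
// reads naturally if the helper itself now returns Option<CodegenStatus>, so a
// failure inside it (for example, running out of code memory) propagates to
// the caller instead of being hidden behind an unconditional Some(EndBlock).
// Hypothetical, simplified signatures:
#[allow(dead_code)]
#[derive(Debug, PartialEq)]
enum CodegenStatusSketch { KeepCompiling, EndBlock }

fn jump_to_next_insn_sketch(out_of_memory: bool) -> Option<CodegenStatusSketch> {
    if out_of_memory { None } else { Some(CodegenStatusSketch::EndBlock) }
}

fn gen_some_insn_sketch(out_of_memory: bool) -> Option<CodegenStatusSketch> {
    // Forward the helper's result instead of always reporting success
    jump_to_next_insn_sketch(out_of_memory)
}
// ---------------------------------------------------------------------------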
// Let guard chains share the same successor - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } } @@ -7512,8 +7815,7 @@ fn gen_send_iseq( } // Let guard chains share the same successor - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } // Stack overflow check @@ -7595,6 +7897,11 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::send_iseq_clobbering_block_arg); return None; } + if iseq_has_rest || has_kwrest { + // The proc would be stored above the current stack top, where GC can't see it + gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_gc_unsafe); + return None; + } let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg let callee_specval = asm.ctx.sp_opnd(callee_specval); asm.store(callee_specval, proc); @@ -7745,14 +8052,14 @@ fn gen_send_iseq( // Pop surplus positional arguments when yielding if arg_setup_block { - let extras = argc - required_num - opt_num; + let extras = argc - required_num - opt_num - kw_arg_num; if extras > 0 { // Checked earlier. If there are keyword args, then // the positional arguments are not at the stack top. assert_eq!(0, kw_arg_num); asm.stack_pop(extras as usize); - argc = required_num + opt_num; + argc = required_num + opt_num + kw_arg_num; } } @@ -7802,7 +8109,6 @@ fn gen_send_iseq( } } - // Don't nil fill forwarding iseqs if !forwarding { // Nil-initialize missing optional parameters nil_fill( @@ -7823,13 +8129,13 @@ fn gen_send_iseq( // Nil-initialize non-parameter locals nil_fill( "nil-initialize locals", - { - let begin = -argc + num_params; - let end = -argc + num_locals; + { + let begin = -argc + num_params; + let end = -argc + num_locals; - begin..end - }, - asm + begin..end + }, + asm ); } @@ -7837,9 +8143,13 @@ fn gen_send_iseq( assert_eq!(1, num_params); // Write the CI in to the stack and ensure that it actually gets // flushed to memory + asm_comment!(asm, "put call info for forwarding"); let ci_opnd = asm.stack_opnd(-1); asm.ctx.dealloc_reg(ci_opnd.reg_opnd()); asm.mov(ci_opnd, VALUE(ci as usize).into()); + + // Nil-initialize other locals which are above the CI + nil_fill("nil-initialize locals", 1..num_locals, asm); } // Points to the receiver operand on the stack unless a captured environment is used @@ -7893,53 +8203,16 @@ fn gen_send_iseq( pc: None, // We are calling into jitted code, which will set the PC as necessary })); - // Create a context for the callee - let mut callee_ctx = Context::default(); - - // Transfer some stack temp registers to the callee's locals for arguments. - let mapped_temps = if !forwarding { - asm.map_temp_regs_to_args(&mut callee_ctx, argc) - } else { - // When forwarding, the callee's local table has only a callinfo, - // so we can't map the actual arguments to the callee's locals. - vec![] - }; - - // Spill stack temps and locals that are not used by the callee. - // This must be done before changing the SP register. - asm.spill_regs_except(&mapped_temps); - - // Saving SP before calculating ep avoids a dependency on a register - // However this must be done after referencing frame.recv, which may be SP-relative - asm.mov(SP, callee_sp); - - // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. - // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall(). 
- if get_option!(gen_stats) { - // Protect caller-saved registers in case they're used for arguments - asm.cpush_all(); - - // Assemble the ISEQ name string - let name_str = get_iseq_name(iseq); - - // Get an index for this ISEQ name - let iseq_idx = get_iseq_idx(&name_str); - - // Increment the counter for this cfunc - asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); - asm.cpop_all(); - } - // No need to set cfp->pc since the callee sets it whenever calling into routines // that could look at it through jit_save_pc(). // mov(cb, REG0, const_ptr_opnd(start_pc)); // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0); - // Stub so we can return to JITted code - let return_block = BlockId { - iseq: jit.iseq, - idx: jit.next_insn_idx(), - }; + // Create a blockid for the callee + let callee_blockid = BlockId { iseq, idx: start_pc_offset }; + + // Create a context for the callee + let mut callee_ctx = Context::default(); // If the callee has :inline_block annotation and the callsite has a block ISEQ, // duplicate a callee block for each block ISEQ to make its `yield` monomorphic. @@ -7968,29 +8241,92 @@ fn gen_send_iseq( }; callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type); - // Now that callee_ctx is prepared, discover a block that can be reused if we move some registers. - // If there's such a block, move registers accordingly to avoid creating a new block. - let blockid = BlockId { iseq, idx: start_pc_offset }; - if !mapped_temps.is_empty() { - // Discover a block that have the same things in different (or same) registers - if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) { - // List pairs of moves for making the register mappings compatible + // Spill or preserve argument registers + if forwarding { + // When forwarding, the callee's local table has only a callinfo, + // so we can't map the actual arguments to the callee's locals. + asm.spill_regs(); + } else { + // Discover stack temp registers that can be used as the callee's locals + let mapped_temps = asm.map_temp_regs_to_args(&mut callee_ctx, argc); + + // Spill stack temps and locals that are not used by the callee. + // This must be done before changing the SP register. + asm.spill_regs_except(&mapped_temps); + + // If the callee block has been compiled before, spill/move registers to reuse the existing block + // for minimizing the number of blocks we need to compile. + if let Some(existing_reg_mapping) = find_most_compatible_reg_mapping(callee_blockid, &callee_ctx) { + asm_comment!(asm, "reuse maps: {:?} -> {:?}", callee_ctx.get_reg_mapping(), existing_reg_mapping); + + // Spill the registers that are not used in the existing block. + // When the same ISEQ is compiled as an entry block, it starts with no registers allocated. + for ®_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() { + if existing_reg_mapping.get_reg(reg_opnd).is_none() { + match reg_opnd { + RegOpnd::Local(local_idx) => { + let spilled_temp = asm.stack_opnd(argc - local_idx as i32 - 1); + asm.spill_reg(spilled_temp); + callee_ctx.dealloc_reg(reg_opnd); + } + RegOpnd::Stack(_) => unreachable!("callee {:?} should have been spilled", reg_opnd), + } + } + } + assert!(callee_ctx.get_reg_mapping().get_reg_opnds().len() <= existing_reg_mapping.get_reg_opnds().len()); + + // Load the registers that are spilled in this block but used in the existing block. + // When there are multiple callsites, some registers spilled in this block may be used at other callsites. 
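// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the block-reuse logic above and
// below boils down to (1) counting how many register slots disagree between
// the caller's mapping and an already-compiled callee block's mapping, and
// (2) preferring the candidate with the fewest mismatches, then fixing the
// remainder by spilling, loading, or shuffling registers. A hypothetical,
// much-simplified model of RegMapping (slots holding an operand id or None):
type MappingSketch = [Option<u8>; 5];

// Mirrors the reworked RegMapping::diff further down: count differing slots.
fn mapping_diff(src: &MappingSketch, dst: &MappingSketch) -> usize {
    src.iter().zip(dst).filter(|(a, b)| a != b).count()
}

// Simplified stand-in for find_most_compatible_reg_mapping: pick the
// already-compiled mapping that needs the fewest fixes (ignoring the type
// compatibility checks the real code also performs).
fn most_compatible(candidates: &[MappingSketch], want: &MappingSketch) -> Option<MappingSketch> {
    candidates.iter().copied().min_by_key(|cand| mapping_diff(want, cand))
}
// ---------------------------------------------------------------------------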
+ for ®_opnd in existing_reg_mapping.get_reg_opnds().iter() { + if callee_ctx.get_reg_mapping().get_reg(reg_opnd).is_none() { + match reg_opnd { + RegOpnd::Local(local_idx) => { + callee_ctx.alloc_reg(reg_opnd); + let loaded_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()]; + let loaded_temp = asm.stack_opnd(argc - local_idx as i32 - 1); + asm.load_into(Opnd::Reg(loaded_reg), loaded_temp); + } + RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd), + } + } + } + assert_eq!(callee_ctx.get_reg_mapping().get_reg_opnds().len(), existing_reg_mapping.get_reg_opnds().len()); + + // Shuffle registers to make the register mappings compatible let mut moves = vec![]; for ®_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() { let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()]; - let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()]; + let new_reg = TEMP_REGS[existing_reg_mapping.get_reg(reg_opnd).unwrap()]; moves.push((new_reg, Opnd::Reg(old_reg))); } - - // Shuffle them to break cycles and generate the moves - let moves = Assembler::reorder_reg_moves(&moves); - for (reg, opnd) in moves { + for (reg, opnd) in Assembler::reorder_reg_moves(&moves) { asm.load_into(Opnd::Reg(reg), opnd); } - callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping()); + callee_ctx.set_reg_mapping(existing_reg_mapping); } } + // Update SP register for the callee. This must be done after referencing frame.recv, + // which may be SP-relative. + asm.mov(SP, callee_sp); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. + // We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall(). + if get_option!(gen_stats) { + // Protect caller-saved registers in case they're used for arguments + let mapping = asm.cpush_all(); + + // Assemble the ISEQ name string + let name_str = get_iseq_name(iseq); + + // Get an index for this ISEQ name + let iseq_idx = get_iseq_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); + asm.cpop_all(mapping); + } + // The callee might change locals through Kernel#binding and other means. asm.clear_local_types(); @@ -8003,32 +8339,33 @@ fn gen_send_iseq( return_asm.ctx.reset_chain_depth_and_defer(); return_asm.ctx.set_as_return_landing(); + // Stub so we can return to JITted code + let return_block = BlockId { + iseq: jit.iseq, + idx: jit.next_insn_idx(), + }; + // Write the JIT return address on the callee frame - if gen_branch( - jit, + jit.gen_branch( asm, return_block, &return_asm.ctx, None, None, BranchGenFn::JITReturn, - ).is_none() { - // Returning None here would have send_dynamic() code following incomplete - // send code. Abandon the block instead. 
- jit.block_abandoned = true; - } + ); // ec->cfp is updated after cfp->jit_return for rb_profile_frames() safety asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Directly jump to the entry point of the callee gen_direct_jump( jit, &callee_ctx, - blockid, + callee_blockid, asm, ); @@ -8594,8 +8931,7 @@ fn gen_struct_aref( let ret = asm.stack_push(Type::Unknown); asm.mov(ret, val); - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } fn gen_struct_aset( @@ -8611,6 +8947,12 @@ fn gen_struct_aset( return None; } + // If the comptime receiver is frozen, writing a struct member will raise an exception + // and we don't want to JIT code to deal with that situation. + if comptime_recv.is_frozen() { + return None; + } + if c_method_tracing_currently_enabled(jit) { // Struct accesses need fire c_call and c_return events, which we can't support // See :attr-tracing: @@ -8631,6 +8973,17 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); + // Even if the comptime recv was not frozen, future recv may be. So we need to emit a guard + // that the recv is not frozen. + // We know all structs are heap objects, so we can check the flag directly. + let recv = asm.stack_opnd(1); + let recv = asm.load(recv); + let flags = asm.load(Opnd::mem(VALUE_BITS, recv, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags, (RUBY_FL_FREEZE as u64).into()); + asm.jnz(Target::side_exit(Counter::opt_aset_frozen)); + + // Not frozen, so we can proceed. + asm_comment!(asm, "struct aset"); let val = asm.stack_pop(1); @@ -8641,8 +8994,7 @@ fn gen_struct_aset( let ret = asm.stack_push(Type::Unknown); asm.mov(ret, val); - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } // Generate code that calls a method with dynamic dispatch @@ -8684,8 +9036,7 @@ fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>( jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen); // End the current block for invalidationg and sharing the same successor - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } fn gen_send_general( @@ -8711,15 +9062,14 @@ fn gen_send_general( // Defer compilation so we can specialize on class of receiver if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let ci_flags = unsafe { vm_ci_flag(ci) }; // Dynamic stack layout. No good way to support without inlining. 
if ci_flags & VM_CALL_FORWARDING != 0 { - gen_counter_incr(jit, asm, Counter::send_iseq_forwarding); + gen_counter_incr(jit, asm, Counter::send_forwarding); return None; } @@ -8745,7 +9095,6 @@ fn gen_send_general( let recv_opnd: YARVOpnd = recv.into(); // Log the name of the method we're calling to - #[cfg(feature = "disasm")] asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); // Gather some statistics about sends @@ -8765,7 +9114,6 @@ fn gen_send_general( perf_call!("gen_send_general: ", jit_guard_known_klass( jit, asm, - comptime_recv_klass, recv, recv_opnd, comptime_recv, @@ -9018,7 +9366,6 @@ fn gen_send_general( } OPTIMIZED_METHOD_TYPE_CALL => { - if block.is_some() { gen_counter_incr(jit, asm, Counter::send_call_block); return None; @@ -9034,13 +9381,6 @@ fn gen_send_general( return None; } - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, asm) { - gen_counter_incr(jit, asm, Counter::send_call_multi_ractor); - return None; - } - // If this is a .send call we need to adjust the stack if flags & VM_CALL_OPT_SEND != 0 { handle_opt_send_shift_stack(asm, argc); @@ -9070,8 +9410,9 @@ fn gen_send_general( let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); - return Some(KeepCompiling); + // End the block to allow invalidating the next instruction + return jump_to_next_insn(jit, asm); } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { gen_counter_incr(jit, asm, Counter::send_optimized_method_block_call); @@ -9244,7 +9585,24 @@ fn gen_sendforward( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { - return gen_send(jit, asm); + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of sendforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_sendforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) } fn gen_invokeblock( @@ -9275,8 +9633,7 @@ fn gen_invokeblock_specialized( cd: *const rb_call_data, ) -> Option<CodegenStatus> { if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } // Fallback to dynamic dispatch if this callsite is megamorphic @@ -9319,7 +9676,7 @@ fn gen_invokeblock_specialized( // If the current ISEQ is annotated to be inlined but it's not being inlined here, // generate a dynamic dispatch to avoid making this yield megamorphic. 
- if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined); return None; } @@ -9390,8 +9747,7 @@ fn gen_invokeblock_specialized( asm.clear_local_types(); // Share the successor with other chains - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } else if comptime_handler.symbol_p() { gen_counter_incr(jit, asm, Counter::invokeblock_symbol); None @@ -9411,7 +9767,7 @@ fn gen_invokesuper( return Some(status); } - // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuper let blockiseq = jit.get_arg(1).as_iseq(); gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { extern "C" { @@ -9428,7 +9784,23 @@ fn gen_invokesuperforward( jit: &mut JITState, asm: &mut Assembler, ) -> Option<CodegenStatus> { - return gen_invokesuper(jit, asm); + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuperforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuperforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) } fn gen_invokesuper_specialized( @@ -9438,8 +9810,7 @@ fn gen_invokesuper_specialized( ) -> Option<CodegenStatus> { // Defer compilation so we can specialize on class of receiver if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } // Handle the last two branches of vm_caller_setup_arg_block @@ -9583,7 +9954,7 @@ fn gen_leave( asm_comment!(asm, "pop stack frame"); let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, incr_cfp); - asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Load the return value let retval_opnd = asm.stack_pop(1); @@ -9672,8 +10043,7 @@ fn gen_objtostring( asm: &mut Assembler, ) -> Option<CodegenStatus> { if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } let recv = asm.stack_opnd(0); @@ -9683,7 +10053,6 @@ fn gen_objtostring( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), recv, recv.into(), comptime_recv, @@ -9693,6 +10062,34 @@ fn gen_objtostring( // No work needed. The string value is already on the top of the stack. 
Some(KeepCompiling) + } else if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_SYMBOL) } && assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(to_s)) { + jit_guard_known_klass( + jit, + asm, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::objtostring_not_string, + ); + + extern "C" { + fn rb_sym2str(sym: VALUE) -> VALUE; + } + + // Same optimization done in the interpreter: rb_sym_to_s() allocates a mutable string, but since we are only + // going to use this string for interpolation, it's fine to use the + // frozen string. + // rb_sym2str does not allocate. + let sym = recv; + let str = asm.ccall(rb_sym2str as *const u8, vec![sym]); + asm.stack_pop(1); + + // Push the return value + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, str); + + Some(KeepCompiling) } else { let cd = jit.get_arg(0).as_ptr(); perf_call! { gen_send_general(jit, asm, cd, None) } @@ -9852,7 +10249,7 @@ fn gen_getclassvariable( let val_opnd = asm.ccall( rb_vm_getclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, Opnd::UImm(jit.get_arg(0).as_u64()), Opnd::UImm(jit.get_arg(1).as_u64()), @@ -9876,7 +10273,7 @@ fn gen_setclassvariable( asm.ccall( rb_vm_setclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, Opnd::UImm(jit.get_arg(0).as_u64()), val, @@ -9953,8 +10350,7 @@ fn gen_opt_getconstant_path( let stack_top = asm.stack_push(Type::Unknown); asm.store(stack_top, val); - jump_to_next_insn(jit, asm); - return Some(EndBlock); + return jump_to_next_insn(jit, asm); } let cref_sensitive = !unsafe { (*ice).ic_cref }.is_null(); @@ -10002,8 +10398,7 @@ fn gen_opt_getconstant_path( jit_putobject(asm, unsafe { (*ice).value }); } - jump_to_next_insn(jit, asm); - Some(EndBlock) + jump_to_next_insn(jit, asm) } // Push the explicit block parameter onto the temporary stack. 
Part of the @@ -10014,8 +10409,7 @@ fn gen_getblockparamproxy( asm: &mut Assembler, ) -> Option<CodegenStatus> { if !jit.at_compile_target() { - defer_compilation(jit, asm); - return Some(EndBlock); + return jit.defer_compilation(asm); } // EP level @@ -10129,9 +10523,7 @@ fn gen_getblockparamproxy( unreachable!("absurd given initial filtering"); } - jump_to_next_insn(jit, asm); - - Some(EndBlock) + jump_to_next_insn(jit, asm) } fn gen_getblockparam( @@ -10306,6 +10698,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_dup => Some(gen_dup), YARVINSN_dupn => Some(gen_dupn), YARVINSN_swap => Some(gen_swap), + YARVINSN_opt_reverse => Some(gen_opt_reverse), YARVINSN_putnil => Some(gen_putnil), YARVINSN_putobject => Some(gen_putobject), YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), @@ -10340,6 +10733,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_opt_hash_freeze => Some(gen_opt_hash_freeze), YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), + YARVINSN_opt_duparray_send => Some(gen_opt_duparray_send), YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send), YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_splatkw => Some(gen_splatkw), @@ -10362,7 +10756,6 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_opt_neq => Some(gen_opt_neq), YARVINSN_opt_aref => Some(gen_opt_aref), YARVINSN_opt_aset => Some(gen_opt_aset), - YARVINSN_opt_aref_with => Some(gen_opt_aref_with), YARVINSN_opt_mult => Some(gen_opt_mult), YARVINSN_opt_div => Some(gen_opt_div), YARVINSN_opt_ltlt => Some(gen_opt_ltlt), @@ -10384,6 +10777,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_branchnil => Some(gen_branchnil), YARVINSN_throw => Some(gen_throw), YARVINSN_jump => Some(gen_jump), + YARVINSN_opt_new => Some(gen_opt_new), YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), YARVINSN_getblockparam => Some(gen_getblockparam), @@ -10454,6 +10848,7 @@ pub fn yjit_reg_method_codegen_fns() { reg_method_codegen(rb_cInteger, "===", jit_rb_int_equal); reg_method_codegen(rb_cInteger, "succ", jit_rb_int_succ); + reg_method_codegen(rb_cInteger, "pred", jit_rb_int_pred); reg_method_codegen(rb_cInteger, "/", jit_rb_int_div); reg_method_codegen(rb_cInteger, "<<", jit_rb_int_lshift); reg_method_codegen(rb_cInteger, ">>", jit_rb_int_rshift); @@ -10465,6 +10860,7 @@ pub fn yjit_reg_method_codegen_fns() { reg_method_codegen(rb_cFloat, "*", jit_rb_float_mul); reg_method_codegen(rb_cFloat, "/", jit_rb_float_div); + reg_method_codegen(rb_cString, "dup", jit_rb_str_dup); reg_method_codegen(rb_cString, "empty?", jit_rb_str_empty_p); reg_method_codegen(rb_cString, "to_s", jit_rb_str_to_s); reg_method_codegen(rb_cString, "to_str", jit_rb_str_to_s); @@ -10474,6 +10870,8 @@ pub fn yjit_reg_method_codegen_fns() { reg_method_codegen(rb_cString, "getbyte", jit_rb_str_getbyte); reg_method_codegen(rb_cString, "setbyte", jit_rb_str_setbyte); reg_method_codegen(rb_cString, "byteslice", jit_rb_str_byteslice); + reg_method_codegen(rb_cString, "[]", jit_rb_str_aref_m); + reg_method_codegen(rb_cString, "slice", jit_rb_str_aref_m); reg_method_codegen(rb_cString, "<<", jit_rb_str_concat); reg_method_codegen(rb_cString, "+@", jit_rb_str_uplus); @@ -10506,13 +10904,12 @@ pub fn yjit_reg_method_codegen_fns() { /// and do not make method calls. /// /// See also: [lookup_cfunc_codegen]. 
-fn reg_method_codegen(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { - let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!"); - let mid = unsafe { rb_intern(id_string.as_ptr()) }; +fn reg_method_codegen(klass: VALUE, method_name: &str, gen_fn: MethodGenFn) { + let mid = unsafe { rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()) }; let me = unsafe { rb_method_entry_at(klass, mid) }; if me.is_null() { - panic!("undefined optimized method!: {mid_str}"); + panic!("undefined optimized method!: {method_name}"); } // For now, only cfuncs are supported (me->cme cast fine since it's just me->def->type). @@ -10526,6 +10923,10 @@ fn reg_method_codegen(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); } } +pub fn yjit_shutdown_free_codegen_table() { + unsafe { METHOD_CODEGEN_TABLE = None; }; +} + /// Global state needed for code generation pub struct CodegenGlobals { /// Flat vector of bits to store compressed context data @@ -10586,7 +10987,7 @@ impl CodegenGlobals { #[cfg(not(test))] let (mut cb, mut ocb) = { - let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(exec_mem_size as u32) }; + let virt_block: *mut u8 = unsafe { rb_jit_reserve_addr_space(exec_mem_size as u32) }; // Memory protection syscalls need page-aligned addresses, so check it here. Assuming // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the @@ -10595,7 +10996,7 @@ impl CodegenGlobals { // // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. - let page_size = unsafe { rb_yjit_get_page_size() }; + let page_size = unsafe { rb_jit_get_page_size() }; assert_eq!( virt_block as usize % page_size.as_usize(), 0, "Start of virtual address block should be page-aligned", @@ -10611,7 +11012,7 @@ impl CodegenGlobals { exec_mem_size, get_option!(mem_size), ); - let mem_block = Rc::new(RefCell::new(mem_block)); + let mem_block = Rc::new(mem_block); let freed_pages = Rc::new(None); @@ -10874,6 +11275,41 @@ mod tests { } #[test] + fn test_gen_opt_reverse() { + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + + // Odd number of elements + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); + + let mut value_array: [u64; 2] = [0, 3]; + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; + + let mut status = gen_opt_reverse(&mut jit, &mut asm); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0))); + + // Try again with an even number of elements. 
+ asm.stack_push(Type::Nil); + value_array[1] = 4; + status = gen_opt_reverse(&mut jit, &mut asm); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(3))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0))); + } + + #[test] fn test_gen_swap() { let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); let mut jit = dummy_jit_state(&mut cb, &mut ocb); diff --git a/yjit/src/core.rs b/yjit/src/core.rs index aaf9ca2055..0590135392 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -447,25 +447,9 @@ impl RegMapping { self.0.iter().filter_map(|®_opnd| reg_opnd).collect() } - /// Return TypeDiff::Compatible(diff) if dst has a mapping that can be made by moving registers - /// in self `diff` times. TypeDiff::Incompatible if they have different things in registers. - pub fn diff(&self, dst: RegMapping) -> TypeDiff { - let src_opnds = self.get_reg_opnds(); - let dst_opnds = dst.get_reg_opnds(); - if src_opnds.len() != dst_opnds.len() { - return TypeDiff::Incompatible; - } - - let mut diff = 0; - for ®_opnd in src_opnds.iter() { - match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) { - (Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx { - diff += 1; - } - _ => return TypeDiff::Incompatible, - } - } - TypeDiff::Compatible(diff) + /// Count the number of registers that store a different operand from `dst`. + pub fn diff(&self, dst: RegMapping) -> usize { + self.0.iter().enumerate().filter(|&(reg_idx, ®)| reg != dst.0[reg_idx]).count() } } @@ -974,13 +958,13 @@ impl Context { if CTX_DECODE_CACHE == None { // Here we use the vec syntax to avoid allocating the large table on the stack, // as this can cause a stack overflow - let tbl = vec![(Context::default(), 0); CTX_ENCODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap(); + let tbl = vec![(Context::default(), 0); CTX_DECODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap(); CTX_DECODE_CACHE = Some(tbl); } // Write a cache entry for this context let cache = CTX_DECODE_CACHE.as_mut().unwrap(); - cache[idx as usize % CTX_ENCODE_CACHE_SIZE] = (*ctx, idx); + cache[idx as usize % CTX_DECODE_CACHE_SIZE] = (*ctx, idx); } } @@ -1115,7 +1099,7 @@ impl Context { MapToLocal(local_idx) => { bits.push_op(CtxOp::MapTempLocal); bits.push_u3(stack_idx as u8); - bits.push_u3(local_idx as u8); + bits.push_u3(local_idx); } MapToSelf => { @@ -1834,7 +1818,7 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) { callback(iseq); } let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; - unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; + unsafe { rb_jit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; } /// Iterate over all on-stack ISEQs @@ -1936,7 +1920,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { // For aliasing, having the VM lock hopefully also implies that no one // else has an overlapping &mut IseqPayload. unsafe { - rb_yjit_assert_holding_vm_lock(); + rb_assert_holding_vm_lock(); &*(payload as *const IseqPayload) } }; @@ -2025,7 +2009,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { // For aliasing, having the VM lock hopefully also implies that no one // else has an overlapping &mut IseqPayload. 
unsafe { - rb_yjit_assert_holding_vm_lock(); + rb_assert_holding_vm_lock(); &*(payload as *const IseqPayload) } }; @@ -2051,13 +2035,6 @@ pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { block_update_references(block, cb, true); } - // Note that we would have returned already if YJIT is off. - cb.mark_all_executable(); - - CodegenGlobals::get_outlined_cb() - .unwrap() - .mark_all_executable(); - return; fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) { @@ -2114,11 +2091,9 @@ pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { // Only write when the VALUE moves, to be copy-on-write friendly. if new_addr != object { - for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { - let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); - cb.write_mem(byte_code_ptr, byte) - .expect("patching existing code should be within bounds"); - } + // SAFETY: Since we already set code memory writable before the compacting phase, + // we can use raw memory accesses directly. + unsafe { value_ptr.write_unaligned(new_addr); } } } } @@ -2126,6 +2101,34 @@ pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { } } +/// Mark all code memory as writable. +/// This function is useful for garbage collectors that update references in JIT-compiled code in +/// bulk. +#[no_mangle] +pub extern "C" fn rb_yjit_mark_all_writeable() { + if CodegenGlobals::has_instance() { + CodegenGlobals::get_inline_cb().mark_all_writeable(); + + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_writeable(); + } +} + +/// Mark all code memory as executable. +/// This function is useful for garbage collectors that update references in JIT-compiled code in +/// bulk. +#[no_mangle] +pub extern "C" fn rb_yjit_mark_all_executable() { + if CodegenGlobals::has_instance() { + CodegenGlobals::get_inline_cb().mark_all_executable(); + + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); + } +} + /// Get all blocks for a particular place in an iseq. fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> { let insn_idx = blockid.idx.as_usize(); @@ -2240,13 +2243,12 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { return best_version; } -/// Basically find_block_version() but allows RegMapping incompatibility -/// that can be fixed by register moves and returns Context -pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> { +/// Find the closest RegMapping among ones that have already been compiled. 
+pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> { let versions = get_version_list(blockid)?; // Best match found - let mut best_ctx: Option<Context> = None; + let mut best_mapping: Option<RegMapping> = None; let mut best_diff = usize::MAX; // For each version matching the blockid @@ -2254,17 +2256,17 @@ pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option< let block = unsafe { blockref.as_ref() }; let block_ctx = Context::decode(block.ctx); - // Discover the best block that is compatible if we move registers - match ctx.diff_with_same_regs(&block_ctx) { + // Discover the best block that is compatible if we load/spill registers + match ctx.diff_allowing_reg_mismatch(&block_ctx) { TypeDiff::Compatible(diff) if diff < best_diff => { - best_ctx = Some(block_ctx); + best_mapping = Some(block_ctx.get_reg_mapping()); best_diff = diff; } _ => {} } } - best_ctx + best_mapping } /// Allow inlining a Block up to MAX_INLINE_VERSIONS times. @@ -2309,7 +2311,9 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context { return generic_ctx; } - incr_counter_to!(max_inline_versions, next_versions); + if ctx.inline() { + incr_counter_to!(max_inline_versions, next_versions); + } return *ctx; } @@ -2367,6 +2371,9 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { } incr_counter!(compiled_block_count); + if Context::decode(block.ctx).inline() { + incr_counter!(inline_block_count); + } // Mark code pages for code GC let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap(); @@ -2412,7 +2419,9 @@ impl<'a> JITState<'a> { // Pending branches => actual branches outgoing: MutableBranchList(Cell::new(self.pending_outgoing.into_iter().map(|pending_out| { let pending_out = Rc::try_unwrap(pending_out) - .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block"); + .unwrap_or_else(|rc| panic!( + "PendingBranchRef should be unique when ready to construct a Block. \ + strong={} weak={}", Rc::strong_count(&rc), Rc::weak_count(&rc))); pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box")) }).collect())) }); @@ -2420,7 +2429,7 @@ impl<'a> JITState<'a> { // SAFETY: allocated with Box above unsafe { ptr::write(blockref, block) }; - // Block is initialized now. Note that MaybeUnint<T> has the same layout as T. + // Block is initialized now. Note that MaybeUninit<T> has the same layout as T. let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box"); // Track all the assumptions the block makes as invariants @@ -2591,6 +2600,14 @@ impl Context { self.sp_opnd(-ep_offset + offset) } + /// Start using a register for a given stack temp or a local. + pub fn alloc_reg(&mut self, opnd: RegOpnd) { + let mut reg_mapping = self.get_reg_mapping(); + if reg_mapping.alloc_reg(opnd) { + self.set_reg_mapping(reg_mapping); + } + } + /// Stop using a register for a given stack temp or a local. /// This allows us to reuse the register for a value that we know is dead /// and will no longer be used (e.g. popped stack temp). @@ -2893,19 +2910,26 @@ impl Context { return TypeDiff::Compatible(diff); } - /// Basically diff() but allows RegMapping incompatibility that can be fixed - /// by register moves. - pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff { + /// Basically diff() but allows RegMapping incompatibility that could be fixed by + /// spilling, loading, or shuffling registers. 
+ pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff { + // We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack. + // If dst has RegOpnd::Stack, we can't reuse the block as a callee. + for reg_opnd in dst.get_reg_mapping().get_reg_opnds() { + if matches!(reg_opnd, RegOpnd::Stack(_)) { + return TypeDiff::Incompatible; + } + } + // Prepare a Context with the same registers let mut dst_with_same_regs = dst.clone(); dst_with_same_regs.set_reg_mapping(self.get_reg_mapping()); // Diff registers and other stuff separately, and merge them - match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) { - (TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => { - TypeDiff::Compatible(ctx_diff + reg_diff) - } - _ => TypeDiff::Incompatible + if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) { + TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping())) + } else { + TypeDiff::Incompatible } } @@ -3198,16 +3222,33 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option< let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); + let code_ptr = gen_entry_point_body(blockid, stack_size, ec, jit_exception, cb, ocb); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + code_ptr +} + +fn gen_entry_point_body(blockid: BlockId, stack_size: u8, ec: EcPtr, jit_exception: bool, cb: &mut CodeBlock, ocb: &mut OutlinedCb) -> Option<*const u8> { // Write the interpreter entry prologue. Might be NULL when out of memory. - let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception); + let (code_ptr, reg_mapping) = gen_entry_prologue(cb, ocb, blockid, stack_size, jit_exception)?; - // Try to generate code for the entry block + // Find or compile a block version let mut ctx = Context::default(); ctx.stack_size = stack_size; - let block = gen_block_series(blockid, &ctx, ec, cb, ocb); - - cb.mark_all_executable(); - ocb.unwrap().mark_all_executable(); + ctx.reg_mapping = reg_mapping; + let block = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new_without_iseq(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb, Some(ocb))?; + Some(blockref) + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. + None => gen_block_series(blockid, &ctx, ec, cb, ocb), + }; match block { // Compilation failed @@ -3232,7 +3273,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option< incr_counter!(compiled_iseq_entry); // Compilation successful and block not empty - code_ptr.map(|ptr| ptr.raw_ptr(cb)) + Some(code_ptr.raw_ptr(cb)) } // Change the entry's jump target from an entry stub to a next entry @@ -3307,20 +3348,22 @@ fn entry_stub_hit_body( let cfp = unsafe { get_ec_cfp(ec) }; let iseq = unsafe { get_cfp_iseq(cfp) }; let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; + let blockid = BlockId { iseq, idx: insn_idx }; let stack_size: u8 = unsafe { u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? 
}; // Compile a new entry guard as a next entry let next_entry = cb.get_write_ptr(); - let mut asm = Assembler::new_without_iseq(); - let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); + let pending_entry = gen_entry_chain_guard(&mut asm, ocb, blockid)?; + let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size); asm.compile(cb, Some(ocb))?; // Find or compile a block version - let blockid = BlockId { iseq, idx: insn_idx }; let mut ctx = Context::default(); ctx.stack_size = stack_size; + ctx.reg_mapping = reg_mapping; let blockref = match find_block_version(blockid, &ctx) { // If an existing block is found, generate a jump to the block. Some(blockref) => { @@ -3344,8 +3387,9 @@ fn entry_stub_hit_body( get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); } - // Let the stub jump to the block - blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb)) + // Return a code pointer if the block is successfully compiled. The entry stub needs + // to jump to the entry preceding the block to load the registers in reg_mapping. + blockref.map(|_block| next_entry.raw_ptr(cb)) } /// Generate a stub that calls entry_stub_hit @@ -3549,6 +3593,13 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); } + // Bail if this branch is housed in an invalidated (dead) block. + // This only happens in rare invalidation scenarios and we need + // to avoid linking a dead block to a live block with a branch. + if branch.block.get().as_ref().iseq.get().is_null() { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + (cfp, original_interp_sp) }; @@ -3748,7 +3799,7 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { let mut asm = Assembler::new_without_iseq(); // For `branch_stub_hit(branch_ptr, target_idx, ec)`, - // `branch_ptr` and `target_idx` is different for each stub, + // `branch_ptr` and `target_idx` are different for each stub, // but the call and what's after is the same. This trampoline // is the unchanging part. // Since this trampoline is static, it allows code GC inside @@ -3782,7 +3833,7 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { /// Return registers to be pushed and popped on branch_stub_hit. pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator { - let temp_regs = Assembler::get_temp_regs2().iter(); + let temp_regs = Assembler::get_temp_regs().iter(); let len = temp_regs.len(); // The return value gen_leave() leaves in C_RET_REG // needs to survive the branch_stub_hit() call. @@ -3916,10 +3967,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: } /// Create a stub to force the code up to this point to be executed -pub fn defer_compilation( - jit: &mut JITState, - asm: &mut Assembler, -) { +pub fn defer_compilation(jit: &mut JITState, asm: &mut Assembler) -> Result<(), ()> { if asm.ctx.is_deferred() { panic!("Double defer!"); } @@ -3936,7 +3984,7 @@ pub fn defer_compilation( }; // Likely a stub since the context is marked as deferred(). - let target0_address = branch.set_target(0, blockid, &next_ctx, jit); + let dst_addr = branch.set_target(0, blockid, &next_ctx, jit).ok_or(())?; // Pad the block if it has the potential to be invalidated. 
This must be // done before gen_fn() in case the jump is overwritten by a fallthrough. @@ -3947,9 +3995,7 @@ pub fn defer_compilation( // Call the branch generation function asm_comment!(asm, "defer_compilation"); asm.mark_branch_start(&branch); - if let Some(dst_addr) = target0_address { - branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None); - } + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None); asm.mark_branch_end(&branch); // If the block we're deferring from is empty @@ -3958,6 +4004,8 @@ pub fn defer_compilation( } incr_counter!(defer_count); + + Ok(()) } /// Remove a block from the live control flow graph. @@ -4138,7 +4186,23 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // For each incoming branch - for branchref in block.incoming.0.take().iter() { + let mut incoming_branches = block.incoming.0.take(); + + // An adjacent branch will write into the start of the block being invalidated, possibly + // overwriting the block's exit. If we run out of memory after doing this, any subsequent + // incoming branches we rewrite won't be able use the block's exit as a fallback when they + // are unable to generate a stub. To avoid this, if there's an incoming branch that's + // adjacent to the invalidated block, make sure we process it last. + let adjacent_branch_idx = incoming_branches.iter().position(|branchref| { + let branch = unsafe { branchref.as_ref() }; + let target_next = block.start_addr == branch.end_addr.get(); + target_next + }); + if let Some(adjacent_branch_idx) = adjacent_branch_idx { + incoming_branches.swap(adjacent_branch_idx, incoming_branches.len() - 1) + } + + for (i, branchref) in incoming_branches.iter().enumerate() { let branch = unsafe { branchref.as_ref() }; let target_idx = if branch.get_target_address(0) == Some(block_start) { 0 @@ -4178,10 +4242,18 @@ pub fn invalidate_block_version(blockref: &BlockRef) { let target_next = block.start_addr == branch.end_addr.get(); if target_next { - // The new block will no longer be adjacent. - // Note that we could be enlarging the branch and writing into the - // start of the block being invalidated. - branch.gen_fn.set_shape(BranchShape::Default); + if stub_addr != block.start_addr { + // The new block will no longer be adjacent. + // Note that we could be enlarging the branch and writing into the + // start of the block being invalidated. + branch.gen_fn.set_shape(BranchShape::Default); + } else { + // The branch target is still adjacent, so the branch must remain + // a fallthrough so we don't overwrite the target with a jump. + // + // This can happen if we're unable to generate a stub and the + // target block also exits on entry (block_start == block_entry_exit). + } } // Rewrite the branch with the new jump target address @@ -4191,6 +4263,11 @@ pub fn invalidate_block_version(blockref: &BlockRef) { if target_next && branch.end_addr > block.end_addr { panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size()); } + let is_last_incoming_branch = i == incoming_branches.len() - 1; + if target_next && branch.end_addr.get() > block_entry_exit && !is_last_incoming_branch { + // We might still need to jump to this exit if we run out of memory when rewriting another incoming branch. 
+ panic!("yjit invalidate rewrote branch over exit of invalidated block: {:?}", branch); + } if !target_next && branch.code_size() > old_branch_size { panic!( "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})", @@ -4229,11 +4306,9 @@ pub fn invalidate_block_version(blockref: &BlockRef) { incr_counter!(invalidation_count); } -// We cannot deallocate blocks immediately after invalidation since there -// could be stubs waiting to access branch pointers. Return stubs can do -// this since patching the code for setting up return addresses does not -// affect old return addresses that are already set up to use potentially -// invalidated branch pointers. Example: +// We cannot deallocate blocks immediately after invalidation since patching the code for setting +// up return addresses does not affect outstanding return addresses that are on stack and will use +// invalidated branch pointers when hit. Example: // def foo(n) // if n == 2 // # 1.times.each to create a cfunc frame to preserve the JIT frame @@ -4241,13 +4316,16 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // return 1.times.each { Object.define_method(:foo) {} } // end // -// foo(n + 1) +// foo(n + 1) # The block for this call houses the return branch stub // end // p foo(1) pub fn delayed_deallocation(blockref: BlockRef) { block_assumptions_free(blockref); - let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap(); + let block = unsafe { blockref.as_ref() }; + // Set null ISEQ on the block to signal that it's dead. + let iseq = block.iseq.replace(ptr::null()); + let payload = get_iseq_payload(iseq).unwrap(); payload.dead_blocks.push(blockref); } diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 25fabec1d0..d34b049a45 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -123,7 +123,6 @@ extern "C" { pub fn rb_float_new(d: f64) -> VALUE; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; - pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; @@ -198,8 +197,8 @@ pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; pub use rb_get_call_data_ci as get_call_data_ci; pub use rb_yarv_str_eql_internal as rb_str_eql_internal; pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; -pub use rb_yjit_fix_div_fix as rb_fix_div_fix; -pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix; +pub use rb_jit_fix_div_fix as rb_fix_div_fix; +pub use rb_jit_fix_mod_fix as rb_fix_mod_fix; pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; @@ -362,6 +361,11 @@ impl VALUE { !self.special_const_p() } + /// Shareability between ractors. `RB_OBJ_SHAREABLE_P()`. 
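
The hunks above work as a pair: delayed_deallocation tombstones a dead block by nulling its iseq pointer, and branch_stub_hit_body bails out when it sees that tombstone. A toy, self-contained version of the pattern, using a made-up Block type and a fake address rather than YJIT's real structures:

    use std::cell::Cell;
    use std::ptr;

    struct Block {
        iseq: Cell<*const u8>, // null signals that the block is dead
    }

    impl Block {
        fn invalidate(&self) -> *const u8 {
            // Tombstone the block; return the old pointer so the caller can still find its payload.
            self.iseq.replace(ptr::null())
        }

        fn is_dead(&self) -> bool {
            self.iseq.get().is_null()
        }
    }

    fn main() {
        let block = Block { iseq: Cell::new(0x1000 as *const u8) };
        assert!(!block.is_dead());
        block.invalidate();
        assert!(block.is_dead());
    }
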
+ pub fn shareable_p(self) -> bool { + (self.builtin_flags() & RUBY_FL_SHAREABLE as usize) != 0 + } + /// Return true if the value is a Ruby Fixnum (immediate-size integer) pub fn fixnum_p(self) -> bool { let VALUE(cval) = self; @@ -441,28 +445,16 @@ impl VALUE { } pub fn shape_too_complex(self) -> bool { - unsafe { rb_shape_obj_too_complex(self) } + unsafe { rb_yjit_shape_obj_too_complex_p(self) } } pub fn shape_id_of(self) -> u32 { - unsafe { rb_shape_get_shape_id(self) } - } - - pub fn shape_of(self) -> *mut rb_shape { - unsafe { - let shape = rb_shape_get_shape_by_id(self.shape_id_of()); - - if shape.is_null() { - panic!("Shape should not be null"); - } else { - shape - } - } + unsafe { rb_obj_shape_id(self) } } pub fn embedded_p(self) -> bool { unsafe { - FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0) + FL_TEST_RAW(self, VALUE(ROBJECT_HEAP as usize)) == VALUE(0) } } @@ -613,9 +605,15 @@ pub fn rust_str_to_ruby(str: &str) -> VALUE { /// Produce a Ruby symbol from a Rust string slice pub fn rust_str_to_sym(str: &str) -> VALUE { + let id = rust_str_to_id(str); + unsafe { rb_id2sym(id) } +} + +/// Produce an ID from a Rust string slice +pub fn rust_str_to_id(str: &str) -> ID { let c_str = CString::new(str).unwrap(); let c_ptr: *const c_char = c_str.as_ptr(); - unsafe { rb_id2sym(rb_intern(c_ptr)) } + unsafe { rb_intern(c_ptr) } } /// Produce an owned Rust String from a C char pointer @@ -683,7 +681,7 @@ where let line = loc.line; let mut recursive_lock_level: c_uint = 0; - unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) }; + unsafe { rb_jit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) }; let ret = match catch_unwind(func) { Ok(result) => result, @@ -703,7 +701,7 @@ where } }; - unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) }; + unsafe { rb_jit_vm_unlock(&mut recursive_lock_level, file, line) }; ret } @@ -774,12 +772,6 @@ mod manual_defs { pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48; pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56; - // Constants from rb_execution_context_t vm_core.h - pub const RUBY_OFFSET_EC_CFP: i32 = 16; - pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32) - pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32) - pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48; - // Constants from rb_thread_t in vm_core.h pub const RUBY_OFFSET_THREAD_SELF: i32 = 16; @@ -822,8 +814,11 @@ pub(crate) mod ids { def_ids! { name: NULL content: b"" name: respond_to_missing content: b"respond_to_missing?" + name: method_missing content: b"method_missing" name: to_ary content: b"to_ary" + name: to_s content: b"to_s" name: eq content: b"==" + name: include_p content: b"include?" 
} } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 4eb44634a1..56994388a3 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -165,13 +165,13 @@ pub const NIL_REDEFINED_OP_FLAG: u32 = 512; pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024; pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048; pub const PROC_REDEFINED_OP_FLAG: u32 = 4096; +pub const VM_KW_SPECIFIED_BITS_MAX: u32 = 31; pub const VM_ENV_DATA_SIZE: u32 = 3; pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2; pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1; pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0; pub const VM_BLOCK_HANDLER_NONE: u32 = 0; pub const SHAPE_ID_NUM_BITS: u32 = 32; -pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 2; pub type ID = ::std::os::raw::c_ulong; pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>; pub const RUBY_Qfalse: ruby_special_consts = 0; @@ -223,13 +223,12 @@ pub const RUBY_FL_USHIFT: ruby_fl_ushift = 12; pub type ruby_fl_ushift = u32; pub const RUBY_FL_WB_PROTECTED: ruby_fl_type = 32; pub const RUBY_FL_PROMOTED: ruby_fl_type = 32; -pub const RUBY_FL_UNUSED6: ruby_fl_type = 64; +pub const RUBY_FL_USERPRIV0: ruby_fl_type = 64; pub const RUBY_FL_FINALIZE: ruby_fl_type = 128; -pub const RUBY_FL_TAINT: ruby_fl_type = 0; +pub const RUBY_FL_EXIVAR: ruby_fl_type = 0; pub const RUBY_FL_SHAREABLE: ruby_fl_type = 256; -pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 0; -pub const RUBY_FL_SEEN_OBJ_ID: ruby_fl_type = 512; -pub const RUBY_FL_EXIVAR: ruby_fl_type = 1024; +pub const RUBY_FL_WEAK_REFERENCE: ruby_fl_type = 512; +pub const RUBY_FL_UNUSED10: ruby_fl_type = 1024; pub const RUBY_FL_FREEZE: ruby_fl_type = 2048; pub const RUBY_FL_USER0: ruby_fl_type = 4096; pub const RUBY_FL_USER1: ruby_fl_type = 8192; @@ -251,7 +250,7 @@ pub const RUBY_FL_USER16: ruby_fl_type = 268435456; pub const RUBY_FL_USER17: ruby_fl_type = 536870912; pub const RUBY_FL_USER18: ruby_fl_type = 1073741824; pub const RUBY_FL_USER19: ruby_fl_type = -2147483648; -pub const RUBY_ELTS_SHARED: ruby_fl_type = 16384; +pub const RUBY_ELTS_SHARED: ruby_fl_type = 4096; pub const RUBY_FL_SINGLETON: ruby_fl_type = 8192; pub type ruby_fl_type = i32; pub const RSTRING_NOEMBED: ruby_rstring_flags = 8192; @@ -277,9 +276,9 @@ pub const RARRAY_EMBED_LEN_MASK: ruby_rarray_flags = 4161536; pub type ruby_rarray_flags = u32; pub const RARRAY_EMBED_LEN_SHIFT: ruby_rarray_consts = 15; pub type ruby_rarray_consts = u32; -pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 32768; +pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 8192; pub type ruby_rmodule_flags = u32; -pub const ROBJECT_EMBED: ruby_robject_flags = 8192; +pub const ROBJECT_HEAP: ruby_robject_flags = 65536; pub type ruby_robject_flags = u32; pub type rb_block_call_func = ::std::option::Option< unsafe extern "C" fn( @@ -329,21 +328,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_HASH: ruby_basic_operators = 26; -pub const BOP_CALL: ruby_basic_operators = 27; -pub const BOP_AND: ruby_basic_operators = 28; -pub const BOP_OR: 
ruby_basic_operators = 29; -pub const BOP_CMP: ruby_basic_operators = 30; -pub const BOP_DEFAULT: ruby_basic_operators = 31; -pub const BOP_PACK: ruby_basic_operators = 32; -pub const BOP_LAST_: ruby_basic_operators = 33; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; pub const imemo_env: imemo_type = 0; @@ -355,11 +356,10 @@ pub const imemo_memo: imemo_type = 5; pub const imemo_ment: imemo_type = 6; pub const imemo_iseq: imemo_type = 7; pub const imemo_tmpbuf: imemo_type = 8; -pub const imemo_ast: imemo_type = 9; -pub const imemo_parser_strterm: imemo_type = 10; -pub const imemo_callinfo: imemo_type = 11; -pub const imemo_callcache: imemo_type = 12; -pub const imemo_constcache: imemo_type = 13; +pub const imemo_callinfo: imemo_type = 10; +pub const imemo_callcache: imemo_type = 11; +pub const imemo_constcache: imemo_type = 12; +pub const imemo_fields: imemo_type = 13; pub type imemo_type = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -434,11 +434,6 @@ pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3; pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4; pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5; pub type method_optimized_type = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct rb_id_table { - _unused: [u8; 0], -} pub type rb_num_t = ::std::os::raw::c_ulong; pub const RUBY_TAG_NONE: ruby_tag_type = 0; pub const RUBY_TAG_RETURN: ruby_tag_type = 1; @@ -458,8 +453,6 @@ pub type ruby_vm_throw_flags = u32; pub struct iseq_inline_constant_cache_entry { pub flags: VALUE, pub value: VALUE, - pub _unused1: VALUE, - pub _unused2: VALUE, pub ic_cref: *const rb_cref_t, } #[repr(C)] @@ -471,7 +464,7 @@ pub struct iseq_inline_constant_cache { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct iseq_inline_iv_cache_entry { - pub value: usize, + pub value: u64, pub iv_set_name: ID, } #[repr(C)] @@ -492,10 +485,11 @@ pub type rb_iseq_type = u32; pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1; pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2; pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4; +pub const BUILTIN_ATTR_C_TRACE: rb_builtin_attr = 8; pub type rb_builtin_attr = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword { +pub struct rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword { pub num: ::std::os::raw::c_int, pub required_num: ::std::os::raw::c_int, pub bits_start: ::std::os::raw::c_int, @@ -606,6 +600,7 @@ pub const VM_OPT_NEWARRAY_SEND_MIN: vm_opt_newarray_send_type = 2; pub const VM_OPT_NEWARRAY_SEND_HASH: vm_opt_newarray_send_type = 3; pub const VM_OPT_NEWARRAY_SEND_PACK: 
vm_opt_newarray_send_type = 4; pub const VM_OPT_NEWARRAY_SEND_PACK_BUFFER: vm_opt_newarray_send_type = 5; +pub const VM_OPT_NEWARRAY_SEND_INCLUDE_P: vm_opt_newarray_send_type = 6; pub type vm_opt_newarray_send_type = u32; pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1; pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2; @@ -631,36 +626,16 @@ pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256; pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512; pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024; pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048; +pub const VM_FRAME_FLAG_BOX_REQUIRE: vm_frame_env_flags = 4096; pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2; pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; -pub type attr_index_t = u32; +pub type attr_index_t = u16; pub type shape_id_t = u32; -pub type redblack_id_t = u32; -pub type redblack_node_t = redblack_node; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct rb_shape { - pub edges: *mut rb_id_table, - pub edge_name: ID, - pub next_iv_index: attr_index_t, - pub capacity: u32, - pub type_: u8, - pub heap_index: u8, - pub parent_id: shape_id_t, - pub ancestor_index: *mut redblack_node_t, -} -pub type rb_shape_t = rb_shape; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct redblack_node { - pub key: ID, - pub value: *mut rb_shape_t, - pub l: redblack_id_t, - pub r: redblack_id_t, -} +pub const SHAPE_ID_HAS_IVAR_MASK: shape_id_mask = 134742014; +pub type shape_id_mask = u32; #[repr(C)] pub struct rb_cvar_class_tbl_entry { pub index: u32, @@ -704,7 +679,7 @@ pub struct rb_call_data { pub ci: *const rb_callinfo, pub cc: *const rb_callcache, } -pub const RSTRING_CHILLED: ruby_rstring_private_flags = 32768; +pub const RSTRING_CHILLED: ruby_rstring_private_flags = 49152; pub type ruby_rstring_private_flags = u32; pub const RHASH_PASS_AS_KEYWORDS: ruby_rhash_flags = 8192; pub const RHASH_PROC_DEFAULT: ruby_rhash_flags = 16384; @@ -782,42 +757,42 @@ pub const YARVINSN_definesmethod: ruby_vminsn_type = 54; pub const YARVINSN_send: ruby_vminsn_type = 55; pub const YARVINSN_sendforward: ruby_vminsn_type = 56; pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 57; -pub const YARVINSN_objtostring: ruby_vminsn_type = 58; -pub const YARVINSN_opt_ary_freeze: ruby_vminsn_type = 59; -pub const YARVINSN_opt_hash_freeze: ruby_vminsn_type = 60; -pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 61; -pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 62; -pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 63; -pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 64; -pub const YARVINSN_invokesuper: ruby_vminsn_type = 65; -pub const YARVINSN_invokesuperforward: ruby_vminsn_type = 66; -pub const YARVINSN_invokeblock: ruby_vminsn_type = 67; -pub const YARVINSN_leave: ruby_vminsn_type = 68; -pub const YARVINSN_throw: ruby_vminsn_type = 69; -pub const YARVINSN_jump: ruby_vminsn_type = 70; -pub const YARVINSN_branchif: ruby_vminsn_type = 71; -pub const YARVINSN_branchunless: ruby_vminsn_type = 72; -pub const YARVINSN_branchnil: ruby_vminsn_type = 73; -pub const YARVINSN_once: ruby_vminsn_type = 74; -pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 75; -pub const YARVINSN_opt_plus: ruby_vminsn_type = 76; -pub const YARVINSN_opt_minus: ruby_vminsn_type = 77; -pub const YARVINSN_opt_mult: ruby_vminsn_type 
= 78; -pub const YARVINSN_opt_div: ruby_vminsn_type = 79; -pub const YARVINSN_opt_mod: ruby_vminsn_type = 80; -pub const YARVINSN_opt_eq: ruby_vminsn_type = 81; -pub const YARVINSN_opt_neq: ruby_vminsn_type = 82; -pub const YARVINSN_opt_lt: ruby_vminsn_type = 83; -pub const YARVINSN_opt_le: ruby_vminsn_type = 84; -pub const YARVINSN_opt_gt: ruby_vminsn_type = 85; -pub const YARVINSN_opt_ge: ruby_vminsn_type = 86; -pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 87; -pub const YARVINSN_opt_and: ruby_vminsn_type = 88; -pub const YARVINSN_opt_or: ruby_vminsn_type = 89; -pub const YARVINSN_opt_aref: ruby_vminsn_type = 90; -pub const YARVINSN_opt_aset: ruby_vminsn_type = 91; -pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 92; -pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 93; +pub const YARVINSN_opt_new: ruby_vminsn_type = 58; +pub const YARVINSN_objtostring: ruby_vminsn_type = 59; +pub const YARVINSN_opt_ary_freeze: ruby_vminsn_type = 60; +pub const YARVINSN_opt_hash_freeze: ruby_vminsn_type = 61; +pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 62; +pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 63; +pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 64; +pub const YARVINSN_opt_duparray_send: ruby_vminsn_type = 65; +pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 66; +pub const YARVINSN_invokesuper: ruby_vminsn_type = 67; +pub const YARVINSN_invokesuperforward: ruby_vminsn_type = 68; +pub const YARVINSN_invokeblock: ruby_vminsn_type = 69; +pub const YARVINSN_leave: ruby_vminsn_type = 70; +pub const YARVINSN_throw: ruby_vminsn_type = 71; +pub const YARVINSN_jump: ruby_vminsn_type = 72; +pub const YARVINSN_branchif: ruby_vminsn_type = 73; +pub const YARVINSN_branchunless: ruby_vminsn_type = 74; +pub const YARVINSN_branchnil: ruby_vminsn_type = 75; +pub const YARVINSN_once: ruby_vminsn_type = 76; +pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 77; +pub const YARVINSN_opt_plus: ruby_vminsn_type = 78; +pub const YARVINSN_opt_minus: ruby_vminsn_type = 79; +pub const YARVINSN_opt_mult: ruby_vminsn_type = 80; +pub const YARVINSN_opt_div: ruby_vminsn_type = 81; +pub const YARVINSN_opt_mod: ruby_vminsn_type = 82; +pub const YARVINSN_opt_eq: ruby_vminsn_type = 83; +pub const YARVINSN_opt_neq: ruby_vminsn_type = 84; +pub const YARVINSN_opt_lt: ruby_vminsn_type = 85; +pub const YARVINSN_opt_le: ruby_vminsn_type = 86; +pub const YARVINSN_opt_gt: ruby_vminsn_type = 87; +pub const YARVINSN_opt_ge: ruby_vminsn_type = 88; +pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 89; +pub const YARVINSN_opt_and: ruby_vminsn_type = 90; +pub const YARVINSN_opt_or: ruby_vminsn_type = 91; +pub const YARVINSN_opt_aref: ruby_vminsn_type = 92; +pub const YARVINSN_opt_aset: ruby_vminsn_type = 93; pub const YARVINSN_opt_length: ruby_vminsn_type = 94; pub const YARVINSN_opt_size: ruby_vminsn_type = 95; pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 96; @@ -891,42 +866,42 @@ pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 163; pub const YARVINSN_trace_send: ruby_vminsn_type = 164; pub const YARVINSN_trace_sendforward: ruby_vminsn_type = 165; pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 166; -pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 167; -pub const YARVINSN_trace_opt_ary_freeze: ruby_vminsn_type = 168; -pub const YARVINSN_trace_opt_hash_freeze: ruby_vminsn_type = 169; -pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 170; -pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 171; -pub const YARVINSN_trace_opt_str_uminus: 
ruby_vminsn_type = 172; -pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 173; -pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 174; -pub const YARVINSN_trace_invokesuperforward: ruby_vminsn_type = 175; -pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 176; -pub const YARVINSN_trace_leave: ruby_vminsn_type = 177; -pub const YARVINSN_trace_throw: ruby_vminsn_type = 178; -pub const YARVINSN_trace_jump: ruby_vminsn_type = 179; -pub const YARVINSN_trace_branchif: ruby_vminsn_type = 180; -pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 181; -pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 182; -pub const YARVINSN_trace_once: ruby_vminsn_type = 183; -pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 184; -pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 185; -pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 186; -pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 187; -pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 188; -pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 189; -pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 190; -pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 191; -pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 192; -pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 193; -pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 194; -pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 195; -pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 196; -pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 197; -pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 198; -pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 199; -pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 200; -pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 201; -pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 202; +pub const YARVINSN_trace_opt_new: ruby_vminsn_type = 167; +pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 168; +pub const YARVINSN_trace_opt_ary_freeze: ruby_vminsn_type = 169; +pub const YARVINSN_trace_opt_hash_freeze: ruby_vminsn_type = 170; +pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 171; +pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 172; +pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 173; +pub const YARVINSN_trace_opt_duparray_send: ruby_vminsn_type = 174; +pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 175; +pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 176; +pub const YARVINSN_trace_invokesuperforward: ruby_vminsn_type = 177; +pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 178; +pub const YARVINSN_trace_leave: ruby_vminsn_type = 179; +pub const YARVINSN_trace_throw: ruby_vminsn_type = 180; +pub const YARVINSN_trace_jump: ruby_vminsn_type = 181; +pub const YARVINSN_trace_branchif: ruby_vminsn_type = 182; +pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 183; +pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 184; +pub const YARVINSN_trace_once: ruby_vminsn_type = 185; +pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 186; +pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 187; +pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 188; +pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 189; +pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 190; +pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 191; +pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 192; +pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 193; +pub const 
YARVINSN_trace_opt_lt: ruby_vminsn_type = 194; +pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 195; +pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 196; +pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 197; +pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 198; +pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 199; +pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 200; +pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 201; +pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 202; pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 203; pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 204; pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 205; @@ -942,7 +917,38 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -966,13 +972,19 @@ pub const DEFINED_REF: defined_type = 15; pub const DEFINED_FUNC: defined_type = 16; pub const DEFINED_CONST_FROM: defined_type = 17; pub type defined_type = u32; -pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: robject_offsets = 16; -pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: robject_offsets = 24; -pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16; -pub type robject_offsets = u32; -pub 
const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16; -pub type rstring_offsets = u32; -pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +pub type rb_seq_param_keyword_struct = + rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword; +pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: jit_bindgen_constants = 16; +pub const ROBJECT_OFFSET_AS_ARY: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_RSTRING_LEN: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_EC_CFP: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: jit_bindgen_constants = 32; +pub const RUBY_OFFSET_EC_INTERRUPT_MASK: jit_bindgen_constants = 36; +pub const RUBY_OFFSET_EC_THREAD_PTR: jit_bindgen_constants = 48; +pub const RUBY_OFFSET_EC_RACTOR_ID: jit_bindgen_constants = 64; +pub type jit_bindgen_constants = u32; +pub type rb_iseq_param_keyword_struct = + rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword; extern "C" { pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); pub fn rb_class_attached_object(klass: VALUE) -> VALUE; @@ -986,6 +998,7 @@ extern "C" { pub fn rb_gc_location(obj: VALUE) -> VALUE; pub fn rb_gc_writebarrier(old: VALUE, young: VALUE); pub fn rb_class_get_superclass(klass: VALUE) -> VALUE; + pub fn rb_funcall(recv: VALUE, mid: ID, n: ::std::os::raw::c_int, ...) -> VALUE; pub static mut rb_mKernel: VALUE; pub static mut rb_cBasicObject: VALUE; pub static mut rb_cArray: VALUE; @@ -1021,7 +1034,13 @@ extern "C" { pub fn rb_intern2(name: *const ::std::os::raw::c_char, len: ::std::os::raw::c_long) -> ID; pub fn rb_id2name(id: ID) -> *const ::std::os::raw::c_char; pub fn rb_class2name(klass: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_class_new_instance_pass_kw( + argc: ::std::os::raw::c_int, + argv: *const VALUE, + klass: VALUE, + ) -> VALUE; pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE; + pub fn rb_obj_alloc(klass: VALUE) -> VALUE; pub fn rb_obj_frozen_p(obj: VALUE) -> VALUE; pub fn rb_backref_get() -> VALUE; pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE; @@ -1041,6 +1060,7 @@ extern "C" { pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE; pub fn rb_ivar_defined(obj: VALUE, name: ID) -> VALUE; pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE; + pub fn rb_const_get(space: VALUE, name: ID) -> VALUE; pub fn rb_obj_info_dump(obj: VALUE); pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE; pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE; @@ -1056,13 +1076,14 @@ extern "C" { elts: *const VALUE, ) -> VALUE; pub fn rb_vm_top_self() -> VALUE; - pub static mut rb_vm_insns_count: u64; + pub static mut rb_vm_insn_count: u64; pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t; pub fn rb_callable_method_entry_or_negative( klass: VALUE, id: ID, ) -> *const rb_callable_method_entry_t; + pub static mut rb_cRubyVM: VALUE; pub static mut rb_mRubyVMFrozenCore: VALUE; pub static mut rb_block_param_proxy: VALUE; pub fn rb_vm_ep_local_ep(ep: *const VALUE) -> *const VALUE; @@ -1075,21 +1096,26 @@ extern "C" { pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char; pub fn rb_ec_stack_check(ec: *mut rb_execution_context_struct) -> ::std::os::raw::c_int; pub fn rb_shape_id_offset() -> i32; - pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t; - pub fn rb_shape_get_shape_id(obj: VALUE) -> shape_id_t; - pub fn rb_shape_get_iv_index(shape: *mut 
rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; - pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool; - pub fn rb_shape_get_next_no_warnings( - shape: *mut rb_shape_t, - obj: VALUE, + pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t; + pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; + pub fn rb_shape_transition_add_ivar_no_warnings( + klass: VALUE, + original_shape_id: shape_id_t, id: ID, - ) -> *mut rb_shape_t; - pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t; + ) -> shape_id_t; + pub fn rb_ivar_get_at(obj: VALUE, index: attr_index_t, id: ID) -> VALUE; + pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE; pub fn rb_gvar_get(arg1: ID) -> VALUE; pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; - pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32); + pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); pub fn rb_vm_barrier(); pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE; + pub fn rb_str_substr_two_fixnums( + str_: VALUE, + beg: VALUE, + len: VALUE, + empty: ::std::os::raw::c_int, + ) -> VALUE; pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE; pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE; pub fn rb_ec_str_resurrect( @@ -1128,32 +1154,58 @@ extern "C" { lines: *mut ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); - pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; - pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); - pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; - pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long; - pub fn rb_yjit_icache_invalidate( - start: *mut ::std::os::raw::c_void, - end: *mut ::std::os::raw::c_void, - ); pub fn rb_yjit_exit_locations_dict( yjit_raw_samples: *mut VALUE, yjit_line_samples: *mut ::std::os::raw::c_int, samples_len: ::std::os::raw::c_int, ) -> VALUE; - pub fn rb_yjit_get_page_size() -> u32; - pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8; pub fn rb_c_method_tracing_currently_enabled(ec: *const rb_execution_context_t) -> bool; pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE); - pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void); - pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t); + pub fn rb_get_symbol_id(namep: VALUE) -> ID; + pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; + pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; + pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE; + pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; + pub fn rb_yjit_splat_varg_checks( + sp: *mut VALUE, + splat_array: VALUE, + cfp: *mut rb_control_frame_t, + ) -> VALUE; + pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int; + pub fn 
rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); + pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; + pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); + pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; + pub fn rb_yjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool; + pub fn rb_yjit_obj_written( + old: VALUE, + young: VALUE, + file: *const ::std::os::raw::c_char, + line: ::std::os::raw::c_int, + ); + pub fn rb_object_shape_count() -> VALUE; + pub fn rb_yjit_shape_obj_too_complex_p(obj: VALUE) -> bool; + pub fn rb_yjit_shape_capacity(shape_id: shape_id_t) -> attr_index_t; + pub fn rb_yjit_shape_index(shape_id: shape_id_t) -> attr_index_t; + pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_cme_ractor_serial(cme: *const rb_callable_method_entry_t) -> rb_serial_t; + pub fn rb_yjit_set_exception_return( + cfp: *mut rb_control_frame_t, + leave_exit: *mut ::std::os::raw::c_void, + leave_exception: *mut ::std::os::raw::c_void, + ); + pub fn rb_vm_instruction_size() -> u32; + pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; - pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; @@ -1167,7 +1219,6 @@ extern "C" { pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; - pub fn rb_get_symbol_id(namep: VALUE) -> ID; pub fn rb_get_cme_def_body_optimized_type( cme: *const rb_callable_method_entry_t, ) -> method_optimized_type; @@ -1179,10 +1230,20 @@ extern "C" { ) -> *mut rb_method_cfunc_t; pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize; pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; + pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; + pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; + pub fn rb_optimized_call( + recv: *mut VALUE, + ec: *mut rb_execution_context_t, + argc: ::std::os::raw::c_int, + argv: *mut VALUE, + kw_splat: ::std::os::raw::c_int, + block_handler: VALUE, + ) -> VALUE; + pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; - pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; pub fn rb_get_iseq_body_parent_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; @@ -1203,87 +1264,59 @@ extern "C" { 
pub fn rb_get_iseq_flags_forwardable(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_body_param_keyword( iseq: *const rb_iseq_t, - ) -> *const rb_seq_param_keyword_struct; + ) -> *const rb_iseq_param_keyword_struct; pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; - pub fn rb_optimized_call( - recv: *mut VALUE, - ec: *mut rb_execution_context_t, - argc: ::std::os::raw::c_int, - argv: *mut VALUE, - kw_splat: ::std::os::raw::c_int, - block_handler: VALUE, - ) -> VALUE; - pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; - pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; - pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t; pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; - pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); - pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE; pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE; - pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_yarv_class_of(obj: VALUE) -> VALUE; - pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; - pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; - pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; - pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; - pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE; - pub fn rb_yjit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE; - pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; - pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; - pub fn rb_yjit_splat_varg_checks( - sp: *mut VALUE, - splat_array: VALUE, - cfp: *mut rb_control_frame_t, - ) -> VALUE; - pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int; - pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); - pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; - pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; - pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; - pub fn rb_yjit_multi_ractor_p() -> bool; pub fn rb_assert_iseq_handle(handle: VALUE); + pub fn rb_assert_holding_vm_lock(); pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> 
::std::os::raw::c_int; - pub fn rb_yjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool; pub fn rb_assert_cme_handle(handle: VALUE); - pub fn rb_yjit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); - pub fn rb_yjit_obj_written( - old: VALUE, - young: VALUE, - file: *const ::std::os::raw::c_char, - line: ::std::os::raw::c_int, - ); - pub fn rb_yjit_vm_lock_then_barrier( + pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; + pub fn rb_jit_array_len(a: VALUE) -> ::std::os::raw::c_long; + pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); + pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); + pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool; + pub fn rb_jit_multi_ractor_p() -> bool; + pub fn rb_jit_vm_lock_then_barrier( recursive_lock_level: *mut ::std::os::raw::c_uint, file: *const ::std::os::raw::c_char, line: ::std::os::raw::c_int, ); - pub fn rb_yjit_vm_unlock( + pub fn rb_jit_vm_unlock( recursive_lock_level: *mut ::std::os::raw::c_uint, file: *const ::std::os::raw::c_char, line: ::std::os::raw::c_int, ); - pub fn rb_object_shape_count() -> VALUE; - pub fn rb_yjit_assert_holding_vm_lock(); - pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize; - pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize; - pub fn rb_yjit_set_exception_return( - cfp: *mut rb_control_frame_t, - leave_exit: *mut ::std::os::raw::c_void, - leave_exception: *mut ::std::os::raw::c_void, + pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t); + pub fn rb_jit_get_page_size() -> u32; + pub fn rb_jit_reserve_addr_space(mem_size: u32) -> *mut u8; + pub fn rb_jit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); + pub fn rb_jit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_jit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); + pub fn rb_jit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_jit_icache_invalidate( + start: *mut ::std::os::raw::c_void, + end: *mut ::std::os::raw::c_void, ); + pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_jit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 89da07beda..4f85937ee9 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -7,6 +7,38 @@ use crate::options::DumpDisasm; use std::fmt::Write; +#[cfg_attr(not(feature = "disasm"), allow(dead_code))] +#[derive(Copy, Clone, Debug)] +pub struct TerminalColor { + pub blue_begin: &'static str, + pub blue_end: &'static str, + pub bold_begin: &'static str, + pub bold_end: &'static str, +} + +pub static TTY_TERMINAL_COLOR: TerminalColor = TerminalColor { + blue_begin: "\x1b[34m", + blue_end: "\x1b[0m", + bold_begin: "\x1b[1m", + bold_end: "\x1b[22m", +}; + +pub static NON_TTY_TERMINAL_COLOR: TerminalColor = TerminalColor { + blue_begin: "", + blue_end: "", + bold_begin: "", + bold_end: "", +}; + +/// Terminal escape codes for colors, font weight, etc. Only enabled if stdout is a TTY. 
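
get_colors() just below picks between the two palettes above depending on whether stdout is a TTY, and the disassembly writers splice the chosen escape codes around comments and outlined code. Roughly, the effect is the following (a standalone sketch; the real check goes through crate::utils::stdout_supports_colors(), with std::io::IsTerminal standing in for it here):

    use std::io::IsTerminal;

    // Emit ANSI bold only when stdout is a terminal, so piped output stays clean.
    fn bold(text: &str) -> String {
        if std::io::stdout().is_terminal() {
            format!("\x1b[1m{text}\x1b[22m")
        } else {
            text.to_string()
        }
    }

    fn main() {
        println!("{}", bold("# illustrative disasm comment"));
    }
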
+pub fn get_colors() -> &'static TerminalColor { + if crate::utils::stdout_supports_colors() { + &TTY_TERMINAL_COLOR + } else { + &NON_TTY_TERMINAL_COLOR + } +} + /// Primitive called in yjit.rb /// Produce a string representing the disassembly for an ISEQ #[no_mangle] @@ -120,7 +152,7 @@ pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: Cod // Write with the fd opened during boot let mut file = unsafe { std::fs::File::from_raw_fd(*fd) }; file.write_all(disasm.as_bytes()).unwrap(); - file.into_raw_fd(); // keep the fd open + let _ = file.into_raw_fd(); // keep the fd open } }; } @@ -158,6 +190,7 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> #[cfg(test)] let start_addr = 0; let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + let colors = get_colors(); // For each instruction in this block for insn in insns.as_ref() { @@ -165,17 +198,17 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> if let Some(comment_list) = cb.comments_at(insn.address() as usize) { for comment in comment_list { if cb.outlined { - write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue + write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue } - writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold + writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold } } if cb.outlined { - write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue + write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue } writeln!(&mut out, " {insn}").unwrap(); if cb.outlined { - write!(&mut out, "\x1b[0m").unwrap(); // Disable blue + write!(&mut out, "{}", colors.blue_end).unwrap(); // Disable blue } } @@ -188,6 +221,7 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> let mut out = String::new(); let mut line_byte_idx = 0; const MAX_BYTES_PER_LINE: usize = 16; + let colors = get_colors(); for addr in start_addr..end_addr { if let Some(comment_list) = cb.comments_at(addr) { @@ -197,7 +231,7 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> line_byte_idx = 0; } for comment in comment_list { - writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold + writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold } } if line_byte_idx == 0 { diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index d468cfebd9..0f22fba6b8 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -206,7 +206,7 @@ pub fn assume_method_basic_definition( /// Tracks that a block is assuming it is operating in single-ractor mode. #[must_use] pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler) -> bool { - if unsafe { rb_yjit_multi_ractor_p() } { + if unsafe { rb_jit_multi_ractor_p() } { false } else { if jit_ensure_block_entry_exit(jit, asm).is_none() { @@ -303,7 +303,7 @@ pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_e }); } -/// Callback for then Ruby is about to spawn a ractor. In that case we need to +/// Callback for when Ruby is about to spawn a ractor. In that case we need to /// invalidate every block that is assuming single ractor mode. 
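
The invariants hunks above touch YJIT's single-ractor assumption: assume_single_ractor_mode records the assumption while compiling (or refuses it once multiple ractors exist), and rb_yjit_before_ractor_spawn, declared just below, invalidates every block that recorded it. A deliberately simplified model of that register-then-invalidate bookkeeping, with integer ids standing in for YJIT's BlockRefs:

    use std::collections::HashSet;

    #[derive(Default)]
    struct Invariants {
        single_ractor: HashSet<usize>, // ids of blocks assuming single-ractor mode
    }

    impl Invariants {
        fn assume_single_ractor_mode(&mut self, block_id: usize, multi_ractor: bool) -> bool {
            if multi_ractor {
                false // cannot assume; the caller falls back to a less optimized path
            } else {
                self.single_ractor.insert(block_id);
                true
            }
        }

        fn before_ractor_spawn(&mut self, invalidate: impl Fn(usize)) {
            for block_id in self.single_ractor.drain() {
                invalidate(block_id);
            }
        }
    }

    fn main() {
        let mut inv = Invariants::default();
        assert!(inv.assume_single_ractor_mode(1, false));
        assert!(!inv.assume_single_ractor_mode(2, true));
        inv.before_ractor_spawn(|block_id| println!("invalidating block {block_id}"));
    }
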
#[no_mangle] pub extern "C" fn rb_yjit_before_ractor_spawn() { @@ -495,7 +495,7 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, ins return; }; - if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } { + if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_jit_multi_ractor_p() } { // We can't generate code in these situations, so no need to invalidate. // See gen_opt_getinlinecache. return; @@ -626,6 +626,8 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { return; } + incr_counter!(invalidate_everything); + // Stop other ractors since we are going to patch machine code. with_vm_lock(src_loc!(), || { // Make it so all live block versions are no longer valid branch targets diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 1e3f31b88b..f3247fbf1a 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -3,6 +3,19 @@ #![allow(clippy::too_many_arguments)] // :shrug: #![allow(clippy::identity_op)] // Sometimes we do it for style +// TODO(alan): This lint is right -- the way we use `static mut` is UB happy. We have many globals +// and take `&mut` frequently, sometimes with a method that easily allows calling it twice. +// +// All of our globals rely on us running single threaded, which outside of boot-time relies on the +// VM lock (which signals and waits for all other threads to pause). To fix this properly, we should +// gather up all the globals into a struct to centralize the safety reasoning. That way we can also +// check for re-entrance in one place. +// +// We're too close to release to do that, though, so disable the lint for now. +#![allow(unknown_lints)] +#![allow(static_mut_refs)] +#![warn(unknown_lints)] + pub mod asm; mod backend; mod codegen; diff --git a/yjit/src/log.rs b/yjit/src/log.rs index f2dcf519e0..c5a724f7e1 100644 --- a/yjit/src/log.rs +++ b/yjit/src/log.rs @@ -81,7 +81,7 @@ impl Log { let mut file = unsafe { std::fs::File::from_raw_fd(fd) }; writeln!(file, "{}", entry).unwrap(); file.flush().unwrap(); - file.into_raw_fd(); // keep the fd open + let _ = file.into_raw_fd(); // keep the fd open } LogOutput::MemoryOnly => () // Don't print or write anything diff --git a/yjit/src/options.rs b/yjit/src/options.rs index b993b5685b..c87a436091 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -1,5 +1,5 @@ use std::{ffi::{CStr, CString}, ptr::null, fs::File}; -use crate::{backend::current::TEMP_REGS, stats::Counter}; +use crate::{backend::current::TEMP_REGS, cruby::*, stats::Counter}; use std::os::raw::{c_char, c_int, c_uint}; // Call threshold for small deployments and command-line apps @@ -46,6 +46,9 @@ pub struct Options { // The number of registers allocated for stack temps pub num_temp_regs: usize, + // Disable Ruby builtin methods defined by `with_jit` hooks, e.g. 
Array#each in Ruby + pub c_builtin: bool, + // Capture stats pub gen_stats: bool, @@ -94,6 +97,7 @@ pub static mut OPTIONS: Options = Options { no_type_prop: false, max_versions: 4, num_temp_regs: 5, + c_builtin: false, gen_stats: false, trace_exits: None, print_stats: true, @@ -117,7 +121,7 @@ pub const YJIT_OPTIONS: &'static [(&str, &str)] = &[ ("--yjit-call-threshold=num", "Number of calls to trigger JIT."), ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."), ("--yjit-stats", "Enable collecting YJIT statistics."), - ("--yjit--log[=file|dir]", "Enable logging of YJIT's compilation activity."), + ("--yjit-log[=file|dir]", "Enable logging of YJIT's compilation activity."), ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."), ("--yjit-code-gc", "Run code GC when the code size reaches the limit."), ("--yjit-perf", "Enable frame pointers and perf profiling."), @@ -148,7 +152,6 @@ pub enum DumpDisasm { // Dump to stdout Stdout, // Dump to "yjit_{pid}.log" file under the specified directory - #[cfg_attr(not(feature = "disasm"), allow(dead_code))] File(std::os::unix::io::RawFd), } @@ -169,7 +172,7 @@ macro_rules! get_option { { // Make this a statement since attributes on expressions are experimental #[allow(unused_unsafe)] - let ret = unsafe { OPTIONS.$option_name }; + let ret = unsafe { crate::options::OPTIONS.$option_name }; ret } }; @@ -270,6 +273,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } }, + ("c-builtin", _) => unsafe { + OPTIONS.c_builtin = true; + }, + ("code-gc", _) => unsafe { OPTIONS.code_gc = true; }, @@ -413,3 +420,13 @@ pub extern "C" fn rb_yjit_show_usage(help: c_int, highlight: c_int, width: c_uin unsafe { ruby_show_usage_line(name.as_ptr(), null(), description.as_ptr(), help, highlight, width, columns) } } } + +/// Return true if --yjit-c-builtin is given +#[no_mangle] +pub extern "C" fn rb_yjit_c_builtin_p(_ec: EcPtr, _self: VALUE) -> VALUE { + if get_option!(c_builtin) { + Qtrue + } else { + Qfalse + } +} diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 3dc37d4bac..105def2fff 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -1,18 +1,19 @@ //! Everything related to the collection of runtime stats in YJIT -//! See the stats feature and the --yjit-stats command-line option +//! See the --yjit-stats command-line option -#![allow(dead_code)] // Counters are only used with the stats features - -use std::alloc::{GlobalAlloc, Layout, System}; use std::ptr::addr_of_mut; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::Ordering; use std::time::Instant; use std::collections::HashMap; use crate::codegen::CodegenGlobals; use crate::cruby::*; use crate::options::*; -use crate::yjit::yjit_enabled_p; +use crate::yjit::{yjit_enabled_p, YJIT_INIT_TIME}; + +#[cfg(feature = "stats_allocator")] +#[path = "../../jit/src/lib.rs"] +mod jit; /// Running total of how many ISeqs are in the system. #[no_mangle] @@ -22,43 +23,9 @@ pub static mut rb_yjit_live_iseq_count: u64 = 0; #[no_mangle] pub static mut rb_yjit_iseq_alloc_count: u64 = 0; -/// A middleware to count Rust-allocated bytes as yjit_alloc_size. 
-#[global_allocator] -static GLOBAL_ALLOCATOR: StatsAlloc = StatsAlloc { alloc_size: AtomicUsize::new(0) }; - -pub struct StatsAlloc { - alloc_size: AtomicUsize, -} - -unsafe impl GlobalAlloc for StatsAlloc { - unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst); - System.alloc(layout) - } - - unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { - self.alloc_size.fetch_sub(layout.size(), Ordering::SeqCst); - System.dealloc(ptr, layout) - } - - unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { - self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst); - System.alloc_zeroed(layout) - } - - unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { - if new_size > layout.size() { - self.alloc_size.fetch_add(new_size - layout.size(), Ordering::SeqCst); - } else if new_size < layout.size() { - self.alloc_size.fetch_sub(layout.size() - new_size, Ordering::SeqCst); - } - System.realloc(ptr, layout, new_size) - } -} - /// The number of bytes YJIT has allocated on the Rust heap. pub fn yjit_alloc_size() -> usize { - GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst) + jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst) } /// Mapping of C function / ISEQ name to integer indices @@ -123,7 +90,9 @@ pub extern "C" fn incr_iseq_counter(idx: usize) { iseq_call_count[idx] += 1; } -// YJIT exit counts for each instruction type +/// YJIT exit counts for each instruction type. +/// Note that `VM_INSTRUCTION_SIZE` is an upper bound and the actual number +/// of VM opcodes may be different in the build. See [`rb_vm_instruction_size()`] const VM_INSTRUCTION_SIZE_USIZE: usize = VM_INSTRUCTION_SIZE as usize; static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE_USIZE] = [0; VM_INSTRUCTION_SIZE_USIZE]; @@ -281,7 +250,9 @@ pub const DEFAULT_COUNTERS: &'static [Counter] = &[ Counter::deleted_defer_block_count, Counter::compiled_branch_count, Counter::compile_time_ns, + Counter::compilation_failure, Counter::max_inline_versions, + Counter::inline_block_count, Counter::num_contexts_encoded, Counter::context_cache_hits, @@ -293,6 +264,7 @@ pub const DEFAULT_COUNTERS: &'static [Counter] = &[ Counter::invalidate_constant_ic_fill, Counter::invalidate_no_singleton_class, Counter::invalidate_ep_escape, + Counter::invalidate_everything, ]; /// Macro to increase a counter by name and count @@ -348,7 +320,6 @@ macro_rules! ptr_to_counter { } }; } -pub(crate) use ptr_to_counter; // Declare all the counters we track make_counters! { @@ -356,6 +327,7 @@ make_counters! { // Method calls that fallback to dynamic dispatch send_singleton_class, + send_forwarding, send_ivar_set_method, send_zsuper_method, send_undef_method, @@ -382,8 +354,8 @@ make_counters! { send_iseq_arity_error, send_iseq_block_arg_type, send_iseq_clobbering_block_arg, + send_iseq_block_arg_gc_unsafe, send_iseq_complex_discard_extras, - send_iseq_forwarding, send_iseq_leaf_builtin_block_arg_block_param, send_iseq_kw_splat_non_nil, send_iseq_kwargs_mismatch, @@ -419,6 +391,9 @@ make_counters! { send_bmethod_ractor, send_bmethod_block_arg, send_optimized_block_arg, + send_pred_not_fixnum, + send_pred_underflow, + send_str_dup_exivar, invokesuper_defined_class_mismatch, invokesuper_forwarding, @@ -462,8 +437,10 @@ make_counters! 
{ guard_send_not_fixnum_or_flonum, guard_send_not_string, guard_send_respond_to_mid_mismatch, + guard_send_str_aref_not_fixnum, guard_send_cfunc_bad_splat_vargs, + guard_send_cfunc_block_not_nil, guard_invokesuper_me_changed, @@ -512,8 +489,7 @@ make_counters! { opt_aset_not_array, opt_aset_not_fixnum, opt_aset_not_hash, - - opt_aref_with_qundef, + opt_aset_frozen, opt_case_dispatch_megamorphic, @@ -524,6 +500,7 @@ make_counters! { expandarray_postarg, expandarray_not_array, expandarray_to_ary, + expandarray_method_missing, expandarray_chain_max_depth, // getblockparam @@ -569,6 +546,7 @@ make_counters! { branch_insn_count, branch_known_count, max_inline_versions, + inline_block_count, num_contexts_encoded, freed_iseq_count, @@ -583,6 +561,7 @@ make_counters! { invalidate_constant_ic_fill, invalidate_no_singleton_class, invalidate_ep_escape, + invalidate_everything, // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in // executable memory, so this should be 0. @@ -662,8 +641,7 @@ pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, key: VALUE) - /// Primitive called in yjit.rb /// -/// Check if trace_exits generation is enabled. Requires the stats feature -/// to be enabled. +/// Check if trace_exits generation is enabled. #[no_mangle] pub extern "C" fn rb_yjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { if get_option!(trace_exits).is_some() { @@ -687,7 +665,7 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V return Qnil; } - // If the stats feature is enabled, pass yjit_raw_samples and yjit_line_samples + // Pass yjit_raw_samples and yjit_line_samples // to the C function called rb_yjit_exit_locations_dict for parsing. let yjit_raw_samples = YjitExitLocations::get_raw_samples(); let yjit_line_samples = YjitExitLocations::get_line_samples(); @@ -784,12 +762,18 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES); // VM instructions count - set_stat_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize); + if rb_vm_insn_count > 0 { + set_stat_usize!(hash, "vm_insns_count", rb_vm_insn_count as usize); + } set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize); set_stat_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize); set_stat!(hash, "object_shape_count", rb_object_shape_count()); + + // Time since YJIT init in nanoseconds + let time_nanos = Instant::now().duration_since(YJIT_INIT_TIME.unwrap()).as_nanos(); + set_stat_usize!(hash, "yjit_active_ns", time_nanos as usize); } // If we're not generating stats, put only default counters @@ -824,7 +808,8 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { // For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME" // and the value is the count of side exits for that instruction. 
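Earlier in this stats hunk, the generated hash gains a yjit_active_ns entry derived from YJIT_INIT_TIME. A small sketch of that measurement, assuming only that an Instant is stored once at initialization; names here are illustrative:

use std::time::Instant;

// Set once during initialization, read when the stats hash is built.
static mut INIT_TIME: Option<Instant> = None;

fn init() {
    unsafe { INIT_TIME = Some(Instant::now()); }
}

fn active_ns() -> u128 {
    let start = unsafe { INIT_TIME }.expect("init() must run first");
    Instant::now().duration_since(start).as_nanos()
}

fn main() {
    init();
    std::thread::sleep(std::time::Duration::from_millis(1));
    println!("active for {} ns", active_ns());
}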
- for op_idx in 0..VM_INSTRUCTION_SIZE_USIZE { + use crate::utils::IntoUsize; + for op_idx in 0..rb_vm_instruction_size().as_usize() { let op_name = insn_name(op_idx); let key_string = "exit_".to_owned() + &op_name; let count = EXIT_OP_COUNT[op_idx]; @@ -850,11 +835,13 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit); // Proportion of instructions that retire in YJIT - let total_insns_count = retired_in_yjit + rb_vm_insns_count; - set_stat_usize!(hash, "total_insns_count", total_insns_count as usize); + if rb_vm_insn_count > 0 { + let total_insns_count = retired_in_yjit + rb_vm_insn_count; + set_stat_usize!(hash, "total_insns_count", total_insns_count as usize); - let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64; - set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit); + let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64; + set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit); + } // Set method call counts in a Ruby dict fn set_call_counts( @@ -905,13 +892,13 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { } /// Record the backtrace when a YJIT exit occurs. This functionality requires -/// that the stats feature is enabled as well as the --yjit-trace-exits option. +/// the --yjit-trace-exits option. /// /// This function will fill two Vec's in YjitExitLocations to record the raw samples /// and line samples. Their length should be the same, however the data stored in /// them is different. #[no_mangle] -pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE) +pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) { // Return if YJIT is not enabled if !yjit_enabled_p() { @@ -935,10 +922,11 @@ pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE) // rb_vm_insn_addr2opcode won't work in cargo test --all-features // because it's a C function. Without insn call, this function is useless // so wrap the whole thing in a not test check. + let _ = exit_pc; #[cfg(not(test))] { // Get the opcode from the encoded insn handler at this PC - let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) }; + let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; // Use the same buffer size as Stackprof. const BUFF_LEN: usize = 2048; diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index c4b5fbd2e7..251628fabf 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -3,6 +3,7 @@ use crate::backend::ir::*; use crate::cruby::*; use std::slice; +use std::os::raw::c_int; /// Trait for casting to [usize] that allows you to say `.as_usize()`. /// Implementation conditional on the cast preserving the numeric value on @@ -91,10 +92,7 @@ pub fn ruby_str_to_rust(v: VALUE) -> String { let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; - match String::from_utf8(str_slice.to_vec()) { - Ok(utf8) => utf8, - Err(_) => String::new(), - } + String::from_utf8(str_slice.to_vec()).unwrap_or_default() } // Location is the file defining the method, colon, method name. @@ -162,8 +160,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) { } } - asm.cpush_all(); - let argument = match opnd { Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. 
} => { // Sign-extend the value if necessary @@ -178,7 +174,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) { }; asm.ccall(print_int_fn as *const u8, vec![argument]); - asm.cpop_all(); } /// Generate code to print a pointer @@ -191,9 +186,7 @@ pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { assert!(opnd.rm_num_bits() == 64); - asm.cpush_all(); asm.ccall(print_ptr_fn as *const u8, vec![opnd]); - asm.cpop_all(); } /// Generate code to print a value @@ -206,9 +199,7 @@ pub fn print_value(asm: &mut Assembler, opnd: Opnd) { assert!(matches!(opnd, Opnd::Value(_))); - asm.cpush_all(); asm.ccall(print_value_fn as *const u8, vec![opnd]); - asm.cpop_all(); } /// Generate code to print constant string to stdout @@ -223,7 +214,6 @@ pub fn print_str(asm: &mut Assembler, str: &str) { } } - asm.cpush_all(); let string_data = asm.new_label("string_data"); let after_string = asm.new_label("after_string"); @@ -235,8 +225,14 @@ pub fn print_str(asm: &mut Assembler, str: &str) { let opnd = asm.lea_jump_target(string_data); asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]); +} - asm.cpop_all(); +pub fn stdout_supports_colors() -> bool { + // TODO(max): Use std::io::IsTerminal after upgrading Rust to 1.70 + extern "C" { fn isatty(fd: c_int) -> c_int; } + let stdout = 1; + let is_terminal = unsafe { isatty(stdout) } == 1; + is_terminal } #[cfg(test)] diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs index f56b0d8213..9126cf300e 100644 --- a/yjit/src/virtualmem.rs +++ b/yjit/src/virtualmem.rs @@ -3,10 +3,13 @@ // usize->pointer casts is viable. It seems like a lot of work for us to participate for not much // benefit. -use std::ptr::NonNull; +use std::{cell::RefCell, ptr::NonNull}; use crate::{backend::ir::Target, stats::yjit_alloc_size, utils::IntoUsize}; +#[cfg(test)] +use crate::options::get_option; + #[cfg(not(test))] pub type VirtualMem = VirtualMemory<sys::SystemAllocator>; @@ -36,8 +39,14 @@ pub struct VirtualMemory<A: Allocator> { /// granularity. page_size_bytes: usize, + /// Mutable parts. + mutable: RefCell<VirtualMemoryMut<A>>, +} + +/// Mutable parts of [`VirtualMemory`]. +pub struct VirtualMemoryMut<A: Allocator> { /// Number of bytes that have we have allocated physical memory for starting at - /// [Self::region_start]. + /// [VirtualMemory::region_start]. mapped_region_bytes: usize, /// Keep track of the address of the last written to page. @@ -124,9 +133,11 @@ impl<A: Allocator> VirtualMemory<A> { region_size_bytes, memory_limit_bytes, page_size_bytes, - mapped_region_bytes: 0, - current_write_page: None, - allocator, + mutable: RefCell::new(VirtualMemoryMut { + mapped_region_bytes: 0, + current_write_page: None, + allocator, + }), } } @@ -137,7 +148,7 @@ impl<A: Allocator> VirtualMemory<A> { } pub fn mapped_end_ptr(&self) -> CodePtr { - self.start_ptr().add_bytes(self.mapped_region_bytes) + self.start_ptr().add_bytes(self.mutable.borrow().mapped_region_bytes) } pub fn virtual_end_ptr(&self) -> CodePtr { @@ -146,7 +157,7 @@ impl<A: Allocator> VirtualMemory<A> { /// Size of the region in bytes that we have allocated physical memory for. pub fn mapped_region_size(&self) -> usize { - self.mapped_region_bytes + self.mutable.borrow().mapped_region_bytes } /// Size of the region in bytes where writes could be attempted. @@ -161,19 +172,21 @@ impl<A: Allocator> VirtualMemory<A> { } /// Write a single byte. The first write to a page makes it readable. 
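The virtualmem.rs change above splits VirtualMemory into an immutable part plus a RefCell-wrapped VirtualMemoryMut, so callers can hold a plain Rc<VirtualMem> instead of Rc<RefCell<VirtualMem>> and methods can take &self. A condensed sketch of that interior-mutability refactor, using toy types rather than the real ones:

use std::cell::RefCell;
use std::rc::Rc;

struct Region {
    size_bytes: usize,           // fixed at construction
    mutable: RefCell<RegionMut>, // interior mutability for everything that changes
}

struct RegionMut {
    mapped_bytes: usize,
}

impl Region {
    fn new(size_bytes: usize) -> Self {
        Region { size_bytes, mutable: RefCell::new(RegionMut { mapped_bytes: 0 }) }
    }

    // Note: &self, not &mut self, thanks to the RefCell.
    fn map_more(&self, bytes: usize) {
        let mut m = self.mutable.borrow_mut();
        m.mapped_bytes = (m.mapped_bytes + bytes).min(self.size_bytes);
    }

    fn mapped_bytes(&self) -> usize {
        self.mutable.borrow().mapped_bytes
    }
}

fn main() {
    let region = Rc::new(Region::new(4096));
    let alias = Rc::clone(&region); // shared handle, no RefCell at the call site
    alias.map_more(1024);
    assert_eq!(region.mapped_bytes(), 1024);
}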
- pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + pub fn write_byte(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + let mut mutable = self.mutable.borrow_mut(); + let page_size = self.page_size_bytes; let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8; let page_addr = (raw as usize / page_size) * page_size; - if self.current_write_page == Some(page_addr) { + if mutable.current_write_page == Some(page_addr) { // Writing within the last written to page, nothing to do } else { // Switching to a different and potentially new page let start = self.region_start.as_ptr(); - let mapped_region_end = start.wrapping_add(self.mapped_region_bytes); + let mapped_region_end = start.wrapping_add(mutable.mapped_region_bytes); let whole_region_end = start.wrapping_add(self.region_size_bytes); - let alloc = &mut self.allocator; + let alloc = &mut mutable.allocator; assert!((start..=whole_region_end).contains(&mapped_region_end)); @@ -185,7 +198,7 @@ impl<A: Allocator> VirtualMemory<A> { return Err(FailedPageMapping); } - self.current_write_page = Some(page_addr); + mutable.current_write_page = Some(page_addr); } else if (start..whole_region_end).contains(&raw) && (page_addr + page_size - start as usize) + yjit_alloc_size() < self.memory_limit_bytes { // Writing to a brand new page @@ -217,9 +230,9 @@ impl<A: Allocator> VirtualMemory<A> { unreachable!("unknown arch"); } } - self.mapped_region_bytes = self.mapped_region_bytes + alloc_size; + mutable.mapped_region_bytes = mutable.mapped_region_bytes + alloc_size; - self.current_write_page = Some(page_addr); + mutable.current_write_page = Some(page_addr); } else { return Err(OutOfBounds); } @@ -231,20 +244,41 @@ impl<A: Allocator> VirtualMemory<A> { Ok(()) } + /// Make all the code in the region writeable. + /// Call this during GC before the phase of updating reference fields. + pub fn mark_all_writeable(&self) { + let mut mutable = self.mutable.borrow_mut(); + + mutable.current_write_page = None; + + let region_start = self.region_start; + let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap(); + + // Make mapped region executable + if !mutable.allocator.mark_writable(region_start.as_ptr(), mapped_region_bytes) { + panic!("Cannot make memory region writable: {:?}-{:?}", + region_start.as_ptr(), + unsafe { region_start.as_ptr().add(mapped_region_bytes as usize)} + ); + } + } + /// Make all the code in the region executable. Call this at the end of a write session. /// See [Self] for usual usage flow. - pub fn mark_all_executable(&mut self) { - self.current_write_page = None; + pub fn mark_all_executable(&self) { + let mut mutable = self.mutable.borrow_mut(); + + mutable.current_write_page = None; let region_start = self.region_start; - let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap(); + let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap(); // Make mapped region executable - self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes); + mutable.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes); } /// Free a range of bytes. start_ptr must be memory page-aligned. - pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) { + pub fn free_bytes(&self, start_ptr: CodePtr, size: u32) { assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0); // Bounds check the request. We should only free memory we manage. 
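write_byte above only talks to the allocator when a write lands on a different page than the most recently written one, tracked via current_write_page. A toy sketch of that bookkeeping; the syscalls field stands in for mark_writable requests and the types are illustrative, not the patch's own:

// Round an address down to the start of its page.
fn page_start(addr: usize, page_size: usize) -> usize {
    (addr / page_size) * page_size
}

struct PageTracker {
    page_size: usize,
    current_write_page: Option<usize>,
    syscalls: usize, // stand-in for mark_writable/mprotect requests
}

impl PageTracker {
    fn on_write(&mut self, addr: usize) {
        let page = page_start(addr, self.page_size);
        if self.current_write_page != Some(page) {
            self.syscalls += 1; // the real code would call allocator.mark_writable here
            self.current_write_page = Some(page);
        }
    }
}

fn main() {
    let mut t = PageTracker { page_size: 4096, current_write_page: None, syscalls: 0 };
    t.on_write(10);
    t.on_write(20);   // same page: no extra request
    t.on_write(5000); // new page: one more request
    assert_eq!(t.syscalls, 2);
}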
@@ -257,7 +291,8 @@ impl<A: Allocator> VirtualMemory<A> { // code page, it's more appropriate to check the last byte against the virtual region. assert!(virtual_region.contains(&last_byte_to_free)); - self.allocator.mark_unused(start_ptr.raw_ptr(self), size); + let mut mutable = self.mutable.borrow_mut(); + mutable.allocator.mark_unused(start_ptr.raw_ptr(self), size); } } @@ -284,15 +319,15 @@ mod sys { impl super::Allocator for SystemAllocator { fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool { - unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_writable(ptr as VoidPtr, size) } } fn mark_executable(&mut self, ptr: *const u8, size: u32) { - unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_executable(ptr as VoidPtr, size) } } fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool { - unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_unused(ptr as VoidPtr, size) } } } } @@ -379,18 +414,18 @@ pub mod tests { PAGE_SIZE.try_into().unwrap(), NonNull::new(mem_start as *mut u8).unwrap(), mem_size, - 128 * 1024 * 1024, + get_option!(mem_size), ) } #[test] #[cfg(target_arch = "x86_64")] fn new_memory_is_initialized() { - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); virt.write_byte(virt.start_ptr(), 1).unwrap(); assert!( - virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), + virt.mutable.borrow().allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), "Entire page should be initialized", ); @@ -398,21 +433,21 @@ pub mod tests { let three_pages = 3 * PAGE_SIZE; virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap(); assert!( - virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0), + virt.mutable.borrow().allocator.memory[..three_pages].iter().all(|&byte| byte != 0), "Gaps between write requests should be filled", ); } #[test] fn no_redundant_syscalls_when_writing_to_the_same_page() { - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); virt.write_byte(virt.start_ptr(), 1).unwrap(); virt.write_byte(virt.start_ptr(), 0).unwrap(); assert!( matches!( - virt.allocator.requests[..], + virt.mutable.borrow().allocator.requests[..], [MarkWritable { start_idx: 0, length: PAGE_SIZE }], ) ); @@ -421,7 +456,7 @@ pub mod tests { #[test] fn bounds_checking() { use super::WriteError::*; - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size()); assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0)); @@ -434,7 +469,7 @@ pub mod tests { fn only_written_to_regions_become_executable() { // ... 
so we catch attempts to read/write/execute never-written-to regions const THREE_PAGES: usize = PAGE_SIZE * 3; - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2); virt.write_byte(page_two_start, 1).unwrap(); virt.mark_all_executable(); @@ -442,7 +477,7 @@ pub mod tests { assert!(virt.virtual_region_size() > THREE_PAGES); assert!( matches!( - virt.allocator.requests[..], + virt.mutable.borrow().allocator.requests[..], [ MarkWritable { start_idx: 0, length: THREE_PAGES }, MarkExecutable { start_idx: 0, length: THREE_PAGES }, diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index a9ecc24a80..517a0daae5 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -7,7 +7,8 @@ use crate::stats::YjitExitLocations; use crate::stats::incr_counter; use crate::stats::with_compile_time; -use std::os::raw; +use std::os::raw::{c_char, c_int}; +use std::time::Instant; use crate::log::Log; /// Is YJIT on? The interpreter uses this variable to decide whether to trigger @@ -16,10 +17,13 @@ use crate::log::Log; #[no_mangle] pub static mut rb_yjit_enabled_p: bool = false; +// Time when YJIT was yjit was initialized (see yjit_init) +pub static mut YJIT_INIT_TIME: Option<Instant> = None; + /// Parse one command-line option. /// This is called from ruby.c #[no_mangle] -pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool { +pub extern "C" fn rb_yjit_parse_option(str_ptr: *const c_char) -> bool { return parse_option(str_ptr).is_some(); } @@ -50,6 +54,12 @@ fn yjit_init() { // TODO: need to make sure that command-line options have been // initialized by CRuby + // Call YJIT hooks before enabling YJIT to avoid compiling the hooks themselves + unsafe { + let yjit = rb_const_get(rb_cRubyVM, rust_str_to_id("YJIT")); + rb_funcall(yjit, rust_str_to_id("call_jit_hooks"), 0); + } + // Catch panics to avoid UB for unwinding into C frames. // See https://doc.rust-lang.org/nomicon/exception-safety.html let result = std::panic::catch_unwind(|| { @@ -76,6 +86,16 @@ fn yjit_init() { let _ = std::fs::remove_file(&perf_map); println!("YJIT perf map: {perf_map}"); } + + // Note the time when YJIT was initialized + unsafe { + YJIT_INIT_TIME = Some(Instant::now()); + } +} + +#[no_mangle] +pub extern "C" fn rb_yjit_free_at_exit() { + yjit_shutdown_free_codegen_table(); } /// At the moment, we abort in all cases we panic. @@ -102,7 +122,10 @@ fn rb_bug_panic_hook() { env::set_var("RUST_BACKTRACE", "1"); previous_hook(panic_info); - unsafe { rb_bug(b"YJIT panicked\0".as_ref().as_ptr() as *const raw::c_char); } + // Abort with rb_bug(). It has a length limit on the message. 
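The new panic hook caps the formatted panic message before handing it to rb_bug, since rb_bug limits the message length. A hedged sketch of one way to truncate a Rust string to a byte budget without splitting a UTF-8 character; the helper name is hypothetical, not part of the patch:

// Cap a message at max_bytes, backing off to a char boundary if needed.
fn truncated(msg: &str, max_bytes: usize) -> &str {
    let mut end = msg.len().min(max_bytes);
    while !msg.is_char_boundary(end) {
        end -= 1;
    }
    &msg[..end]
}

fn main() {
    let long = "x".repeat(1000);
    assert_eq!(truncated(&long, 0x100).len(), 0x100);
    assert_eq!(truncated("short", 0x100), "short");
}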
+ let panic_message = &format!("{}", panic_info)[..]; + let len = std::cmp::min(0x100, panic_message.len()) as c_int; + unsafe { rb_bug(b"YJIT: %*s\0".as_ref().as_ptr() as *const c_char, len, panic_message.as_ptr()); } })); } @@ -168,8 +191,24 @@ pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { /// Enable YJIT compilation, returning true if YJIT was previously disabled #[no_mangle] -pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE, gen_log: VALUE, print_log: VALUE) -> VALUE { +pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE, gen_log: VALUE, print_log: VALUE, mem_size: VALUE, call_threshold: VALUE) -> VALUE { with_vm_lock(src_loc!(), || { + + if !mem_size.nil_p() { + let mem_size_mb = mem_size.as_isize() >> 1; + let mem_size_bytes = mem_size_mb * 1024 * 1024; + unsafe { + OPTIONS.mem_size = mem_size_bytes as usize; + } + } + + if !call_threshold.nil_p() { + let threshold = call_threshold.as_isize() >> 1; + unsafe { + rb_yjit_call_threshold = threshold as u64; + } + } + // Initialize and enable YJIT if gen_stats.test() { unsafe { |

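The rb_yjit_enable change above decodes its optional Fixnum arguments with an arithmetic right shift by one, which recovers the integer from Ruby's tagged Fixnum representation, and scales the mem-size value from MiB to bytes. A small sketch of that arithmetic; the constants are illustrative only:

// A Fixnum stores the integer shifted left by one with the low tag bit set,
// so an arithmetic right shift by one recovers the numeric value.
fn fixnum_to_isize(fixnum_bits: isize) -> isize {
    fixnum_bits >> 1
}

fn mem_size_bytes(mem_size_fixnum_bits: isize) -> usize {
    let mib = fixnum_to_isize(mem_size_fixnum_bits);
    (mib as usize) * 1024 * 1024
}

fn main() {
    // 128 encoded as a Fixnum: (128 << 1) | 1
    let encoded = (128_isize << 1) | 1;
    assert_eq!(fixnum_to_isize(encoded), 128);
    assert_eq!(mem_size_bytes(encoded), 128 * 1024 * 1024);
}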