author    Takashi Kokubun <takashikkbn@gmail.com>    2022-10-17 10:45:59 -0700
committer GitHub <noreply@github.com>                2022-10-17 10:45:59 -0700
commit    64c52c428285e7930aed62740cc9c54ee483178e (patch)
tree      818515b6cc1909e98cdcdca93f0a3ac3b2b8cd5a
parent    e7c71c6c9271b0c29f210769159090e17128e740 (diff)
YJIT: Interleave inline and outlined code blocks (#6460)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Notes: Merged-By: k0kubun <takashikkbn@gmail.com>
-rw-r--r--  yjit/src/asm/mod.rs             240
-rw-r--r--  yjit/src/backend/arm64/mod.rs   103
-rw-r--r--  yjit/src/backend/ir.rs           17
-rw-r--r--  yjit/src/backend/tests.rs         6
-rw-r--r--  yjit/src/backend/x86_64/mod.rs   36
-rw-r--r--  yjit/src/codegen.rs              80
-rw-r--r--  yjit/src/core.rs                 12
-rw-r--r--  yjit/src/options.rs              36
-rw-r--r--  yjit/src/utils.rs                 9
9 files changed, 378 insertions, 161 deletions
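
The change in a nutshell: instead of giving the inline and outlined CodeBlocks separate halves of the whole reservation, every code page is now split between them. A self-contained sketch of the per-page arithmetic (numbers assume the default 16 KiB code page and the jump reserves defined in this diff; the extra debug-build padding at page starts is ignored):

    // Each 16 KiB code page holds inline code in its first half and outlined
    // code in its second half; both halves reserve room at the end for a jump
    // to the next page, so related inline/outlined code stays on the same page.
    const CODE_PAGE_SIZE: usize = 16 * 1024;
    const JMP_PTR_BYTES: usize = 6; // x86_64 value in this diff; 20 on arm64

    fn page_ranges(page_idx: usize) -> ((usize, usize), (usize, usize)) {
        let base = page_idx * CODE_PAGE_SIZE;
        let inline = (base, base + CODE_PAGE_SIZE / 2 - JMP_PTR_BYTES);
        let outlined = (base + CODE_PAGE_SIZE / 2, base + CODE_PAGE_SIZE - JMP_PTR_BYTES);
        (inline, outlined)
    }

    fn main() {
        // Page 1 of the region: inline writes [16384, 24570), outlined [24576, 32762).
        assert_eq!(page_ranges(1), ((16_384, 24_570), (24_576, 32_762)));
    }
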
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 8356201ba6..1ab813964c 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -1,9 +1,20 @@
+use std::cell::RefCell;
+use std::cmp;
use std::fmt;
use std::mem;
+use std::rc::Rc;
+#[cfg(target_arch = "x86_64")]
+use crate::backend::x86_64::JMP_PTR_BYTES;
+#[cfg(target_arch = "aarch64")]
+use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::backend::ir::Assembler;
+use crate::backend::ir::Target;
+use crate::virtualmem::WriteError;
#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;
+use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@@ -17,7 +28,8 @@ pub mod arm64;
//
/// Reference to an ASM label
-struct LabelRef {
+#[derive(Clone)]
+pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,
@@ -36,7 +48,7 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
- mem_block: VirtualMem,
+ mem_block: Rc<RefCell<VirtualMem>>,
// Memory block size
mem_size: usize,
@@ -44,6 +56,12 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,
+ // Size of a code page (inlined + outlined)
+ page_size: usize,
+
+ // Size reserved for writing a jump to the next page
+ page_end_reserve: usize,
+
// Table of registered label addresses
label_addrs: Vec<usize>,
@@ -58,7 +76,6 @@ pub struct CodeBlock {
asm_comments: BTreeMap<usize, Vec<String>>,
// True for OutlinedCb
- #[cfg(feature = "disasm")]
pub outlined: bool,
// Set if the CodeBlock is unable to output some instructions,
@@ -67,27 +84,158 @@ pub struct CodeBlock {
dropped_bytes: bool,
}
+/// Set of CodeBlock label states. Used for recovering the previous state.
+pub struct LabelState {
+ label_addrs: Vec<usize>,
+ label_names: Vec<String>,
+ label_refs: Vec<LabelRef>,
+}
+
impl CodeBlock {
/// Make a new CodeBlock
- pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
- Self {
- mem_size: mem_block.virtual_region_size(),
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
+ let mem_size = mem_block.borrow().virtual_region_size();
+ let mut cb = Self {
mem_block,
+ mem_size,
write_pos: 0,
+ page_size,
+ page_end_reserve: JMP_PTR_BYTES,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
- #[cfg(feature = "disasm")]
outlined,
dropped_bytes: false,
+ };
+ cb.write_pos = cb.page_start();
+ cb
+ }
+
+ /// Move the CodeBlock to the next page. If it's on the furthest page,
+ /// move the other CodeBlock to the next page as well.
+ pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
+ let old_write_ptr = self.get_write_ptr();
+ self.set_write_ptr(base_ptr);
+ self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+
+ // Move self to the next page
+ let next_page_idx = self.write_pos / self.page_size + 1;
+ if !self.set_page(next_page_idx, &jmp_ptr) {
+ self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
+ return false;
+ }
+
+ // Move the other CodeBlock to the same page if it's on the furthest page
+ self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+
+ return !self.dropped_bytes;
+ }
+
+ /// Move the CodeBlock to page_idx only if it's not going backwards.
+ fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
+ // Do not move the CodeBlock if page_idx points to an old position so that this
+ // CodeBlock will not overwrite existing code.
+ //
+ // Let's say this is the current situation:
+ // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // When cb needs to patch page1, this will be temporarily changed to:
+ // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // While patching page1, cb may need to jump to page2. What set_page currently does is:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
+ // instead of:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
+ // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
+ // write_pos point to existing code in page2, which might let ocb overwrite it later.
+ //
+ // We could remember the last write_pos in page2 and let set_page use that position,
+ // but you need to waste some space for keeping write_pos for every single page.
+ // It doesn't seem necessary for performance either. So we're currently not doing it.
+ let mut dst_pos = self.page_size * page_idx + self.page_start();
+ if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
+ // Reset dropped_bytes
+ self.dropped_bytes = false;
+
+ // Convert dst_pos to dst_ptr
+ let src_pos = self.write_pos;
+ self.write_pos = dst_pos;
+ let dst_ptr = self.get_write_ptr();
+ self.write_pos = src_pos;
+
+ // Generate jmp_ptr from src_pos to dst_pos
+ self.without_page_end_reserve(|cb| {
+ cb.add_comment("jump to next page");
+ jmp_ptr(cb, dst_ptr);
+ assert!(!cb.has_dropped_bytes());
+ });
+
+ // Start the next code from dst_pos
+ self.write_pos = dst_pos;
}
+ !self.dropped_bytes
+ }
+
+ /// write_pos of the current page start
+ pub fn page_start_pos(&self) -> usize {
+ self.get_write_pos() / self.page_size * self.page_size + self.page_start()
+ }
+
+ /// Offset of each page where CodeBlock should start writing
+ pub fn page_start(&self) -> usize {
+ let mut start = if self.inline() {
+ 0
+ } else {
+ self.page_size / 2
+ };
+ if cfg!(debug_assertions) && !cfg!(test) {
+ // Leave illegal instructions at the beginning of each page to assert
+ // we're not accidentally crossing page boundaries.
+ start += JMP_PTR_BYTES;
+ }
+ start
+ }
+
+ /// Offset of each page where CodeBlock should stop writing (exclusive)
+ pub fn page_end(&self) -> usize {
+ let page_end = if self.inline() {
+ self.page_size / 2
+ } else {
+ self.page_size
+ };
+ page_end - self.page_end_reserve // reserve space to jump to the next page
+ }
+
+ /// Call a given function with page_end_reserve = 0
+ pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
+ let old_page_end_reserve = self.page_end_reserve;
+ self.page_end_reserve = 0;
+ block(self);
+ self.page_end_reserve = old_page_end_reserve;
+ }
+
+ /// Return the address ranges of a given address range that this CodeBlock can write.
+ pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
+ let mut addrs = vec![];
+ let mut start = start_ptr.raw_ptr() as usize;
+ let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
+ let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
+ while start < end {
+ let current_page = start / self.page_size * self.page_size;
+ let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
+ addrs.push((start, page_end));
+ start = current_page + self.page_size + self.page_start();
+ }
+ addrs
}
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
- self.write_pos + num_bytes < self.mem_size
+ let page_offset = self.write_pos % self.page_size;
+ let capacity = self.page_end().saturating_sub(page_offset);
+ num_bytes <= capacity
}
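
Worked example of the per-page capacity check above (a self-contained sketch; 6 is the x86_64 JMP_PTR_BYTES defined later in this diff, and page_end stands for the precomputed inline-half limit):

    // Capacity is now measured against the end of the current page's writable
    // half, not the end of the whole region.
    fn has_capacity(write_pos: usize, page_size: usize, page_end: usize, num_bytes: usize) -> bool {
        let page_offset = write_pos % page_size;
        num_bytes <= page_end.saturating_sub(page_offset)
    }

    fn main() {
        let (page_size, page_end) = (16_384, 16_384 / 2 - 6);
        assert!(has_capacity(16_384 + 100, page_size, page_end, 6));    // early in page 1
        assert!(!has_capacity(16_384 + 8_180, page_size, page_end, 7)); // only 6 bytes left
        assert!(!has_capacity(16_384 + 9_000, page_size, page_end, 1)); // in the outlined half
    }
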
/// Add an assembly comment if the feature is on.
@@ -121,8 +269,8 @@ impl CodeBlock {
self.write_pos
}
- pub fn get_mem(&mut self) -> &mut VirtualMem {
- &mut self.mem_block
+ pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ self.mem_block.borrow_mut().write_byte(write_ptr, byte)
}
// Set the current write position
@@ -134,49 +282,31 @@ impl CodeBlock {
self.write_pos = pos;
}
- // Align the current write pointer to a multiple of bytes
- pub fn align_pos(&mut self, multiple: u32) {
- // Compute the alignment boundary that is lower or equal
- // Do everything with usize
- let multiple: usize = multiple.try_into().unwrap();
- let pos = self.get_write_ptr().raw_ptr() as usize;
- let remainder = pos % multiple;
- let prev_aligned = pos - remainder;
-
- if prev_aligned == pos {
- // Already aligned so do nothing
- } else {
- // Align by advancing
- let pad = multiple - remainder;
- self.set_pos(self.get_write_pos() + pad);
- }
- }
-
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
+ let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
self.set_pos(pos);
}
/// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- self.mem_block.start_ptr().add_bytes(offset)
+ self.mem_block.borrow().start_ptr().add_bytes(offset)
}
/// Get a (possibly dangling) direct pointer to the current write position
- pub fn get_write_ptr(&mut self) -> CodePtr {
+ pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
}
/// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();
-
- if self.mem_block.write_byte(write_ptr, byte).is_ok() {
- self.write_pos += 1;
- } else {
+ if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() {
self.dropped_bytes = true;
}
+
+ // Always advance write_pos since arm64 PadInvalPatch needs this to stop the loop.
+ self.write_pos += 1;
}
/// Write multiple bytes starting from the current position.
@@ -242,6 +372,9 @@ impl CodeBlock {
self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
// Move past however many bytes the instruction takes up
+ if !self.has_capacity(num_bytes) {
+ self.dropped_bytes = true; // retry emitting the Insn after next_page
+ }
self.write_pos += num_bytes;
}
@@ -274,14 +407,43 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
+ pub fn clear_labels(&mut self) {
+ self.label_addrs.clear();
+ self.label_names.clear();
+ self.label_refs.clear();
+ }
+
+ pub fn get_label_state(&self) -> LabelState {
+ LabelState {
+ label_addrs: self.label_addrs.clone(),
+ label_names: self.label_names.clone(),
+ label_refs: self.label_refs.clone(),
+ }
+ }
+
+ pub fn set_label_state(&mut self, state: LabelState) {
+ self.label_addrs = state.label_addrs;
+ self.label_names = state.label_names;
+ self.label_refs = state.label_refs;
+ }
+
pub fn mark_all_executable(&mut self) {
- self.mem_block.mark_all_executable();
+ self.mem_block.borrow_mut().mark_all_executable();
}
- #[cfg(feature = "disasm")]
pub fn inline(&self) -> bool {
!self.outlined
}
+
+ pub fn other_cb(&self) -> Option<&'static mut Self> {
+ if !CodegenGlobals::has_instance() {
+ None
+ } else if self.inline() {
+ Some(CodegenGlobals::get_outlined_cb().unwrap())
+ } else {
+ Some(CodegenGlobals::get_inline_cb())
+ }
+ }
}
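
get_label_state/set_label_state/clear_labels exist to support the per-instruction retry loops added to both backends below. A sketch of the intended usage (emit_one_insn is a hypothetical stand-in for the per-instruction match; the other names are real ones from this diff):

    // Snapshot label bookkeeping, emit one instruction, and roll the
    // bookkeeping back if the instruction overflowed the page and will be
    // emitted again.
    let src_ptr = cb.get_write_ptr();
    let had_dropped_bytes = cb.has_dropped_bytes();
    let old_label_state = cb.get_label_state();

    emit_one_insn(cb, insn); // hypothetical; may push label refs, then run out of room

    if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
        // Retry on the fresh page without double-registering label refs.
        cb.set_label_state(old_label_state);
    }
    // If next_page() fails (region exhausted), the caller calls cb.clear_labels()
    // instead of cb.link_labels(), since the remaining refs point at dropped code.
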
#[cfg(test)]
@@ -295,7 +457,7 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
- Self::new(virt_mem, false)
+ Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false)
}
}
@@ -303,7 +465,7 @@ impl CodeBlock {
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
+ let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 0180737d4d..5df072ed38 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -4,7 +4,7 @@
use crate::asm::{CodeBlock};
use crate::asm::arm64::*;
-use crate::codegen::{JITState};
+use crate::codegen::{JITState, CodegenGlobals};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::virtualmem::CodePtr;
@@ -36,6 +36,9 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
pub const C_SP_REG: A64Opnd = X31;
pub const C_SP_STEP: i32 = 16;
+// The number of bytes that are generated by emit_jmp_ptr
+pub const JMP_PTR_BYTES: usize = 20;
+
/// Map Opnd to A64Opnd
impl From<Opnd> for A64Opnd {
fn from(opnd: Opnd) -> Self {
@@ -567,7 +570,7 @@ impl Assembler
/// Emit the required instructions to load the given value into the
/// given register. Our goal here is to use as few instructions as
/// possible to get this value into the register.
- fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 {
+ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
let mut current = value;
if current <= 0xffff {
@@ -680,6 +683,31 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
+ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ let src_addr = cb.get_write_ptr().into_i64();
+ let dst_addr = dst_ptr.into_i64();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ for _ in num_insns..(JMP_PTR_BYTES / 4) {
+ nop(cb);
+ }
+ }
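
Where the arm64 JMP_PTR_BYTES = 20 comes from (a self-contained sketch; the intermediate constants are derived from the A64 encoding and are not named in the diff):

    // Every A64 instruction is 4 bytes. The long form of emit_jmp_ptr is up to
    // 4 instructions to materialize a 64-bit address (movz + up to 3 movk)
    // plus one br; the short form (a single b) is nop-padded up to the same
    // 20 bytes so the site can later be repatched with the long form in place.
    const A64_INSN_BYTES: usize = 4;
    const MAX_LOAD_VALUE_INSNS: usize = 4; // movz + 3 movk
    const JMP_PTR_INSNS: usize = MAX_LOAD_VALUE_INSNS + 1; // + br
    const JMP_PTR_BYTES: usize = JMP_PTR_INSNS * A64_INSN_BYTES;

    fn main() {
        assert_eq!(JMP_PTR_BYTES, 20);
    }
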
+
// dbg!(&self.insns);
// List of GC offsets
@@ -687,7 +715,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
- for insn in &self.insns {
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -796,7 +830,7 @@ impl Assembler
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
- gc_offsets.push(ptr_offset);
+ insn_gc_offsets.push(ptr_offset);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@@ -904,28 +938,7 @@ impl Assembler
Insn::Jmp(target) => {
match target {
Target::CodePtr(dst_ptr) => {
- let src_addr = cb.get_write_ptr().into_i64();
- let dst_addr = dst_ptr.into_i64();
-
- // If the offset is short enough, then we'll use the
- // branch instruction. Otherwise, we'll move the
- // destination into a register and use the branch
- // register instruction.
- let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
- b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
- 0
- } else {
- let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
- br(cb, Self::SCRATCH0);
- num_insns
- };
-
- // Make sure it's always a consistent number of
- // instructions in case it gets patched and has to
- // use the other branch.
- for _ in num_insns..4 {
- nop(cb);
- }
+ emit_jmp_ptr(cb, *dst_ptr);
},
Target::Label(label_idx) => {
// Here we're going to save enough space for
@@ -997,13 +1010,21 @@ impl Assembler
csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
- Insn::PadEntryExit => {
- let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions
- while (cb.get_write_pos() - start_write_pos) < jmp_len {
+ Insn::PadInvalPatch => {
+ while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES {
nop(cb);
}
}
};
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
}
gc_offsets
@@ -1020,21 +1041,23 @@ impl Assembler
assert!(label_idx == idx);
}
- let start_write_pos = cb.get_write_pos();
+ let start_ptr = cb.get_write_ptr();
let gc_offsets = asm.arm64_emit(cb);
- if !cb.has_dropped_bytes() {
+ if cb.has_dropped_bytes() {
+ cb.clear_labels();
+ } else {
cb.link_labels();
- }
- // Invalidate icache for newly written out region so we don't run stale code.
- #[cfg(not(test))]
- {
- let start = cb.get_ptr(start_write_pos).raw_ptr();
- let write_ptr = cb.get_write_ptr().raw_ptr();
- let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr();
- let end = std::cmp::min(write_ptr, codeblock_end);
- unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ // Invalidate icache for newly written out region so we don't run stale code.
+ // It should invalidate only the code ranges of the current cb because the code
+ // ranges of the other cb might have a memory region that is still PROT_NONE.
+ #[cfg(not(test))]
+ cb.without_page_end_reserve(|cb| {
+ for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
+ unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ }
+ });
}
gc_offsets
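
The ranges fed to rb_yjit_icache_invalidate above come from writable_addrs. A self-contained re-derivation of its arithmetic with concrete numbers (offsets instead of raw pointers; 16 KiB pages, the arm64 reserve of 20 bytes, and a release-build page_start of 0 for the inline block):

    // Mirrors CodeBlock::writable_addrs for the inline block: split [start, end)
    // into the writable slice of each page, skipping the reserved jump bytes
    // and the other block's half.
    fn writable_ranges(mut start: usize, end: usize, page_size: usize,
                       page_start: usize, page_end: usize) -> Vec<(usize, usize)> {
        let mut ranges = vec![];
        while start < end {
            let current_page = start / page_size * page_size;
            ranges.push((start, std::cmp::min(end, current_page + page_end)));
            start = current_page + page_size + page_start;
        }
        ranges
    }

    fn main() {
        // A write spanning from offset 8000 in page 0 into page 1 is reported
        // as two ranges; the gap (jump reserve + outlined half) is never
        // invalidated, so possibly-PROT_NONE memory is never touched.
        let ranges = writable_ranges(8_000, 16_500, 16_384, 0, 8_192 - 20);
        assert_eq!(ranges, vec![(8_000, 8_172), (16_384, 16_500)]);
    }
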
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index ba7e372188..e11235aec9 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -5,6 +5,7 @@
use std::cell::Cell;
use std::fmt;
use std::convert::From;
+use std::io::Write;
use std::mem::take;
use crate::cruby::{VALUE};
use crate::virtualmem::{CodePtr};
@@ -433,9 +434,9 @@ pub enum Insn {
// binary OR operation.
Or { left: Opnd, right: Opnd, out: Opnd },
- /// Pad nop instructions to accomodate Op::Jmp in case the block is
- /// invalidated.
- PadEntryExit,
+ /// Pad nop instructions to accommodate Op::Jmp in case the block or the insn
+ /// is invalidated.
+ PadInvalPatch,
// Mark a position in the generated code
PosMarker(PosMarkerFn),
@@ -521,7 +522,7 @@ impl Insn {
Insn::Mov { .. } => "Mov",
Insn::Not { .. } => "Not",
Insn::Or { .. } => "Or",
- Insn::PadEntryExit => "PadEntryExit",
+ Insn::PadInvalPatch => "PadInvalPatch",
Insn::PosMarker(_) => "PosMarker",
Insn::RShift { .. } => "RShift",
Insn::Store { .. } => "Store",
@@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
- Insn::PadEntryExit |
+ Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
- Insn::PadEntryExit |
+ Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -1474,8 +1475,8 @@ impl Assembler {
out
}
- pub fn pad_entry_exit(&mut self) {
- self.push_insn(Insn::PadEntryExit);
+ pub fn pad_inval_patch(&mut self) {
+ self.push_insn(Insn::PadInvalPatch);
}
//pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index 1bad8642a2..3098c7e3b0 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr((5 as *mut u8).into());
+ let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
asm.test(
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let stub = Target::CodePtr((5 as *mut u8).into());
+ let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
asm.jmp(stub);
asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr((5 as *mut u8).into());
+ let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let arg1 = Opnd::mem(64, SP, 0);
let arg0 = Opnd::mem(64, SP, 8);
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index f6bd822727..c8aa1a0ed5 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -9,6 +9,7 @@ use crate::asm::x86_64::*;
use crate::codegen::{JITState};
use crate::cruby::*;
use crate::backend::ir::*;
+use crate::codegen::CodegenGlobals;
// Use the x86 register type for this platform
pub type Reg = X86Reg;
@@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
+// The number of bytes that are generated by jmp_ptr
+pub const JMP_PTR_BYTES: usize = 6;
+
/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
fn from(opnd: Opnd) -> Self {
@@ -375,7 +379,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
- for insn in &self.insns {
+ let mut insns_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insns_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -461,7 +471,7 @@ impl Assembler
if !val.special_const_p() {
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
- gc_offsets.push(ptr_offset);
+ insn_gc_offsets.push(ptr_offset);
}
}
},
@@ -651,11 +661,10 @@ impl Assembler
emit_csel(cb, *truthy, *falsy, *out, cmovl);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
- Insn::PadEntryExit => {
- // We assume that our Op::Jmp usage that gets invalidated is <= 5
- let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap();
- if code_size < 5 {
- nop(cb, 5 - code_size);
+ Insn::PadInvalPatch => {
+ let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
+ if code_size < JMP_PTR_BYTES {
+ nop(cb, (JMP_PTR_BYTES - code_size) as u32);
}
}
@@ -666,6 +675,15 @@ impl Assembler
#[allow(unreachable_patterns)]
_ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
};
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insns_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
}
gc_offsets
@@ -684,7 +702,9 @@ impl Assembler
let gc_offsets = asm.x86_emit(cb);
- if !cb.has_dropped_bytes() {
+ if cb.has_dropped_bytes() {
+ cb.clear_labels();
+ } else {
cb.link_labels();
}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 5f6d97834a..626916b240 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -13,13 +13,15 @@ use crate::utils::*;
use CodegenStatus::*;
use InsnOpnd::*;
-
+use std::cell::RefCell;
+use std::cell::RefMut;
use std::cmp;
use std::collections::HashMap;
use std::ffi::CStr;
use std::mem::{self, size_of};
use std::os::raw::c_uint;
use std::ptr;
+use std::rc::Rc;
use std::slice;
pub use crate::virtualmem::CodePtr;
@@ -296,6 +298,7 @@ fn jit_prepare_routine_call(
/// Record the current codeblock write position for rewriting into a jump into
/// the outlined block later. Used to implement global code invalidation.
fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
+ asm.pad_inval_patch();
asm.pos_marker(move |code_ptr| {
CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos);
});
@@ -606,19 +609,6 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) {
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
- const MAX_PROLOGUE_SIZE: usize = 1024;
-
- // Check if we have enough executable memory
- if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
- return None;
- }
-
- let old_write_pos = cb.get_write_pos();
-
- // TODO: figure out if this is actually beneficial for performance
- // Align the current write position to cache line boundaries
- cb.align_pos(64);
-
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new();
@@ -660,10 +650,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
asm.compile(cb);
- // Verify MAX_PROLOGUE_SIZE
- assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
-
- return Some(code_ptr);
+ if cb.has_dropped_bytes() {
+ None
+ } else {
+ Some(code_ptr)
+ }
}
// Generate code to check for interrupts and take a side-exit.
@@ -853,7 +844,7 @@ pub fn gen_single_block(
{
let mut block = jit.block.borrow_mut();
if block.entry_exit.is_some() {
- asm.pad_entry_exit();
+ asm.pad_inval_patch();
}
// Compile code into the code block
@@ -6544,29 +6535,13 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
- // Executable memory size in MiB
- let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
+ // Executable memory and code page size in bytes
+ let mem_size = get_option!(exec_mem_size);
+ let code_page_size = get_option!(code_page_size);
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- // TODO(alan): we can error more gracefully when the user gives
- // --yjit-exec-mem=absurdly-large-number
- //
- // 2 GiB. It's likely a bug if we generate this much code.
- const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
- assert!(mem_size <= MAX_BUFFER_SIZE);
- let mem_size_u32 = mem_size as u32;
- let half_size = mem_size / 2;
-
- let page_size = unsafe { rb_yjit_get_page_size() };
- let assert_page_aligned = |ptr| assert_eq!(
- 0,
- ptr as usize % page_size.as_usize(),
- "Start of virtual address block should be page-aligned",
- );
-
- let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
- let second_half = virt_block.wrapping_add(half_size);
+ let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -6575,26 +6550,25 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
- assert_page_aligned(virt_block);
- assert_page_aligned(second_half);
+ let page_size = unsafe { rb_yjit_get_page_size() };
+ assert_eq!(
+ virt_block as usize % page_size.as_usize(), 0,
+ "Start of virtual address block should be page-aligned",
+ );
+ assert_eq!(code_page_size % page_size.as_usize(), 0, "code_page_size was not page-aligned");
use crate::virtualmem::*;
- let first_half = VirtualMem::new(
+ let mem_block = VirtualMem::new(
SystemAllocator {},
page_size,
virt_block,
- half_size
- );
- let second_half = VirtualMem::new(
- SystemAllocator {},
- page_size,
- second_half,
- half_size
+ mem_size,
);
+ let mem_block = Rc::new(RefCell::new(mem_block));
- let cb = CodeBlock::new(first_half, false);
- let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true));
+ let cb = CodeBlock::new(mem_block.clone(), code_page_size, false);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, code_page_size, true));
(cb, ocb)
};
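
Both CodeBlocks now wrap one Rc<RefCell<VirtualMem>> instead of owning separate halves. A self-contained sketch of why interior mutability fits here (toy types, not the YJIT ones; YJIT compiles under the VM lock, so the RefCell borrows never overlap in practice):

    use std::cell::RefCell;
    use std::rc::Rc;

    struct Mem { bytes: Vec<u8> }

    struct Block { mem: Rc<RefCell<Mem>> }

    impl Block {
        // &self is enough: RefCell gives each block mutable access to the
        // shared mapping without threading &mut through every caller.
        fn write(&self, pos: usize, byte: u8) {
            self.mem.borrow_mut().bytes[pos] = byte;
        }
    }

    fn main() {
        let mem = Rc::new(RefCell::new(Mem { bytes: vec![0; 16] }));
        let inline = Block { mem: mem.clone() };   // cb
        let outlined = Block { mem };              // ocb
        inline.write(0, 0xcc);
        outlined.write(8, 0xcc);
        assert_eq!(inline.mem.borrow().bytes[8], 0xcc); // same backing memory
    }
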
@@ -6702,6 +6676,10 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
}
+ pub fn has_instance() -> bool {
+ unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
+ }
+
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 3cecf31a85..53cb31beb1 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -665,7 +665,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
if new_addr != object {
for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
- cb.get_mem().write_byte(byte_code_ptr, byte)
+ cb.write_mem(byte_code_ptr, byte)
.expect("patching existing code should be within bounds");
}
}
@@ -1916,7 +1916,9 @@ pub fn gen_branch(
// Call the branch generation function
asm.mark_branch_start(&branchref);
- gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default);
+ if let Some(dst_addr) = branch.dst_addrs[0] {
+ gen_fn(asm, dst_addr, branch.dst_addrs[1], BranchShape::Default);
+ }
asm.mark_branch_end(&branchref);
}
@@ -1955,6 +1957,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Default;
// Call the branch generation function
+ asm.comment("gen_direct_jmp: existing block");
asm.mark_branch_start(&branchref);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
asm.mark_branch_end(&branchref);
@@ -1965,6 +1968,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Next0;
// The branch is effectively empty (a noop)
+ asm.comment("gen_direct_jmp: fallthrough");
asm.mark_branch_start(&branchref);
asm.mark_branch_end(&branchref);
}
@@ -2003,7 +2007,9 @@ pub fn defer_compilation(
// Call the branch generation function
asm.mark_branch_start(&branch_rc);
- gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ if let Some(dst_addr) = branch.dst_addrs[0] {
+ gen_jump_branch(asm, dst_addr, None, BranchShape::Default);
+ }
asm.mark_branch_end(&branch_rc);
}
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index f73dca67de..d2b43ecb26 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -4,9 +4,14 @@ use std::ffi::CStr;
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
- // Size of the executable memory block to allocate in MiB
+ // Size of the executable memory block to allocate in bytes
+ // Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
+ // Size of each executable memory code page in bytes
+ // Note that the command line argument is expressed in KiB and not bytes
+ pub code_page_size: usize,
+
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
pub call_threshold: usize,
@@ -48,7 +53,8 @@ pub struct Options {
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
- exec_mem_size: 256,
+ exec_mem_size: 256 * 1024 * 1024,
+ code_page_size: 16 * 1024,
call_threshold: 10,
greedy_versioning: false,
no_type_prop: false,
@@ -118,8 +124,30 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit
- ("exec-mem-size", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
+ ("exec-mem-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ if n == 0 || n > 2 * 1024 * 1024 {
+ return None
+ }
+
+ // Convert from MiB to bytes internally for convenience
+ unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
+ }
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("code-page-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ // Enforce bounds checks and that n is divisible by 4KiB
+ if n < 4 || n > 256 || n % 4 != 0 {
+ return None
+ }
+
+ // Convert from KiB to bytes internally for convenience
+ unsafe { OPTIONS.code_page_size = n * 1024 }
+ }
Err(_) => {
return None;
}
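
For reference, the new --yjit-code-page-size value is taken in KiB, must be in [4, 256], and must be a multiple of 4 so the resulting size stays aligned to common 4 KiB OS pages (CodegenGlobals::init additionally asserts it against the real page size). A self-contained sketch of the same validation, with an illustrative helper name:

    // Mirrors the bounds check above: input in KiB, output in bytes, None = reject.
    fn parse_code_page_size(kib: usize) -> Option<usize> {
        if kib < 4 || kib > 256 || kib % 4 != 0 {
            return None;
        }
        Some(kib * 1024)
    }

    fn main() {
        assert_eq!(parse_code_page_size(16), Some(16 * 1024)); // the default
        assert_eq!(parse_code_page_size(3), None);             // too small
        assert_eq!(parse_code_page_size(6), None);             // not a multiple of 4
    }
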
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index cabebb7dcc..b156c9d5ed 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -74,14 +74,13 @@ pub(crate) use offset_of;
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
fn ruby_str_to_rust(v: VALUE) -> String {
- // Make sure the CRuby encoding is UTF-8 compatible
- let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
- assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);
-
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
- String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
+ match String::from_utf8(str_slice.to_vec()) {
+ Ok(utf8) => utf8,
+ Err(_) => String::new(),
+ }
}
// Location is the file defining the method, colon, method name.