author    Takashi Kokubun <takashikkbn@gmail.com>    2022-10-17 10:45:59 -0700
committer GitHub <noreply@github.com>                2022-10-17 10:45:59 -0700
commit    64c52c428285e7930aed62740cc9c54ee483178e (patch)
tree      818515b6cc1909e98cdcdca93f0a3ac3b2b8cd5a
parent    e7c71c6c9271b0c29f210769159090e17128e740 (diff)
YJIT: Interleave inline and outlined code blocks (#6460)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Notes: Merged-By: k0kubun <takashikkbn@gmail.com>
-rw-r--r--  yjit/src/asm/mod.rs             240
-rw-r--r--  yjit/src/backend/arm64/mod.rs   103
-rw-r--r--  yjit/src/backend/ir.rs           17
-rw-r--r--  yjit/src/backend/tests.rs         6
-rw-r--r--  yjit/src/backend/x86_64/mod.rs   36
-rw-r--r--  yjit/src/codegen.rs              80
-rw-r--r--  yjit/src/core.rs                 12
-rw-r--r--  yjit/src/options.rs              36
-rw-r--r--  yjit/src/utils.rs                 9
9 files changed, 378 insertions, 161 deletions
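
The change in a nutshell: instead of giving the inline and outlined CodeBlocks separate halves of the whole reservation, every code page is now split between them. A self-contained sketch of the per-page arithmetic (numbers assume the default 16 KiB code page and the jump reserves defined in this diff; the extra debug-build padding at page starts is ignored):

    // Each 16 KiB code page holds inline code in its first half and outlined
    // code in its second half; both halves reserve room at the end for a jump
    // to the next page, so related inline/outlined code stays on the same page.
    const CODE_PAGE_SIZE: usize = 16 * 1024;
    const JMP_PTR_BYTES: usize = 6; // x86_64 value in this diff; 20 on arm64

    fn page_ranges(page_idx: usize) -> ((usize, usize), (usize, usize)) {
        let base = page_idx * CODE_PAGE_SIZE;
        let inline = (base, base + CODE_PAGE_SIZE / 2 - JMP_PTR_BYTES);
        let outlined = (base + CODE_PAGE_SIZE / 2, base + CODE_PAGE_SIZE - JMP_PTR_BYTES);
        (inline, outlined)
    }

    fn main() {
        // Page 1 of the region: inline writes [16384, 24570), outlined [24576, 32762).
        assert_eq!(page_ranges(1), ((16_384, 24_570), (24_576, 32_762)));
    }
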
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 8356201ba6..1ab813964c 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -1,9 +1,20 @@
+use std::cell::RefCell;
+use std::cmp;
use std::fmt;
use std::mem;
+use std::rc::Rc;
+#[cfg(target_arch = "x86_64")]
+use crate::backend::x86_64::JMP_PTR_BYTES;
+#[cfg(target_arch = "aarch64")]
+use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::backend::ir::Assembler;
+use crate::backend::ir::Target;
+use crate::virtualmem::WriteError;
#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;
+use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@@ -17,7 +28,8 @@ pub mod arm64;
//
/// Reference to an ASM label
-struct LabelRef {
+#[derive(Clone)]
+pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,
@@ -36,7 +48,7 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
- mem_block: VirtualMem,
+ mem_block: Rc<RefCell<VirtualMem>>,
// Memory block size
mem_size: usize,
@@ -44,6 +56,12 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,
+ // Size of a code page (inlined + outlined)
+ page_size: usize,
+
+ // Size reserved for writing a jump to the next page
+ page_end_reserve: usize,
+
// Table of registered label addresses
label_addrs: Vec<usize>,
@@ -58,7 +76,6 @@ pub struct CodeBlock {
asm_comments: BTreeMap<usize, Vec<String>>,
// True for OutlinedCb
- #[cfg(feature = "disasm")]
pub outlined: bool,
// Set if the CodeBlock is unable to output some instructions,
@@ -67,27 +84,158 @@ pub struct CodeBlock {
dropped_bytes: bool,
}
+/// Set of CodeBlock label states. Used for recovering the previous state.
+pub struct LabelState {
+ label_addrs: Vec<usize>,
+ label_names: Vec<String>,
+ label_refs: Vec<LabelRef>,
+}
+
impl CodeBlock {
/// Make a new CodeBlock
- pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
- Self {
- mem_size: mem_block.virtual_region_size(),
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
+ let mem_size = mem_block.borrow().virtual_region_size();
+ let mut cb = Self {
mem_block,
+ mem_size,
write_pos: 0,
+ page_size,
+ page_end_reserve: JMP_PTR_BYTES,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
- #[cfg(feature = "disasm")]
outlined,
dropped_bytes: false,
+ };
+ cb.write_pos = cb.page_start();
+ cb
+ }
+
+ /// Move the CodeBlock to the next page. If it's on the furthest page,
+ /// move the other CodeBlock to the next page as well.
+ pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
+ let old_write_ptr = self.get_write_ptr();
+ self.set_write_ptr(base_ptr);
+ self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+
+ // Move self to the next page
+ let next_page_idx = self.write_pos / self.page_size + 1;
+ if !self.set_page(next_page_idx, &jmp_ptr) {
+ self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
+ return false;
+ }
+
+ // Move the other CodeBlock to the same page if it's on the furthest page
+ self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+
+ return !self.dropped_bytes;
+ }
+
+ /// Move the CodeBlock to page_idx only if it's not going backwards.
+ fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
+ // Do not move the CodeBlock if page_idx points to an old position so that this
+ // CodeBlock will not overwrite existing code.
+ //
+ // Let's say this is the current situation:
+ // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // When cb needs to patch page1, this will be temporarily changed to:
+ // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // While patching page1, cb may need to jump to page2. What set_page currently does is:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
+ // instead of:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
+ // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
+ // write_pos point to existing code in page2, which might let ocb overwrite it later.
+ //
+ // We could remember the last write_pos in page2 and let set_page use that position,
+ // but you need to waste some space for keeping write_pos for every single page.
+ // It doesn't seem necessary for performance either. So we're currently not doing it.
+ let mut dst_pos = self.page_size * page_idx + self.page_start();
+ if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
+ // Reset dropped_bytes
+ self.dropped_bytes = false;
+
+ // Convert dst_pos to dst_ptr
+ let src_pos = self.write_pos;
+ self.write_pos = dst_pos;
+ let dst_ptr = self.get_write_ptr();
+ self.write_pos = src_pos;
+
+ // Generate jmp_ptr from src_pos to dst_pos
+ self.without_page_end_reserve(|cb| {
+ cb.add_comment("jump to next page");
+ jmp_ptr(cb, dst_ptr);
+ assert!(!cb.has_dropped_bytes());
+ });
+
+ // Start the next code from dst_pos
+ self.write_pos = dst_pos;
}
+ !self.dropped_bytes
+ }
+
+ /// write_pos of the current page start
+ pub fn page_start_pos(&self) -> usize {
+ self.get_write_pos() / self.page_size * self.page_size + self.page_start()
+ }
+
+ /// Offset of each page where CodeBlock should start writing
+ pub fn page_start(&self) -> usize {
+ let mut start = if self.inline() {
+ 0
+ } else {
+ self.page_size / 2
+ };
+ if cfg!(debug_assertions) && !cfg!(test) {
+ // Leave illegal instructions at the beginning of each page to assert
+ // we're not accidentally crossing page boundaries.
+ start += JMP_PTR_BYTES;
+ }
+ start
+ }
+
+ /// Offset of each page where CodeBlock should stop writing (exclusive)
+ pub fn page_end(&self) -> usize {
+ let page_end = if self.inline() {
+ self.page_size / 2
+ } else {
+ self.page_size
+ };
+ page_end - self.page_end_reserve // reserve space to jump to the next page
+ }
+
+ /// Call a given function with page_end_reserve = 0
+ pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
+ let old_page_end_reserve = self.page_end_reserve;
+ self.page_end_reserve = 0;
+ block(self);
+ self.page_end_reserve = old_page_end_reserve;
+ }
+
+ /// Return the address ranges of a given address range that this CodeBlock can write.
+ pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
+ let mut addrs = vec![];
+ let mut start = start_ptr.raw_ptr() as usize;
+ let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
+ let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
+ while start < end {
+ let current_page = start / self.page_size * self.page_size;
+ let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
+ addrs.push((start, page_end));
+ start = current_page + self.page_size + self.page_start();
+ }
+ addrs
}
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
- self.write_pos + num_bytes < self.mem_size
+ let page_offset = self.write_pos % self.page_size;
+ let capacity = self.page_end().saturating_sub(page_offset);
+ num_bytes <= capacity
}
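
Worked example of the per-page capacity check above (a self-contained sketch; 6 is the x86_64 JMP_PTR_BYTES defined later in this diff, and page_end stands for the precomputed inline-half limit):

    // Capacity is now measured against the end of the current page's writable
    // half, not the end of the whole region.
    fn has_capacity(write_pos: usize, page_size: usize, page_end: usize, num_bytes: usize) -> bool {
        let page_offset = write_pos % page_size;
        num_bytes <= page_end.saturating_sub(page_offset)
    }

    fn main() {
        let (page_size, page_end) = (16_384, 16_384 / 2 - 6);
        assert!(has_capacity(16_384 + 100, page_size, page_end, 6));    // early in page 1
        assert!(!has_capacity(16_384 + 8_180, page_size, page_end, 7)); // only 6 bytes left
        assert!(!has_capacity(16_384 + 9_000, page_size, page_end, 1)); // in the outlined half
    }
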
/// Add an assembly comment if the feature is on.
@@ -121,8 +269,8 @@ impl CodeBlock {
self.write_pos
}
- pub fn get_mem(&mut self) -> &mut VirtualMem {
- &mut self.mem_block
+ pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ self.mem_block.borrow_mut().write_byte(write_ptr, byte)
}
// Set the current write position
@@ -134,49 +282,31 @@ impl CodeBlock {
self.write_pos = pos;
}
- // Align the current write pointer to a multiple of bytes
- pub fn align_pos(&mut self, multiple: u32) {
- // Compute the alignment boundary that is lower or equal
- // Do everything with usize
- let multiple: usize = multiple.try_into().unwrap();
- let pos = self.get_write_ptr().raw_ptr() as usize;
- let remainder = pos % multiple;
- let prev_aligned = pos - remainder;
-
- if prev_aligned == pos {
- // Already aligned so do nothing
- } else {
- // Align by advancing
- let pad = multiple - remainder;
- self.set_pos(self.get_write_pos() + pad);
- }
- }
-
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
+ let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
self.set_pos(pos);
}
/// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- self.mem_block.start_ptr().add_bytes(offset)
+ self.mem_block.borrow().start_ptr().add_bytes(offset)
}
/// Get a (possibly dangling) direct pointer to the current write position
- pub fn get_write_ptr(&mut self) -> CodePtr {
+ pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
}
/// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();
-
- if self.mem_block.write_byte(write_ptr, byte).is_ok() {
- self.write_pos += 1;
- } else {
+ if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() {
self.dropped_bytes = true;
}
+
+ // Always advance write_pos since arm64 PadInvalPatch needs this to stop the loop.
+ self.write_pos += 1;
}
/// Write multiple bytes starting from the current position.
@@ -242,6 +372,9 @@ impl CodeBlock {
self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
// Move past however many bytes the instruction takes up
+ if !self.has_capacity(num_bytes) {
+ self.dropped_bytes = true; // retry emitting the Insn after next_page
+ }
self.write_pos += num_bytes;
}
@@ -274,14 +407,43 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
+ pub fn clear_labels(&mut self) {
+ self.label_addrs.clear();
+ self.label_names.clear();
+ self.label_refs.clear();
+ }
+
+ pub fn get_label_state(&self) -> LabelState {
+ LabelState {
+ label_addrs: self.label_addrs.clone(),
+ label_names: self.label_names.clone(),
+ label_refs: self.label_refs.clone(),
+ }
+ }
+
+ pub fn set_label_state(&mut self, state: LabelState) {
+ self.label_addrs = state.label_addrs;
+ self.label_names = state.label_names;
+ self.label_refs = state.label_refs;
+ }
+
pub fn mark_all_executable(&mut self) {
- self.mem_block.mark_all_executable();
+ self.mem_block.borrow_mut().mark_all_executable();
}
- #[cfg(feature = "disasm")]
pub fn inline(&self) -> bool {
!self.outlined
}
+
+ pub fn other_cb(&self) -> Option<&'static mut Self> {
+ if !CodegenGlobals::has_instance() {
+ None
+ } else if self.inline() {
+ Some(CodegenGlobals::get_outlined_cb().unwrap())
+ } else {
+ Some(CodegenGlobals::get_inline_cb())
+ }
+ }
}
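
get_label_state/set_label_state/clear_labels exist to support the per-instruction retry loops added to both backends below. A sketch of the intended usage (emit_one_insn is a hypothetical stand-in for the per-instruction match; the other names are real ones from this diff):

    // Snapshot label bookkeeping, emit one instruction, and roll the
    // bookkeeping back if the instruction overflowed the page and will be
    // emitted again.
    let src_ptr = cb.get_write_ptr();
    let had_dropped_bytes = cb.has_dropped_bytes();
    let old_label_state = cb.get_label_state();

    emit_one_insn(cb, insn); // hypothetical; may push label refs, then run out of room

    if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
        // Retry on the fresh page without double-registering label refs.
        cb.set_label_state(old_label_state);
    }
    // If next_page() fails (region exhausted), the caller calls cb.clear_labels()
    // instead of cb.link_labels(), since the remaining refs point at dropped code.
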
#[cfg(test)]
@@ -295,7 +457,7 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
- Self::new(virt_mem, false)
+ Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false)
}
}
@@ -303,7 +465,7 @@ impl CodeBlock {
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
+ let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 0180737d4d..5df072ed38 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -4,7 +4,7 @@
use crate::asm::{CodeBlock};
use crate::asm::arm64::*;
-use crate::codegen::{JITState};
+use crate::codegen::{JITState, CodegenGlobals};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::virtualmem::CodePtr;
@@ -36,6 +36,9 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
pub const C_SP_REG: A64Opnd = X31;
pub const C_SP_STEP: i32 = 16;
+// The number of bytes that are generated by emit_jmp_ptr
+pub const JMP_PTR_BYTES: usize = 20;
+
/// Map Opnd to A64Opnd
impl From<Opnd> for A64Opnd {
fn from(opnd: Opnd) -> Self {
@@ -567,7 +570,7 @@ impl Assembler
/// Emit the required instructions to load the given value into the
/// given register. Our goal here is to use as few instructions as
/// possible to get this value into the register.
- fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 {
+ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
let mut current = value;
if current <= 0xffff {
@@ -680,6 +683,31 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
+ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ let src_addr = cb.get_write_ptr().into_i64();
+ let dst_addr = dst_ptr.into_i64();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ for _ in num_insns..(JMP_PTR_BYTES / 4) {
+ nop(cb);
+ }
+ }
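
Where the arm64 JMP_PTR_BYTES = 20 comes from (a self-contained sketch; the intermediate constants are derived from the A64 encoding and are not named in the diff):

    // Every A64 instruction is 4 bytes. The long form of emit_jmp_ptr is up to
    // 4 instructions to materialize a 64-bit address (movz + up to 3 movk)
    // plus one br; the short form (a single b) is nop-padded up to the same
    // 20 bytes so the site can later be repatched with the long form in place.
    const A64_INSN_BYTES: usize = 4;
    const MAX_LOAD_VALUE_INSNS: usize = 4; // movz + 3 movk
    const JMP_PTR_INSNS: usize = MAX_LOAD_VALUE_INSNS + 1; // + br
    const JMP_PTR_BYTES: usize = JMP_PTR_INSNS * A64_INSN_BYTES;

    fn main() {
        assert_eq!(JMP_PTR_BYTES, 20);
    }
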
+
// dbg!(&self.insns);
// List of GC offsets
@@ -687,7 +715,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
- for insn in &self.insns {
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -796,7 +830,7 @@ impl Assembler
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
- gc_offsets.push(ptr_offset);
+ insn_gc_offsets.push(ptr_offset);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@@ -904,28 +938,7 @@ impl Assembler
Insn::Jmp(target) => {
match target {
Target::CodePtr(dst_ptr) => {
- let src_addr = cb.get_write_ptr().into_i64();
- let dst_addr = dst_ptr.into_i64();
-
- // If the offset is short enough, then we'll use the
- // branch instruction. Otherwise, we'll move the
- // destination into a register and use the branch
- // register instruction.
- let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
- b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
- 0
- } else {
- let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
- br(cb, Self::SCRATCH0);
- num_insns
- };
-
- // Make sure it's always a consistent number of
- // instructions in case it gets patched and has to
- // use the other branch.
- for _ in num_insns..4 {
- nop(cb);
- }
+ emit_jmp_ptr(cb, *dst_ptr);
},
Target::Label(label_idx) => {
// Here we're going to save enough space for
@@ -997,13 +1010,21 @@ impl Assembler
csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
- Insn::PadEntryExit => {
- let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions
- while (cb.get_write_pos() - start_write_pos) < jmp_len {
+ Insn::PadInvalPatch => {
+ while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES {
nop(cb);
}
}
};
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
}
gc_offsets
@@ -1020,21 +1041,23 @@ impl Assembler
assert!(label_idx == idx);
}
- let start_write_pos = cb.get_write_pos();
+ let start_ptr = cb.get_write_ptr();
let gc_offsets = asm.arm64_emit(cb);
- if !cb.has_dropped_bytes() {
+ if cb.has_dropped_bytes() {
+ cb.clear_labels();
+ } else {
cb.link_labels();
- }
- // Invalidate icache for newly written out region so we don't run stale code.
- #[cfg(not(test))]
- {
- let start = cb.get_ptr(start_write_pos).raw_ptr();
- let write_ptr = cb.get_write_ptr().raw_ptr();
- let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr();
- let end = std::cmp::min(write_ptr, codeblock_end);
- unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ // Invalidate icache for newly written out region so we don't run stale code.
+ // It should invalidate only the code ranges of the current cb because the code
+ // ranges of the other cb might have a memory region that is still PROT_NONE.
+ #[cfg(not(test))]
+ cb.without_page_end_reserve(|cb| {
+ for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
+ unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ }
+ });
}
gc_offsets
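
The ranges fed to rb_yjit_icache_invalidate above come from writable_addrs. A self-contained re-derivation of its arithmetic with concrete numbers (offsets instead of raw pointers; 16 KiB pages, the arm64 reserve of 20 bytes, and a release-build page_start of 0 for the inline block):

    // Mirrors CodeBlock::writable_addrs for the inline block: split [start, end)
    // into the writable slice of each page, skipping the reserved jump bytes
    // and the other block's half.
    fn writable_ranges(mut start: usize, end: usize, page_size: usize,
                       page_start: usize, page_end: usize) -> Vec<(usize, usize)> {
        let mut ranges = vec![];
        while start < end {
            let current_page = start / page_size * page_size;
            ranges.push((start, std::cmp::min(end, current_page + page_end)));
            start = current_page + page_size + page_start;
        }
        ranges
    }

    fn main() {
        // A write spanning from offset 8000 in page 0 into page 1 is reported
        // as two ranges; the gap (jump reserve + outlined half) is never
        // invalidated, so possibly-PROT_NONE memory is never touched.
        let ranges = writable_ranges(8_000, 16_500, 16_384, 0, 8_192 - 20);
        assert_eq!(ranges, vec![(8_000, 8_172), (16_384, 16_500)]);
    }
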
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index ba7e372188..e11235aec9 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -5,6 +5,7 @@
use std::cell::Cell;
use std::fmt;
use std::convert::From;
+use std::io::Write;
use std::mem::take;
use crate::cruby::{VALUE};
use crate::virtualmem::{CodePtr};
@@ -433,9 +434,9 @@ pub enum Insn {
// binary OR operation.
Or { left: Opnd, right: Opnd, out: Opnd },
- /// Pad nop instructions to accomodate Op::Jmp in case the block is
- /// invalidated.
- PadEntryExit,
+ /// Pad nop instructions to accommodate Op::Jmp in case the block or the insn
+ /// is invalidated.
+ PadInvalPatch,
// Mark a position in the generated code
PosMarker(PosMarkerFn),
@@ -521,7 +522,7 @@ impl Insn {
Insn::Mov { .. } => "Mov",
Insn::Not { .. } => "Not",
Insn::Or { .. } => "Or",
- Insn::PadEntryExit => "PadEntryExit",
+ Insn::PadInvalPatch => "PadInvalPatch",
Insn::PosMarker(_) => "PosMarker",
Insn::RShift { .. } => "RShift",
Insn::Store { .. } => "Store",
@@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
- Insn::PadEntryExit |
+ Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
- Insn::PadEntryExit |
+ Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -1474,8 +1475,8 @@ impl Assembler {
out
}
- pub fn pad_entry_exit(&mut self) {
- self.push_insn(Insn::PadEntryExit);
+ pub fn pad_inval_patch(&mut self) {
+ self.push_insn(Insn::PadInvalPatch);
}
//pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index 1bad8642a2..3098c7e3b0 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr((5 as *mut u8).into());
+ let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
asm.test(
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let stub = Target::CodePtr((5 as *mut u8).into());
+ let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
asm.jmp(stub);
asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr((5 as *mut u8).into());
+ let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let arg1 = Opnd::mem(64, SP, 0);
let arg0 = Opnd::mem(64, SP, 8);
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index f6bd822727..c8aa1a0ed5 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -9,6 +9,7 @@ use crate::asm::x86_64::*;
use crate::codegen::{JITState};
use crate::cruby::*;
use crate::backend::ir::*;
+use crate::codegen::CodegenGlobals;
// Use the x86 register type for this platform
pub type Reg = X86Reg;
@@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
+// The number of bytes that are generated by jmp_ptr
+pub const JMP_PTR_BYTES: usize = 6;
+
/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
fn from(opnd: Opnd) -> Self {
@@ -375,7 +379,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
- for insn in &self.insns {
+ let mut insns_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insns_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -461,7 +471,7 @@ impl Assembler
if !val.special_const_p() {
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
- gc_offsets.push(ptr_offset);
+ insn_gc_offsets.push(ptr_offset);
}
}
},
@@ -651,11 +661,10 @@ impl Assembler
emit_csel(cb, *truthy, *falsy, *out, cmovl);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
- Insn::PadEntryExit => {
- // We assume that our Op::Jmp usage that gets invalidated is <= 5
- let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap();
- if code_size < 5 {
- nop(cb, 5 - code_size);
+ Insn::PadInvalPatch => {
+ let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
+ if code_size < JMP_PTR_BYTES {
+ nop(cb, (JMP_PTR_BYTES - code_size) as u32);
}
}
@@ -666,6 +675,15 @@ impl Assembler
#[allow(unreachable_patterns)]
_ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
};
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insns_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
}
gc_offsets
@@ -684,7 +702,9 @@ impl Assembler
let gc_offsets = asm.x86_emit(cb);
- if !cb.has_dropped_bytes() {
+ if cb.has_dropped_bytes() {
+ cb.clear_labels();
+ } else {
cb.link_labels();
}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 5f6d97834a..626916b240 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -13,13 +13,15 @@ use crate::utils::*;
use CodegenStatus::*;
use InsnOpnd::*;
-
+use std::cell::RefCell;
+use std::cell::RefMut;
use std::cmp;
use std::collections::HashMap;
use std::ffi::CStr;
use std::mem::{self, size_of};
use std::os::raw::c_uint;
use std::ptr;
+use std::rc::Rc;
use std::slice;
pub use crate::virtualmem::CodePtr;
@@ -296,6 +298,7 @@ fn jit_prepare_routine_call(
/// Record the current codeblock write position for rewriting into a jump into
/// the outlined block later. Used to implement global code invalidation.
fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
+ asm.pad_inval_patch();
asm.pos_marker(move |code_ptr| {
CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos);
});
@@ -606,19 +609,6 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) {
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
- const MAX_PROLOGUE_SIZE: usize = 1024;
-
- // Check if we have enough executable memory
- if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
- return None;
- }
-
- let old_write_pos = cb.get_write_pos();
-
- // TODO: figure out if this is actually beneficial for performance
- // Align the current write position to cache line boundaries
- cb.align_pos(64);
-
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new();
@@ -660,10 +650,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
asm.compile(cb);
- // Verify MAX_PROLOGUE_SIZE
- assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
-
- return Some(code_ptr);
+ if cb.has_dropped_bytes() {
+ None
+ } else {
+ Some(code_ptr)
+ }
}
// Generate code to check for interrupts and take a side-exit.
@@ -853,7 +844,7 @@ pub fn gen_single_block(
{
let mut block = jit.block.borrow_mut();
if block.entry_exit.is_some() {
- asm.pad_entry_exit();
+ asm.pad_inval_patch();
}
// Compile code into the code block
@@ -6544,29 +6535,13 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
- // Executable memory size in MiB
- let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
+ // Executable memory and code page size in bytes
+ let mem_size = get_option!(exec_mem_size);
+ let code_page_size = get_option!(code_page_size);
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- // TODO(alan): we can error more gracefully when the user gives
- // --yjit-exec-mem=absurdly-large-number
- //
- // 2 GiB. It's likely a bug if we generate this much code.
- const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
- assert!(mem_size <= MAX_BUFFER_SIZE);
- let mem_size_u32 = mem_size as u32;
- let half_size = mem_size / 2;
-
- let page_size = unsafe { rb_yjit_get_page_size() };
- let assert_page_aligned = |ptr| assert_eq!(
- 0,
- ptr as usize % page_size.as_usize(),
- "Start of virtual address block should be page-aligned",
- );
-
- let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
- let second_half = virt_block.wrapping_add(half_size);
+ let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -6575,26 +6550,25 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
- assert_page_aligned(virt_block);
- assert_page_aligned(second_half);
+ let page_size = unsafe { rb_yjit_get_page_size() };
+ assert_eq!(
+ virt_block as usize % page_size.as_usize(), 0,
+ "Start of virtual address block should be page-aligned",
+ );
+ assert_eq!(code_page_size % page_size.as_usize(), 0, "code_page_size was not page-aligned");
use crate::virtualmem::*;
- let first_half = VirtualMem::new(
+ let mem_block = VirtualMem::new(
SystemAllocator {},
page_size,
virt_block,
- half_size
- );
- let second_half = VirtualMem::new(
- SystemAllocator {},
- page_size,
- second_half,
- half_size
+ mem_size,
);
+ let mem_block = Rc::new(RefCell::new(mem_block));
- let cb = CodeBlock::new(first_half, false);
- let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true));
+ let cb = CodeBlock::new(mem_block.clone(), code_page_size, false);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, code_page_size, true));
(cb, ocb)
};
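
Both CodeBlocks now wrap one Rc<RefCell<VirtualMem>> instead of owning separate halves. A self-contained sketch of why interior mutability fits here (toy types, not the YJIT ones; YJIT compiles under the VM lock, so the RefCell borrows never overlap in practice):

    use std::cell::RefCell;
    use std::rc::Rc;

    struct Mem { bytes: Vec<u8> }

    struct Block { mem: Rc<RefCell<Mem>> }

    impl Block {
        // &self is enough: RefCell gives each block mutable access to the
        // shared mapping without threading &mut through every caller.
        fn write(&self, pos: usize, byte: u8) {
            self.mem.borrow_mut().bytes[pos] = byte;
        }
    }

    fn main() {
        let mem = Rc::new(RefCell::new(Mem { bytes: vec![0; 16] }));
        let inline = Block { mem: mem.clone() };   // cb
        let outlined = Block { mem };              // ocb
        inline.write(0, 0xcc);
        outlined.write(8, 0xcc);
        assert_eq!(inline.mem.borrow().bytes[8], 0xcc); // same backing memory
    }
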
@@ -6702,6 +6676,10 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
}
+ pub fn has_instance() -> bool {
+ unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
+ }
+
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 3cecf31a85..53cb31beb1 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -665,7 +665,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
if new_addr != object {
for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
- cb.get_mem().write_byte(byte_code_ptr, byte)
+ cb.write_mem(byte_code_ptr, byte)
.expect("patching existing code should be within bounds");
}
}
@@ -1916,7 +1916,9 @@ pub fn gen_branch(
// Call the branch generation function
asm.mark_branch_start(&branchref);
- gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default);
+ if let Some(dst_addr) = branch.dst_addrs[0] {
+ gen_fn(asm, dst_addr, branch.dst_addrs[1], BranchShape::Default);
+ }
asm.mark_branch_end(&branchref);
}
@@ -1955,6 +1957,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Default;
// Call the branch generation function
+ asm.comment("gen_direct_jmp: existing block");
asm.mark_branch_start(&branchref);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
asm.mark_branch_end(&branchref);
@@ -1965,6 +1968,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Next0;
// The branch is effectively empty (a noop)
+ asm.comment("gen_direct_jmp: fallthrough");
asm.mark_branch_start(&branchref);
asm.mark_branch_end(&branchref);
}
@@ -2003,7 +2007,9 @@ pub fn defer_compilation(
// Call the branch generation function
asm.mark_branch_start(&branch_rc);
- gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ if let Some(dst_addr) = branch.dst_addrs[0] {
+ gen_jump_branch(asm, dst_addr, None, BranchShape::Default);
+ }
asm.mark_branch_end(&branch_rc);
}
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index f73dca67de..d2b43ecb26 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -4,9 +4,14 @@ use std::ffi::CStr;
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
- // Size of the executable memory block to allocate in MiB
+ // Size of the executable memory block to allocate in bytes
+ // Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
+ // Size of each executable memory code page in bytes
+ // Note that the command line argument is expressed in KiB and not bytes
+ pub code_page_size: usize,
+
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
pub call_threshold: usize,
@@ -48,7 +53,8 @@ pub struct Options {
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
- exec_mem_size: 256,
+ exec_mem_size: 256 * 1024 * 1024,
+ code_page_size: 16 * 1024,
call_threshold: 10,
greedy_versioning: false,
no_type_prop: false,
@@ -118,8 +124,30 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit
- ("exec-mem-size", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
+ ("exec-mem-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ if n == 0 || n > 2 * 1024 * 1024 {
+ return None
+ }
+
+ // Convert from MiB to bytes internally for convenience
+ unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
+ }
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("code-page-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ // Enforce bounds checks and that n is divisible by 4KiB
+ if n < 4 || n > 256 || n % 4 != 0 {
+ return None
+ }
+
+ // Convert from KiB to bytes internally for convenience
+ unsafe { OPTIONS.code_page_size = n * 1024 }
+ }
Err(_) => {
return None;
}
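
For reference, the new --yjit-code-page-size value is taken in KiB, must be in [4, 256], and must be a multiple of 4 so the resulting size stays aligned to common 4 KiB OS pages (CodegenGlobals::init additionally asserts it against the real page size). A self-contained sketch of the same validation, with an illustrative helper name:

    // Mirrors the bounds check above: input in KiB, output in bytes, None = reject.
    fn parse_code_page_size(kib: usize) -> Option<usize> {
        if kib < 4 || kib > 256 || kib % 4 != 0 {
            return None;
        }
        Some(kib * 1024)
    }

    fn main() {
        assert_eq!(parse_code_page_size(16), Some(16 * 1024)); // the default
        assert_eq!(parse_code_page_size(3), None);             // too small
        assert_eq!(parse_code_page_size(6), None);             // not a multiple of 4
    }
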
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index cabebb7dcc..b156c9d5ed 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -74,14 +74,13 @@ pub(crate) use offset_of;
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
fn ruby_str_to_rust(v: VALUE) -> String {
- // Make sure the CRuby encoding is UTF-8 compatible
- let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
- assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);
-
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
- String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
+ match String::from_utf8(str_slice.to_vec()) {
+ Ok(utf8) => utf8,
+ Err(_) => String::new(),
+ }
}
// Location is the file defining the method, colon, method name.