author     Alan Wu <XrXr@users.noreply.github.com>    2022-06-14 10:23:13 -0400
committer  GitHub <noreply@github.com>                2022-06-14 10:23:13 -0400
commit     9f09397bfe6762bf19ef47b2f60988e49b80560d (patch)
tree       2be526b0bc34af44937eab15f31f131c85df6b03 /yjit
parent     9b9cc8ad34fdecdede439f14c027c5eefef5541e (diff)
YJIT: On-demand executable memory allocation; faster boot (#5944)
This commit makes YJIT allocate memory for generated code gradually, as needed. Previously, YJIT allocated all the memory it needed on boot in one go, leading to a higher-than-necessary resident set size (RSS) and time spent on boot initializing the memory with a large memset(). Users should no longer need to search for a magic number to pass to `--yjit-exec-mem`, since physical memory consumption should now more accurately reflect the requirements of the workload.

YJIT now reserves a range of addresses on boot. This region starts out with no access permissions at all, so buggy attempts to jump into the region crash just as they did before this change. To get this hardening at finer granularity than the page size, we fill each page with trapping instructions when we first allocate physical memory for the page.

Most of the time applications don't need 256 MiB of executable code, so allocating on demand ends up doing less total work than before. Case in point: a simple `ruby --yjit-call-threshold=1 -eitself` takes about half as long after this change. In terms of memory consumption, here is a table to give a rough summary of the impact:

| Peak RSS in MiB | -eitself example | railsbench once |
| :-------------: | ---------------: | --------------: |
| before          |              265 |             377 |
| after           |               11 |             143 |
| no YJIT         |               10 |             101 |

A new module is introduced to handle allocation bookkeeping. `CodePtr` is moved into the module since it has a close relationship with the new `VirtualMemory` struct. This new interface has a slightly smaller surface than before, in that marking a region as writable is no longer a public operation.
Notes:
    Merged-By: maximecb <maximecb@ruby-lang.org>
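The reservation itself happens on the C side (`rb_yjit_reserve_addr_space` is only declared in this diff), so as a rough illustration of the reserve-then-commit idea described above, here is a minimal Rust sketch for Linux using the `libc` crate. The helper names `reserve_region` and `commit_page` are illustrative, not part of YJIT.

```rust
// Illustrative sketch only: the real reservation lives in CRuby's C code.
// Assumes Linux and a dependency on the `libc` crate.
use std::ptr;

/// Reserve `size` bytes of address space with no access permissions.
/// Touching the region before committing a page faults, which is the
/// hardening the commit message describes.
unsafe fn reserve_region(size: usize) -> *mut u8 {
    let p = libc::mmap(
        ptr::null_mut(),
        size,
        libc::PROT_NONE, // no access yet
        libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
        -1,
        0,
    );
    assert_ne!(p, libc::MAP_FAILED, "mmap failed");
    p as *mut u8
}

/// Commit one page on first use by granting read/write permissions,
/// then fill it with a trapping byte so jumps into uninitialized code
/// still fault at sub-page granularity.
unsafe fn commit_page(page_start: *mut u8, page_size: usize) {
    let rc = libc::mprotect(
        page_start as *mut _,
        page_size,
        libc::PROT_READ | libc::PROT_WRITE,
    );
    assert_eq!(rc, 0, "mprotect failed");
    std::slice::from_raw_parts_mut(page_start, page_size).fill(0x1E);
}
```

In the actual change, the commit step is done lazily per page by `VirtualMemory::write_byte` in `yjit/src/virtualmem.rs` below, which also tracks the high-water mark in `mapped_region_bytes`.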
Diffstat (limited to 'yjit')
-rw-r--r--  yjit/bindgen/src/main.rs          1
-rw-r--r--  yjit/src/asm/mod.rs             144
-rw-r--r--  yjit/src/asm/x86_64/mod.rs        2
-rw-r--r--  yjit/src/asm/x86_64/tests.rs      2
-rw-r--r--  yjit/src/codegen.rs              55
-rw-r--r--  yjit/src/core.rs                 26
-rw-r--r--  yjit/src/cruby.rs                 3
-rw-r--r--  yjit/src/cruby_bindings.inc.rs    5
-rw-r--r--  yjit/src/lib.rs                   1
-rw-r--r--  yjit/src/stats.rs                 6
-rw-r--r--  yjit/src/virtualmem.rs          376
11 files changed, 480 insertions, 141 deletions
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 212013d70c..d8f3c98e89 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -239,6 +239,7 @@ fn main() {
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
.allowlist_function("rb_iseq_pc_at_idx")
.allowlist_function("rb_iseq_opcode_at_pc")
+ .allowlist_function("rb_yjit_reserve_addr_space")
.allowlist_function("rb_yjit_mark_writable")
.allowlist_function("rb_yjit_mark_executable")
.allowlist_function("rb_yjit_get_page_size")
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 1d31facb78..e16e856925 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -3,49 +3,16 @@ use std::mem;
#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;
+use crate::virtualmem::{VirtualMem, CodePtr};
+
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
#[rustfmt::skip]
pub mod x86_64;
-/// Pointer to a piece of machine code
-/// We may later change this to wrap an u32
-/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
-#[repr(C)]
-pub struct CodePtr(*const u8);
-
-impl CodePtr {
- pub fn raw_ptr(&self) -> *const u8 {
- let CodePtr(ptr) = *self;
- return ptr;
- }
-
- fn into_i64(&self) -> i64 {
- let CodePtr(ptr) = self;
- *ptr as i64
- }
-
- #[allow(unused)]
- fn into_usize(&self) -> usize {
- let CodePtr(ptr) = self;
- *ptr as usize
- }
-}
-
-impl From<*mut u8> for CodePtr {
- fn from(value: *mut u8) -> Self {
- assert!(value as usize != 0);
- return CodePtr(value);
- }
-}
-
//
// TODO: need a field_size_of macro, to compute the size of a struct field in bytes
//
-// 1 is not aligned so this won't match any pages
-const ALIGNED_WRITE_POSITION_NONE: usize = 1;
-
/// Reference to an ASM label
struct LabelRef {
// Position in the code block where the label reference exists
@@ -57,13 +24,8 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
- // Block of non-executable memory used for dummy code blocks
- // This memory is owned by this block and lives as long as the block
- #[allow(unused)]
- dummy_block: Vec<u8>,
-
- // Pointer to memory we are writing into
- mem_block: *mut u8,
+ // Memory for storing the encoded instructions
+ mem_block: VirtualMem,
// Memory block size
mem_size: usize,
@@ -84,14 +46,6 @@ pub struct CodeBlock {
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap<usize, Vec<String>>,
- // Keep track of the current aligned write position.
- // Used for changing protection when writing to the JIT buffer
- current_aligned_write_pos: usize,
-
- // Memory protection works at page granularity and this is the
- // the size of each page. Used to implement W^X.
- page_size: usize,
-
// Set if the CodeBlock is unable to output some instructions,
// for example, when there is not enough space or when a jump
// target is too far away.
@@ -99,47 +53,22 @@ pub struct CodeBlock {
}
impl CodeBlock {
- #[cfg(test)]
- pub fn new_dummy(mem_size: usize) -> Self {
- // Allocate some non-executable memory
- let mut dummy_block = vec![0; mem_size];
- let mem_ptr = dummy_block.as_mut_ptr();
-
- Self {
- dummy_block: dummy_block,
- mem_block: mem_ptr,
- mem_size: mem_size,
- write_pos: 0,
- label_addrs: Vec::new(),
- label_names: Vec::new(),
- label_refs: Vec::new(),
- #[cfg(feature = "asm_comments")]
- asm_comments: BTreeMap::new(),
- current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
- page_size: 4096,
- dropped_bytes: false,
- }
- }
-
- #[cfg(not(test))]
- pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self {
+ /// Make a new CodeBlock
+ pub fn new(mem_block: VirtualMem) -> Self {
Self {
- dummy_block: vec![0; 0],
- mem_block: mem_block,
- mem_size: mem_size,
+ mem_size: mem_block.virtual_region_size(),
+ mem_block,
write_pos: 0,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
- current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
- page_size,
dropped_bytes: false,
}
}
- // Check if this code block has sufficient remaining capacity
+ /// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
self.write_pos + num_bytes < self.mem_size
}
@@ -175,6 +104,10 @@ impl CodeBlock {
self.write_pos
}
+ pub fn get_mem(&mut self) -> &mut VirtualMem {
+ &mut self.mem_block
+ }
+
// Set the current write position
pub fn set_pos(&mut self, pos: usize) {
// Assert here since while CodeBlock functions do bounds checking, there is
@@ -204,16 +137,13 @@ impl CodeBlock {
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize);
+ let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
self.set_pos(pos);
}
// Get a direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- unsafe {
- let ptr = self.mem_block.add(offset);
- CodePtr(ptr)
- }
+ self.mem_block.start_ptr().add_bytes(offset)
}
// Get a direct pointer to the current write position
@@ -223,9 +153,9 @@ impl CodeBlock {
// Write a single byte at the current position
pub fn write_byte(&mut self, byte: u8) {
- if self.write_pos < self.mem_size {
- self.mark_position_writable(self.write_pos);
- unsafe { self.mem_block.add(self.write_pos).write(byte) };
+ let write_ptr = self.get_write_ptr();
+
+ if self.mem_block.write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
self.dropped_bytes = true;
@@ -328,33 +258,23 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
- pub fn mark_position_writable(&mut self, write_pos: usize) {
- let page_size = self.page_size;
- let aligned_position = (write_pos / page_size) * page_size;
+ pub fn mark_all_executable(&mut self) {
+ self.mem_block.mark_all_executable();
+ }
+}
- if self.current_aligned_write_pos != aligned_position {
- self.current_aligned_write_pos = aligned_position;
+#[cfg(test)]
+impl CodeBlock {
+ /// Stubbed CodeBlock for testing. Can't execute generated code.
+ pub fn new_dummy(mem_size: usize) -> Self {
+ use crate::virtualmem::*;
+ use crate::virtualmem::tests::TestingAllocator;
- #[cfg(not(test))]
- unsafe {
- use core::ffi::c_void;
- let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void;
- crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap());
- }
- }
- }
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+ let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
- pub fn mark_all_executable(&mut self) {
- self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
-
- #[cfg(not(test))]
- unsafe {
- use core::ffi::c_void;
- // NOTE(alan): Right now we do allocate one big chunck and give the top half to the outlined codeblock
- // The start of the top half of the region isn't necessarily a page boundary...
- let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void;
- crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap());
- }
+ Self::new(virt_mem)
}
}
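The asm/mod.rs changes above replace the raw `*mut u8` plus manual page bookkeeping with a `VirtualMem`-backed `CodeBlock`. As a hedged sketch (not a test from the commit) of the resulting write flow, using only the public methods visible in this diff (`new_dummy`, `write_byte`, `has_capacity`, `mark_all_executable`):

```rust
// Sketch only: exercises the CodeBlock surface shown in the diff above,
// using the cfg(test) constructor backed by TestingAllocator.
#[cfg(test)]
#[test]
fn codeblock_write_session_sketch() {
    let mut cb = CodeBlock::new_dummy(1024);

    // Each byte goes through VirtualMem::write_byte, which lazily maps the
    // page being written and fills it with trapping bytes on first touch.
    cb.write_byte(0x90); // NOP on x86-64
    assert!(cb.has_capacity(16));

    // End of the write session: the mapped part of the region is made
    // executable in one call instead of page-by-page unlocking.
    cb.mark_all_executable();
}
```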
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index a2549faab8..6eb7efaa0a 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -862,7 +862,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
let end_ptr = cb.get_ptr(cb.write_pos + 4);
// Compute the jump offset
- let rel64 = (dst_ptr.0 as i64) - (end_ptr.0 as i64);
+ let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
// Write the relative 32-bit jump offset
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
index c77d30e74d..ffcc063420 100644
--- a/yjit/src/asm/x86_64/tests.rs
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -7,7 +7,7 @@ use std::fmt;
impl<'a> fmt::LowerHex for super::CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.add(pos).read() };
+ let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index ca2c237e2d..75249658fb 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -21,6 +21,8 @@ use std::os::raw::c_uint;
use std::ptr;
use std::slice;
+pub use crate::virtualmem::CodePtr;
+
// Callee-saved registers
pub const REG_CFP: X86Opnd = R13;
pub const REG_EC: X86Opnd = R12;
@@ -5982,14 +5984,53 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- let page_size = unsafe { rb_yjit_get_page_size() }.as_usize();
- let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) };
- let cb = CodeBlock::new(mem_block, mem_size / 2, page_size);
- let ocb = OutlinedCb::wrap(CodeBlock::new(
- unsafe { mem_block.add(mem_size / 2) },
- mem_size / 2,
+ // TODO(alan): we can error more gracefully when the user gives
+ // --yjit-exec-mem=absurdly-large-number
+ //
+ // 2 GiB. It's likely a bug if we generate this much code.
+ const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
+ assert!(mem_size <= MAX_BUFFER_SIZE);
+ let mem_size_u32 = mem_size as u32;
+ let half_size = mem_size / 2;
+
+ let page_size = unsafe { rb_yjit_get_page_size() };
+ let assert_page_aligned = |ptr| assert_eq!(
+ 0,
+ ptr as usize % page_size.as_usize(),
+ "Start of virtual address block should be page-aligned",
+ );
+
+ let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
+ let second_half = virt_block.wrapping_add(half_size);
+
+ // Memory protection syscalls need page-aligned addresses, so check it here. Assuming
+ // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
+ // page size in bytes is a power of two 2¹⁹ or smaller. This is because the user
+ // requested size is half of mem_option × 2²⁰ as it's in MiB.
+ //
+ // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
+ // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
+ assert_page_aligned(virt_block);
+ assert_page_aligned(second_half);
+
+ use crate::virtualmem::*;
+
+ let first_half = VirtualMem::new(
+ SystemAllocator {},
page_size,
- ));
+ virt_block,
+ half_size
+ );
+ let second_half = VirtualMem::new(
+ SystemAllocator {},
+ page_size,
+ second_half,
+ half_size
+ );
+
+ let cb = CodeBlock::new(first_half);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(second_half));
+
(cb, ocb)
};
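The alignment comment in the codegen.rs hunk above argues that splitting the reserved block in half cannot break page alignment as long as the page size is a power of two no larger than 2¹⁹. The following small sketch restates that arithmetic; it is not code from the commit, and `opt_mib` is a stand-in for the `--yjit-exec-mem` value in MiB.

```rust
// Restates the comment's reasoning: the reserved size is opt_mib * 2^20 bytes,
// so each half is opt_mib * 2^19 bytes, and any power-of-two page size up to
// 2^19 divides that evenly. 2 MiB or 1 GiB huge pages would violate this.
fn midpoint_is_page_aligned(opt_mib: usize, page_size: usize) -> bool {
    assert!(page_size.is_power_of_two() && page_size <= 1 << 19);
    let half_size = (opt_mib * (1 << 20)) / 2;
    half_size % page_size == 0
}

fn main() {
    assert!(midpoint_is_page_aligned(256, 4096));   // x86-64 4 KiB pages
    assert!(midpoint_is_page_aligned(256, 65536));  // ARMv8 64 KiB pages
}
```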
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index bbc5e800c0..6d6877f273 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -1,6 +1,7 @@
use crate::asm::x86_64::*;
use crate::asm::*;
use crate::codegen::*;
+use crate::virtualmem::CodePtr;
use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
@@ -9,7 +10,6 @@ use core::ffi::c_void;
use std::cell::*;
use std::hash::{Hash, Hasher};
use std::mem;
-use std::mem::size_of;
use std::rc::{Rc};
use InsnOpnd::*;
use TempMapping::*;
@@ -573,24 +573,22 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// Walk over references to objects in generated code.
for offset in &block.gc_object_offsets {
let offset_to_value = offset.as_usize();
- let value_address: *const u8 = cb.get_ptr(offset_to_value).raw_ptr();
+ let value_code_ptr = cb.get_ptr(offset_to_value);
+ let value_ptr: *const u8 = value_code_ptr.raw_ptr();
// Creating an unaligned pointer is well defined unlike in C.
- let value_address = value_address as *mut VALUE;
+ let value_ptr = value_ptr as *mut VALUE;
// SAFETY: these point to YJIT's code buffer
- let object = unsafe { value_address.read_unaligned() };
+ let object = unsafe { value_ptr.read_unaligned() };
let new_addr = unsafe { rb_gc_location(object) };
- // Only write when the VALUE moves, to be CoW friendly.
+ // Only write when the VALUE moves, to be copy-on-write friendly.
if new_addr != object {
- // Possibly unlock the page we need to update
- cb.mark_position_writable(offset_to_value);
-
- // Object could cross a page boundary, so unlock there as well
- cb.mark_position_writable(offset_to_value + size_of::<VALUE>() - 1);
-
- // SAFETY: we just made this address writable
- unsafe { value_address.write_unaligned(new_addr) };
+ for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
+ let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
+ cb.get_mem().write_byte(byte_code_ptr, byte)
+ .expect("patching existing code should be within bounds");
+ }
}
}
}
@@ -599,8 +597,6 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// Note that we would have returned already if YJIT is off.
cb.mark_all_executable();
- // I guess we need to make the outlined block executable as well because
- // we don't split the two at exact page boundaries.
CodegenGlobals::get_outlined_cb()
.unwrap()
.mark_all_executable();
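The core.rs hunk above rewrites GC reference patching to go through the new bounds-checked `write_byte` instead of manually unlocking pages. Isolated into a helper, and with `VirtualMem`/`CodePtr` imported, the patching loop amounts to the following sketch; the function name is illustrative and `new_addr` stands in for the relocated VALUE as a u64.

```rust
use crate::virtualmem::{CodePtr, VirtualMem};

// Sketch of the byte-wise patch: it only runs when the VALUE actually moved,
// so untouched pages stay shared (copy-on-write friendly), and each byte
// write re-maps its page writable only when needed.
fn patch_moved_value(mem: &mut VirtualMem, value_code_ptr: CodePtr, new_addr: u64) {
    for (byte_idx, &byte) in new_addr.to_le_bytes().iter().enumerate() {
        mem.write_byte(value_code_ptr.add_bytes(byte_idx), byte)
            .expect("patching existing code should be within bounds");
    }
}
```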
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index da9a84a160..51ba9c1531 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -111,9 +111,6 @@ pub use autogened::*;
// and textually included in this file
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing
extern "C" {
- #[link_name = "rb_yjit_alloc_exec_mem"] // we can rename functions with this attribute
- pub fn alloc_exec_mem(mem_size: u32) -> *mut u8;
-
#[link_name = "rb_insn_name"]
pub fn raw_insn_name(insn: VALUE) -> *const c_char;
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index b5dd356aef..44f87a8482 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -976,7 +976,7 @@ extern "C" {
) -> ::std::os::raw::c_int;
}
extern "C" {
- pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+ pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
}
extern "C" {
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
@@ -992,6 +992,9 @@ extern "C" {
pub fn rb_yjit_get_page_size() -> u32;
}
extern "C" {
+ pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8;
+}
+extern "C" {
pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool;
}
extern "C" {
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
index 2313fdbce6..6772f551a8 100644
--- a/yjit/src/lib.rs
+++ b/yjit/src/lib.rs
@@ -13,3 +13,4 @@ mod options;
mod stats;
mod utils;
mod yjit;
+mod virtualmem;
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index e129cc2811..6bad8db7e7 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -141,7 +141,7 @@ macro_rules! incr_counter {
($counter_name:ident) => {
#[allow(unused_unsafe)]
{
- unsafe { COUNTERS.$counter_name += 1 }
+ unsafe { $crate::stats::COUNTERS.$counter_name += 1 }
}
};
}
@@ -244,6 +244,10 @@ make_counters! {
gbpp_block_param_modified,
gbpp_block_handler_not_iseq,
+
+ // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in
+ // executable memory, so this should be 0.
+ exec_mem_non_bump_alloc,
}
//===========================================================================
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
new file mode 100644
index 0000000000..6a8e27447e
--- /dev/null
+++ b/yjit/src/virtualmem.rs
@@ -0,0 +1,376 @@
+//! Memory management stuff for YJIT's code storage. Deals with virtual memory.
+// I'm aware that there is an experiment in Rust Nightly right now to see if banning
+// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much
+// benefit.
+
+use crate::utils::IntoUsize;
+
+#[cfg(not(test))]
+pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
+
+#[cfg(test)]
+pub type VirtualMem = VirtualMemory<tests::TestingAllocator>;
+
+/// Memory for generated executable machine code. When not testing, we reserve address space for
+/// the entire region upfront and map physical memory into the reserved address space as needed. On
+/// Linux, this is basically done using an `mmap` with `PROT_NONE` upfront and gradually using
+/// `mprotect` with `PROT_READ|PROT_WRITE` as needed. The WIN32 equivalent seems to be
+/// `VirtualAlloc` with `MEM_RESERVE` then later with `MEM_COMMIT`.
+///
+/// This handles ["W^X"](https://en.wikipedia.org/wiki/W%5EX) semi-automatically. Writes
+/// are always accepted and once writes are done a call to [Self::mark_all_executable] makes
+/// the code in the region executable.
+pub struct VirtualMemory<A: Allocator> {
+ /// Location of the virtual memory region.
+ region_start: *mut u8,
+
+ /// Size of the region in bytes.
+ region_size_bytes: usize,
+
+ /// Number of bytes per "page"; memory protection can only be controlled at this
+ /// granularity.
+ page_size_bytes: usize,
+
+ /// Number of bytes that we have allocated physical memory for, starting at
+ /// [Self::region_start].
+ mapped_region_bytes: usize,
+
+ /// Keep track of the address of the last written to page.
+ /// Used for changing protection to implement W^X.
+ current_write_page: Option<usize>,
+
+ /// Zero size member for making syscalls to get physical memory during normal operation.
+ /// When testing this owns some memory.
+ allocator: A,
+}
+
+/// Groups together the two syscalls to get new physical memory and to change
+/// memory protection. See [VirtualMemory] for details.
+pub trait Allocator {
+ #[must_use]
+ fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
+
+ fn mark_executable(&mut self, ptr: *const u8, size: u32);
+}
+
+/// Pointer into a [VirtualMemory].
+/// We may later change this to wrap an u32.
+/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+#[repr(C)]
+pub struct CodePtr(*const u8);
+
+/// Errors that can happen when writing to [VirtualMemory]
+#[derive(Debug, PartialEq)]
+pub enum WriteError {
+ OutOfBounds,
+ FailedPageMapping,
+}
+
+use WriteError::*;
+
+impl<A: Allocator> VirtualMemory<A> {
+ /// Bring a part of the address space under management.
+ pub fn new(allocator: A, page_size: u32, virt_region_start: *mut u8, size_bytes: usize) -> Self {
+ assert_ne!(0, page_size);
+ let page_size_bytes = page_size.as_usize();
+
+ Self {
+ region_start: virt_region_start,
+ region_size_bytes: size_bytes,
+ page_size_bytes,
+ mapped_region_bytes: 0,
+ current_write_page: None,
+ allocator,
+ }
+ }
+
+ /// Return the start of the region as a raw pointer. Note that it could be a dangling
+ /// pointer so be careful dereferencing it.
+ pub fn start_ptr(&self) -> CodePtr {
+ CodePtr(self.region_start)
+ }
+
+ /// Size of the region in bytes where writes could be attempted.
+ pub fn virtual_region_size(&self) -> usize {
+ self.region_size_bytes
+ }
+
+ /// Write a single byte. The first write to a page makes it readable.
+ pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ let page_size = self.page_size_bytes;
+ let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8;
+ let page_addr = (raw as usize / page_size) * page_size;
+
+ if self.current_write_page == Some(page_addr) {
+ // Writing within the last written to page, nothing to do
+ } else {
+ // Switching to a different and potentially new page
+ let start = self.region_start;
+ let mapped_region_end = start.wrapping_add(self.mapped_region_bytes);
+ let whole_region_end = start.wrapping_add(self.region_size_bytes);
+ let alloc = &mut self.allocator;
+
+ assert!((start..=whole_region_end).contains(&mapped_region_end));
+
+ if (start..mapped_region_end).contains(&raw) {
+ // Writing to a previously written to page.
+ // Need to make page writable, but no need to fill.
+ let page_size: u32 = page_size.try_into().unwrap();
+ if !alloc.mark_writable(page_addr as *const _, page_size) {
+ return Err(FailedPageMapping);
+ }
+
+ self.current_write_page = Some(page_addr);
+ } else if (start..whole_region_end).contains(&raw) {
+ // Writing to a brand new page
+ let mapped_region_end_addr = mapped_region_end as usize;
+ let alloc_size = page_addr - mapped_region_end_addr + page_size;
+
+ assert_eq!(0, alloc_size % page_size, "allocation size should be page aligned");
+ assert_eq!(0, mapped_region_end_addr % page_size, "pointer should be page aligned");
+
+ if alloc_size > page_size {
+ // This is unusual for the current setup, so keep track of it.
+ crate::stats::incr_counter!(exec_mem_non_bump_alloc);
+ }
+
+ // Allocate new chunk
+ let alloc_size_u32: u32 = alloc_size.try_into().unwrap();
+ unsafe {
+ if !alloc.mark_writable(mapped_region_end.cast(), alloc_size_u32) {
+ return Err(FailedPageMapping);
+ }
+ // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory
+ // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and goes through the
+ // usual Ruby crash reporter.
+ std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E);
+ }
+ self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
+
+ self.current_write_page = Some(page_addr);
+ } else {
+ return Err(OutOfBounds);
+ }
+ }
+
+ // We have permission to write if we get here
+ unsafe { raw.write(byte) };
+
+ Ok(())
+ }
+
+ /// Make all the code in the region executable. Call this at the end of a write session.
+ /// See [Self] for usual usage flow.
+ pub fn mark_all_executable(&mut self) {
+ self.current_write_page = None;
+
+ let region_start = self.region_start;
+ let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+
+ // Make mapped region executable
+ self.allocator.mark_executable(region_start, mapped_region_bytes);
+ }
+}
+
+impl CodePtr {
+ /// Note that the raw pointer might be dangling if there hasn't
+ /// been any writes to it through the [VirtualMemory] yet.
+ pub fn raw_ptr(self) -> *const u8 {
+ let CodePtr(ptr) = self;
+ return ptr;
+ }
+
+ /// Advance the CodePtr. Can return a dangling pointer.
+ pub fn add_bytes(self, bytes: usize) -> Self {
+ let CodePtr(raw) = self;
+ CodePtr(raw.wrapping_add(bytes))
+ }
+
+ pub fn into_i64(self) -> i64 {
+ let CodePtr(ptr) = self;
+ ptr as i64
+ }
+
+ pub fn into_usize(self) -> usize {
+ let CodePtr(ptr) = self;
+ ptr as usize
+ }
+}
+
+impl From<*mut u8> for CodePtr {
+ fn from(value: *mut u8) -> Self {
+ assert!(value as usize != 0);
+ return CodePtr(value);
+ }
+}
+
+/// Requires linking with CRuby to work
+#[cfg(not(test))]
+mod sys {
+ use crate::cruby::*;
+
+ /// Zero size! This just groups together syscalls that require linking with CRuby.
+ pub struct SystemAllocator;
+
+ type VoidPtr = *mut std::os::raw::c_void;
+
+ impl super::Allocator for SystemAllocator {
+ fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool {
+ unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) }
+ }
+
+ fn mark_executable(&mut self, ptr: *const u8, size: u32) {
+ unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
+ }
+ }
+}
+
+#[cfg(not(test))]
+pub(crate) use sys::*;
+
+
+#[cfg(test)]
+pub mod tests {
+ use crate::utils::IntoUsize;
+ use super::*;
+
+ // Tracks allocation requests and owns some fixed-size backing memory for requests.
+ // While testing we don't execute generated code.
+ pub struct TestingAllocator {
+ requests: Vec<AllocRequest>,
+ memory: Vec<u8>,
+ }
+
+ #[derive(Debug)]
+ enum AllocRequest {
+ MarkWritable{ start_idx: usize, length: usize },
+ MarkExecutable{ start_idx: usize, length: usize },
+ }
+ use AllocRequest::*;
+
+ impl TestingAllocator {
+ pub fn new(mem_size: usize) -> Self {
+ Self { requests: Vec::default(), memory: vec![0; mem_size] }
+ }
+
+ pub fn mem_start(&self) -> *const u8 {
+ self.memory.as_ptr()
+ }
+
+ // Verify that write_byte() bounds checks. Return `ptr` as an index.
+ fn bounds_check_request(&self, ptr: *const u8, size: u32) -> usize {
+ let mem_start = self.memory.as_ptr() as usize;
+ let index = ptr as usize - mem_start;
+
+ assert!(index < self.memory.len());
+ assert!(index + size.as_usize() <= self.memory.len());
+
+ index
+ }
+ }
+
+ // Bounds check and then record the request
+ impl super::Allocator for TestingAllocator {
+ fn mark_writable(&mut self, ptr: *const u8, length: u32) -> bool {
+ let index = self.bounds_check_request(ptr, length);
+ self.requests.push(MarkWritable { start_idx: index, length: length.as_usize() });
+
+ true
+ }
+
+ fn mark_executable(&mut self, ptr: *const u8, length: u32) {
+ let index = self.bounds_check_request(ptr, length);
+ self.requests.push(MarkExecutable { start_idx: index, length: length.as_usize() });
+
+ // We don't try to execute generated code in cfg(test)
+ // so no need to actually request executable memory.
+ }
+ }
+
+ // Fictional architecture where each page is 4 bytes long
+ const PAGE_SIZE: usize = 4;
+ fn new_dummy_virt_mem() -> VirtualMemory<TestingAllocator> {
+ let mem_size = PAGE_SIZE * 10;
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+
+ VirtualMemory::new(
+ alloc,
+ PAGE_SIZE.try_into().unwrap(),
+ mem_start as *mut u8,
+ mem_size,
+ )
+ }
+
+ #[test]
+ fn new_memory_is_initialized() {
+ let mut virt = new_dummy_virt_mem();
+
+ virt.write_byte(virt.start_ptr(), 1).unwrap();
+ assert!(
+ virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
+ "Entire page should be initialized",
+ );
+
+ // Skip a few pages
+ let three_pages = 3 * PAGE_SIZE;
+ virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap();
+ assert!(
+ virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
+ "Gaps between write requests should be filled",
+ );
+ }
+
+ #[test]
+ fn no_redundant_syscalls_when_writing_to_the_same_page() {
+ let mut virt = new_dummy_virt_mem();
+
+ virt.write_byte(virt.start_ptr(), 1).unwrap();
+ virt.write_byte(virt.start_ptr(), 0).unwrap();
+
+ assert!(
+ matches!(
+ virt.allocator.requests[..],
+ [MarkWritable { start_idx: 0, length: PAGE_SIZE }],
+ )
+ );
+ }
+
+ #[test]
+ fn bounds_checking() {
+ use super::WriteError::*;
+ use std::ptr;
+ let mut virt = new_dummy_virt_mem();
+
+ let null = CodePtr(ptr::null());
+ assert_eq!(Err(OutOfBounds), virt.write_byte(null, 0));
+
+ let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
+ assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
+
+ let end_of_addr_space = CodePtr(usize::MAX as _);
+ assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0));
+ }
+
+ #[test]
+ fn only_written_to_regions_become_executable() {
+ // ... so we catch attempts to read/write/execute never-written-to regions
+ const THREE_PAGES: usize = PAGE_SIZE * 3;
+ let mut virt = new_dummy_virt_mem();
+ let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2);
+ virt.write_byte(page_two_start, 1).unwrap();
+ virt.mark_all_executable();
+
+ assert!(virt.virtual_region_size() > THREE_PAGES);
+ assert!(
+ matches!(
+ virt.allocator.requests[..],
+ [
+ MarkWritable { start_idx: 0, length: THREE_PAGES },
+ MarkExecutable { start_idx: 0, length: THREE_PAGES },
+ ]
+ ),
+ );
+ }
+}