-rw-r--r--  yjit.c                          |  51
-rw-r--r--  yjit/bindgen/src/main.rs        |   1
-rw-r--r--  yjit/src/asm/mod.rs             | 144
-rw-r--r--  yjit/src/asm/x86_64/mod.rs      |   2
-rw-r--r--  yjit/src/asm/x86_64/tests.rs    |   2
-rw-r--r--  yjit/src/codegen.rs             |  55
-rw-r--r--  yjit/src/core.rs                |  26
-rw-r--r--  yjit/src/cruby.rs               |   3
-rw-r--r--  yjit/src/cruby_bindings.inc.rs  |   5
-rw-r--r--  yjit/src/lib.rs                 |   1
-rw-r--r--  yjit/src/stats.rs               |   6
-rw-r--r--  yjit/src/virtualmem.rs          | 376
12 files changed, 504 insertions, 168 deletions
diff --git a/yjit.c b/yjit.c
index 03930706ba..fe9098f0b5 100644
--- a/yjit.c
+++ b/yjit.c
@@ -56,7 +56,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
// types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
// the Rust side.
//
-// What's up with the long prefix? The "rb_" part is to apease `make leaked-globals`
+// What's up with the long prefix? The "rb_" part is to appease `make leaked-globals`
// which runs on upstream CI. The rationale for the check is unclear to Alan as
// we build with `-fvisibility=hidden` so only explicitly marked functions end
// up as public symbols in libruby.so. Perhaps the check is for the static
@@ -66,13 +66,13 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
// The "_yjit_" part is for trying to be informative. We might want different
// suffixes for symbols meant for Rust and symbols meant for broader CRuby.
-void
+bool
rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
{
if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
- rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n",
- mem_block, (unsigned long)mem_size, strerror(errno));
+ return false;
}
+ return true;
}
void
@@ -209,25 +209,29 @@ align_ptr(uint8_t *ptr, uint32_t multiple)
}
#endif
-// Allocate a block of executable memory
+// Address space reservation. Memory pages are mapped on an as-needed basis.
+// See the Rust mm module for details.
uint8_t *
-rb_yjit_alloc_exec_mem(uint32_t mem_size)
+rb_yjit_reserve_addr_space(uint32_t mem_size)
{
#ifndef _WIN32
uint8_t *mem_block;
// On Linux
#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
+ uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE);
+ uint8_t *const cfunc_sample_addr = (void *)&rb_yjit_reserve_addr_space;
+ uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX;
// Align the requested address to page size
- uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
- uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size);
+ uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size);
+ // Probe for addresses close to this function using MAP_FIXED_NOREPLACE
+ // to improve odds of being in range for 32-bit relative call instructions.
do {
- // Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)req_addr,
+ mem_block = mmap(
+ req_addr,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
-1,
0
@@ -240,15 +244,15 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// +4MB
req_addr += 4 * 1024 * 1024;
- } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX);
+ } while (req_addr < probe_region_end);
// On MacOS and other platforms
#else
// Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)rb_yjit_alloc_exec_mem,
+ mem_block = mmap(
+ (void *)rb_yjit_reserve_addr_space,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
@@ -258,10 +262,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// Fallback
if (mem_block == MAP_FAILED) {
// Try again without the address hint (e.g., valgrind)
- mem_block = (uint8_t*)mmap(
+ mem_block = mmap(
NULL,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
@@ -270,17 +274,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// Check that the memory mapping was successful
if (mem_block == MAP_FAILED) {
- perror("mmap call failed");
- exit(-1);
+ perror("ruby: yjit: mmap:");
+ rb_bug("mmap failed");
}
- // Fill the executable memory with PUSH DS (0x1E) so that
- // executing uninitialized memory will fault with #UD in
- // 64-bit mode.
- rb_yjit_mark_writable(mem_block, mem_size);
- memset(mem_block, 0x1E, mem_size);
- rb_yjit_mark_executable(mem_block, mem_size);
-
return mem_block;
#else
// Windows not supported for now
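The new rb_yjit_reserve_addr_space only reserves address space; pages become usable later through rb_yjit_mark_writable and rb_yjit_mark_executable. Below is a minimal Rust sketch of the same reserve-then-commit W^X lifecycle, written against the libc crate (an assumption of the sketch; the patch itself uses the C helpers above and none of this code is part of the diff):

    // Sketch only: reserve with PROT_NONE, commit a page read+write on first use,
    // then flip it to read+execute at the end of the write session.
    use std::ptr;

    fn main() {
        unsafe {
            let reservation_bytes = 64 * 1024 * 1024; // illustrative size
            let region = libc::mmap(
                ptr::null_mut(),
                reservation_bytes,
                libc::PROT_NONE, // address space only, no accessible pages yet
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            );
            assert_ne!(region, libc::MAP_FAILED, "mmap failed");

            let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize;

            // First write to a page: commit it by making it readable and writable.
            assert_eq!(0, libc::mprotect(region, page_size, libc::PROT_READ | libc::PROT_WRITE));
            (region as *mut u8).write(0xC3); // x86-64 RET, purely for illustration

            // End of the write session: make the written range executable (W^X).
            assert_eq!(0, libc::mprotect(region, page_size, libc::PROT_READ | libc::PROT_EXEC));
        }
    }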
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 212013d70c..d8f3c98e89 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -239,6 +239,7 @@ fn main() {
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
.allowlist_function("rb_iseq_pc_at_idx")
.allowlist_function("rb_iseq_opcode_at_pc")
+ .allowlist_function("rb_yjit_reserve_addr_space")
.allowlist_function("rb_yjit_mark_writable")
.allowlist_function("rb_yjit_mark_executable")
.allowlist_function("rb_yjit_get_page_size")
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 1d31facb78..e16e856925 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -3,49 +3,16 @@ use std::mem;
#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;
+use crate::virtualmem::{VirtualMem, CodePtr};
+
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
#[rustfmt::skip]
pub mod x86_64;
-/// Pointer to a piece of machine code
-/// We may later change this to wrap an u32
-/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
-#[repr(C)]
-pub struct CodePtr(*const u8);
-
-impl CodePtr {
- pub fn raw_ptr(&self) -> *const u8 {
- let CodePtr(ptr) = *self;
- return ptr;
- }
-
- fn into_i64(&self) -> i64 {
- let CodePtr(ptr) = self;
- *ptr as i64
- }
-
- #[allow(unused)]
- fn into_usize(&self) -> usize {
- let CodePtr(ptr) = self;
- *ptr as usize
- }
-}
-
-impl From<*mut u8> for CodePtr {
- fn from(value: *mut u8) -> Self {
- assert!(value as usize != 0);
- return CodePtr(value);
- }
-}
-
//
// TODO: need a field_size_of macro, to compute the size of a struct field in bytes
//
-// 1 is not aligned so this won't match any pages
-const ALIGNED_WRITE_POSITION_NONE: usize = 1;
-
/// Reference to an ASM label
struct LabelRef {
// Position in the code block where the label reference exists
@@ -57,13 +24,8 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
- // Block of non-executable memory used for dummy code blocks
- // This memory is owned by this block and lives as long as the block
- #[allow(unused)]
- dummy_block: Vec<u8>,
-
- // Pointer to memory we are writing into
- mem_block: *mut u8,
+ // Memory for storing the encoded instructions
+ mem_block: VirtualMem,
// Memory block size
mem_size: usize,
@@ -84,14 +46,6 @@ pub struct CodeBlock {
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap<usize, Vec<String>>,
- // Keep track of the current aligned write position.
- // Used for changing protection when writing to the JIT buffer
- current_aligned_write_pos: usize,
-
- // Memory protection works at page granularity and this is the
- // the size of each page. Used to implement W^X.
- page_size: usize,
-
// Set if the CodeBlock is unable to output some instructions,
// for example, when there is not enough space or when a jump
// target is too far away.
@@ -99,47 +53,22 @@ pub struct CodeBlock {
}
impl CodeBlock {
- #[cfg(test)]
- pub fn new_dummy(mem_size: usize) -> Self {
- // Allocate some non-executable memory
- let mut dummy_block = vec![0; mem_size];
- let mem_ptr = dummy_block.as_mut_ptr();
-
- Self {
- dummy_block: dummy_block,
- mem_block: mem_ptr,
- mem_size: mem_size,
- write_pos: 0,
- label_addrs: Vec::new(),
- label_names: Vec::new(),
- label_refs: Vec::new(),
- #[cfg(feature = "asm_comments")]
- asm_comments: BTreeMap::new(),
- current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
- page_size: 4096,
- dropped_bytes: false,
- }
- }
-
- #[cfg(not(test))]
- pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self {
+ /// Make a new CodeBlock
+ pub fn new(mem_block: VirtualMem) -> Self {
Self {
- dummy_block: vec![0; 0],
- mem_block: mem_block,
- mem_size: mem_size,
+ mem_size: mem_block.virtual_region_size(),
+ mem_block,
write_pos: 0,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
- current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
- page_size,
dropped_bytes: false,
}
}
- // Check if this code block has sufficient remaining capacity
+ /// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
self.write_pos + num_bytes < self.mem_size
}
@@ -175,6 +104,10 @@ impl CodeBlock {
self.write_pos
}
+ pub fn get_mem(&mut self) -> &mut VirtualMem {
+ &mut self.mem_block
+ }
+
// Set the current write position
pub fn set_pos(&mut self, pos: usize) {
// Assert here since while CodeBlock functions do bounds checking, there is
@@ -204,16 +137,13 @@ impl CodeBlock {
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize);
+ let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
self.set_pos(pos);
}
// Get a direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- unsafe {
- let ptr = self.mem_block.add(offset);
- CodePtr(ptr)
- }
+ self.mem_block.start_ptr().add_bytes(offset)
}
// Get a direct pointer to the current write position
@@ -223,9 +153,9 @@ impl CodeBlock {
// Write a single byte at the current position
pub fn write_byte(&mut self, byte: u8) {
- if self.write_pos < self.mem_size {
- self.mark_position_writable(self.write_pos);
- unsafe { self.mem_block.add(self.write_pos).write(byte) };
+ let write_ptr = self.get_write_ptr();
+
+ if self.mem_block.write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
self.dropped_bytes = true;
@@ -328,33 +258,23 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
- pub fn mark_position_writable(&mut self, write_pos: usize) {
- let page_size = self.page_size;
- let aligned_position = (write_pos / page_size) * page_size;
+ pub fn mark_all_executable(&mut self) {
+ self.mem_block.mark_all_executable();
+ }
+}
- if self.current_aligned_write_pos != aligned_position {
- self.current_aligned_write_pos = aligned_position;
+#[cfg(test)]
+impl CodeBlock {
+ /// Stubbed CodeBlock for testing. Can't execute generated code.
+ pub fn new_dummy(mem_size: usize) -> Self {
+ use crate::virtualmem::*;
+ use crate::virtualmem::tests::TestingAllocator;
- #[cfg(not(test))]
- unsafe {
- use core::ffi::c_void;
- let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void;
- crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap());
- }
- }
- }
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+ let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
- pub fn mark_all_executable(&mut self) {
- self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
-
- #[cfg(not(test))]
- unsafe {
- use core::ffi::c_void;
- // NOTE(alan): Right now we do allocate one big chunck and give the top half to the outlined codeblock
- // The start of the top half of the region isn't necessarily a page boundary...
- let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void;
- crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap());
- }
+ Self::new(virt_mem)
}
}
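With this refactor a CodeBlock owns its VirtualMem, write_byte delegates to it, and the write session ends with a single mark_all_executable call. Roughly, the intended flow looks like the sketch below (it uses the test-only new_dummy constructor shown above and is not code from the patch):

    // Sketch: a write session against the VirtualMem-backed CodeBlock.
    let mut cb = CodeBlock::new_dummy(64 * 1024);

    // Each write lazily maps the page it lands on as read+write.
    for &byte in &[0x90u8, 0x90, 0xc3] { // NOP, NOP, RET -- illustrative bytes
        cb.write_byte(byte);
    }

    // One call at the end flips everything written so far to read+execute.
    cb.mark_all_executable();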
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index a2549faab8..6eb7efaa0a 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -862,7 +862,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
let end_ptr = cb.get_ptr(cb.write_pos + 4);
// Compute the jump offset
- let rel64 = (dst_ptr.0 as i64) - (end_ptr.0 as i64);
+ let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
// Write the relative 32-bit jump offset
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
index c77d30e74d..ffcc063420 100644
--- a/yjit/src/asm/x86_64/tests.rs
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -7,7 +7,7 @@ use std::fmt;
impl<'a> fmt::LowerHex for super::CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.add(pos).read() };
+ let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index ca2c237e2d..75249658fb 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -21,6 +21,8 @@ use std::os::raw::c_uint;
use std::ptr;
use std::slice;
+pub use crate::virtualmem::CodePtr;
+
// Callee-saved registers
pub const REG_CFP: X86Opnd = R13;
pub const REG_EC: X86Opnd = R12;
@@ -5982,14 +5984,53 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- let page_size = unsafe { rb_yjit_get_page_size() }.as_usize();
- let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) };
- let cb = CodeBlock::new(mem_block, mem_size / 2, page_size);
- let ocb = OutlinedCb::wrap(CodeBlock::new(
- unsafe { mem_block.add(mem_size / 2) },
- mem_size / 2,
+ // TODO(alan): we can error more gracefully when the user gives
+ // --yjit-exec-mem=absurdly-large-number
+ //
+ // 2 GiB. It's likely a bug if we generate this much code.
+ const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
+ assert!(mem_size <= MAX_BUFFER_SIZE);
+ let mem_size_u32 = mem_size as u32;
+ let half_size = mem_size / 2;
+
+ let page_size = unsafe { rb_yjit_get_page_size() };
+ let assert_page_aligned = |ptr| assert_eq!(
+ 0,
+ ptr as usize % page_size.as_usize(),
+ "Start of virtual address block should be page-aligned",
+ );
+
+ let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
+ let second_half = virt_block.wrapping_add(half_size);
+
+ // Memory protection syscalls need page-aligned addresses, so check it here. Assuming
+ // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
+ // page size in bytes is a power of two 2¹⁹ or smaller. This is because the user-requested
+ // size is mem_option × 2²⁰ bytes (it's given in MiB), so half of it is mem_option × 2¹⁹.
+ //
+ // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
+ // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
+ assert_page_aligned(virt_block);
+ assert_page_aligned(second_half);
+
+ use crate::virtualmem::*;
+
+ let first_half = VirtualMem::new(
+ SystemAllocator {},
page_size,
- ));
+ virt_block,
+ half_size
+ );
+ let second_half = VirtualMem::new(
+ SystemAllocator {},
+ page_size,
+ second_half,
+ half_size
+ );
+
+ let cb = CodeBlock::new(first_half);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(second_half));
+
(cb, ocb)
};
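The alignment reasoning in the comment above can be sanity-checked in a few lines. A standalone sketch (mem_option stands for the MiB value behind mem_size, as the comment assumes; not code from the patch):

    // Sketch: half of a MiB-granular reservation stays page-aligned for any
    // power-of-two page size up to 2^19 bytes.
    let mem_option: usize = 256;               // requested exec mem size in MiB
    let mem_size = mem_option * (1 << 20);     // bytes
    let half_size = mem_size / 2;              // mem_option * 2^19

    for page_size in [1usize << 12, 1 << 14, 1 << 16] { // 4 KiB, 16 KiB, 64 KiB
        // If virt_block is page-aligned, so is virt_block + half_size.
        assert_eq!(0, half_size % page_size);
    }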
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index bbc5e800c0..6d6877f273 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -1,6 +1,7 @@
use crate::asm::x86_64::*;
use crate::asm::*;
use crate::codegen::*;
+use crate::virtualmem::CodePtr;
use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
@@ -9,7 +10,6 @@ use core::ffi::c_void;
use std::cell::*;
use std::hash::{Hash, Hasher};
use std::mem;
-use std::mem::size_of;
use std::rc::{Rc};
use InsnOpnd::*;
use TempMapping::*;
@@ -573,24 +573,22 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// Walk over references to objects in generated code.
for offset in &block.gc_object_offsets {
let offset_to_value = offset.as_usize();
- let value_address: *const u8 = cb.get_ptr(offset_to_value).raw_ptr();
+ let value_code_ptr = cb.get_ptr(offset_to_value);
+ let value_ptr: *const u8 = value_code_ptr.raw_ptr();
// Creating an unaligned pointer is well defined unlike in C.
- let value_address = value_address as *mut VALUE;
+ let value_ptr = value_ptr as *mut VALUE;
// SAFETY: these point to YJIT's code buffer
- let object = unsafe { value_address.read_unaligned() };
+ let object = unsafe { value_ptr.read_unaligned() };
let new_addr = unsafe { rb_gc_location(object) };
- // Only write when the VALUE moves, to be CoW friendly.
+ // Only write when the VALUE moves, to be copy-on-write friendly.
if new_addr != object {
- // Possibly unlock the page we need to update
- cb.mark_position_writable(offset_to_value);
-
- // Object could cross a page boundary, so unlock there as well
- cb.mark_position_writable(offset_to_value + size_of::<VALUE>() - 1);
-
- // SAFETY: we just made this address writable
- unsafe { value_address.write_unaligned(new_addr) };
+ for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
+ let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
+ cb.get_mem().write_byte(byte_code_ptr, byte)
+ .expect("patching existing code should be within bounds");
+ }
}
}
}
@@ -599,8 +597,6 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// Note that we would have returned already if YJIT is off.
cb.mark_all_executable();
- // I guess we need to make the outlined block executable as well because
- // we don't split the two at exact page boundaries.
CodegenGlobals::get_outlined_cb()
.unwrap()
.mark_all_executable();
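Since VirtualMem::write_byte handles the page-protection bookkeeping, the byte-at-a-time patching above generalizes to any multi-byte value. A hypothetical helper (write_unaligned_u64 is an illustrative name, not a function in the patch) would look like:

    // Sketch: write a little-endian u64 through the write_byte interface.
    use crate::virtualmem::{VirtualMem, CodePtr, WriteError};

    fn write_unaligned_u64(mem: &mut VirtualMem, at: CodePtr, value: u64) -> Result<(), WriteError> {
        for (i, &byte) in value.to_le_bytes().iter().enumerate() {
            mem.write_byte(at.add_bytes(i), byte)?;
        }
        Ok(())
    }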
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index da9a84a160..51ba9c1531 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -111,9 +111,6 @@ pub use autogened::*;
// and textually included in this file
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing
extern "C" {
- #[link_name = "rb_yjit_alloc_exec_mem"] // we can rename functions with this attribute
- pub fn alloc_exec_mem(mem_size: u32) -> *mut u8;
-
#[link_name = "rb_insn_name"]
pub fn raw_insn_name(insn: VALUE) -> *const c_char;
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index b5dd356aef..44f87a8482 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -976,7 +976,7 @@ extern "C" {
) -> ::std::os::raw::c_int;
}
extern "C" {
- pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+ pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
}
extern "C" {
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
@@ -992,6 +992,9 @@ extern "C" {
pub fn rb_yjit_get_page_size() -> u32;
}
extern "C" {
+ pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8;
+}
+extern "C" {
pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool;
}
extern "C" {
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
index 2313fdbce6..6772f551a8 100644
--- a/yjit/src/lib.rs
+++ b/yjit/src/lib.rs
@@ -13,3 +13,4 @@ mod options;
mod stats;
mod utils;
mod yjit;
+mod virtualmem;
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index e129cc2811..6bad8db7e7 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -141,7 +141,7 @@ macro_rules! incr_counter {
($counter_name:ident) => {
#[allow(unused_unsafe)]
{
- unsafe { COUNTERS.$counter_name += 1 }
+ unsafe { $crate::stats::COUNTERS.$counter_name += 1 }
}
};
}
@@ -244,6 +244,10 @@ make_counters! {
gbpp_block_param_modified,
gbpp_block_handler_not_iseq,
+
+ // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in
+ // executable memory, so this should be 0.
+ exec_mem_non_bump_alloc,
}
//===========================================================================
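The $crate:: path matters because incr_counter! is now invoked from other modules (virtualmem.rs bumps exec_mem_non_bump_alloc); with a bare COUNTERS the expansion would only resolve in modules that happen to have stats::COUNTERS in scope. A minimal illustration of the pattern, with demo names rather than the real ones (not code from the patch):

    // Sketch: $crate:: makes an exported macro resolve its statics from the crate
    // root, so callers don't need COUNTERS in scope. Demo names throughout.
    #[macro_export]
    macro_rules! incr_demo_counter {
        ($name:ident) => {
            unsafe { $crate::stats_demo::COUNTERS.$name += 1 }
        };
    }

    pub mod stats_demo {
        pub struct Counters { pub exec_mem_non_bump_alloc: u64 }
        pub static mut COUNTERS: Counters = Counters { exec_mem_non_bump_alloc: 0 };
    }

    pub mod virtualmem_demo {
        pub fn bump() {
            // No `use` of COUNTERS needed here.
            crate::incr_demo_counter!(exec_mem_non_bump_alloc);
        }
    }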
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
new file mode 100644
index 0000000000..6a8e27447e
--- /dev/null
+++ b/yjit/src/virtualmem.rs
@@ -0,0 +1,376 @@
+//! Memory management stuff for YJIT's code storage. Deals with virtual memory.
+// I'm aware that there is an experiment in Rust Nightly right now to see if banning
+// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much
+// benefit.
+
+use crate::utils::IntoUsize;
+
+#[cfg(not(test))]
+pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
+
+#[cfg(test)]
+pub type VirtualMem = VirtualMemory<tests::TestingAllocator>;
+
+/// Memory for generated executable machine code. When not testing, we reserve address space for
+/// the entire region upfront and map physical memory into the reserved address space as needed. On
+/// Linux, this is basically done using an `mmap` with `PROT_NONE` upfront and gradually using
+/// `mprotect` with `PROT_READ|PROT_WRITE` as needed. The WIN32 equivalent seems to be
+/// `VirtualAlloc` with `MEM_RESERVE` then later with `MEM_COMMIT`.
+///
+/// This handles ["W^X"](https://en.wikipedia.org/wiki/W%5EX) semi-automatically. Writes
+/// are always accepted and once writes are done a call to [Self::mark_all_executable] makes
+/// the code in the region executable.
+pub struct VirtualMemory<A: Allocator> {
+ /// Location of the virtual memory region.
+ region_start: *mut u8,
+
+ /// Size of the region in bytes.
+ region_size_bytes: usize,
+
+ /// Number of bytes per "page"; memory protection can only be controlled at this
+ /// granularity.
+ page_size_bytes: usize,
+
+ /// Number of bytes that we have allocated physical memory for, starting at
+ /// [Self::region_start].
+ mapped_region_bytes: usize,
+
+ /// Keep track of the address of the last page written to.
+ /// Used for changing protection to implement W^X.
+ current_write_page: Option<usize>,
+
+ /// Zero size member for making syscalls to get physical memory during normal operation.
+ /// When testing this owns some memory.
+ allocator: A,
+}
+
+/// Groups together the two syscalls to get new physical memory and to change
+/// memory protection. See [VirtualMemory] for details.
+pub trait Allocator {
+ #[must_use]
+ fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
+
+ fn mark_executable(&mut self, ptr: *const u8, size: u32);
+}
+
+/// Pointer into a [VirtualMemory].
+/// We may later change this to wrap an u32.
+/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+#[repr(C)]
+pub struct CodePtr(*const u8);
+
+/// Errors that can happen when writing to [VirtualMemory]
+#[derive(Debug, PartialEq)]
+pub enum WriteError {
+ OutOfBounds,
+ FailedPageMapping,
+}
+
+use WriteError::*;
+
+impl<A: Allocator> VirtualMemory<A> {
+ /// Bring a part of the address space under management.
+ pub fn new(allocator: A, page_size: u32, virt_region_start: *mut u8, size_bytes: usize) -> Self {
+ assert_ne!(0, page_size);
+ let page_size_bytes = page_size.as_usize();
+
+ Self {
+ region_start: virt_region_start,
+ region_size_bytes: size_bytes,
+ page_size_bytes,
+ mapped_region_bytes: 0,
+ current_write_page: None,
+ allocator,
+ }
+ }
+
+ /// Return the start of the region as a raw pointer. Note that it could be a dangling
+ /// pointer so be careful dereferencing it.
+ pub fn start_ptr(&self) -> CodePtr {
+ CodePtr(self.region_start)
+ }
+
+ /// Size of the region in bytes where writes could be attempted.
+ pub fn virtual_region_size(&self) -> usize {
+ self.region_size_bytes
+ }
+
+ /// Write a single byte. The first write to a page makes it readable.
+ pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ let page_size = self.page_size_bytes;
+ let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8;
+ let page_addr = (raw as usize / page_size) * page_size;
+
+ if self.current_write_page == Some(page_addr) {
+ // Writing within the last written to page, nothing to do
+ } else {
+ // Switching to a different and potentially new page
+ let start = self.region_start;
+ let mapped_region_end = start.wrapping_add(self.mapped_region_bytes);
+ let whole_region_end = start.wrapping_add(self.region_size_bytes);
+ let alloc = &mut self.allocator;
+
+ assert!((start..=whole_region_end).contains(&mapped_region_end));
+
+ if (start..mapped_region_end).contains(&raw) {
+ // Writing to a previously written to page.
+ // Need to make page writable, but no need to fill.
+ let page_size: u32 = page_size.try_into().unwrap();
+ if !alloc.mark_writable(page_addr as *const _, page_size) {
+ return Err(FailedPageMapping);
+ }
+
+ self.current_write_page = Some(page_addr);
+ } else if (start..whole_region_end).contains(&raw) {
+ // Writing to a brand new page
+ let mapped_region_end_addr = mapped_region_end as usize;
+ let alloc_size = page_addr - mapped_region_end_addr + page_size;
+
+ assert_eq!(0, alloc_size % page_size, "allocation size should be page aligned");
+ assert_eq!(0, mapped_region_end_addr % page_size, "pointer should be page aligned");
+
+ if alloc_size > page_size {
+ // This is unusual for the current setup, so keep track of it.
+ crate::stats::incr_counter!(exec_mem_non_bump_alloc);
+ }
+
+ // Allocate new chunk
+ let alloc_size_u32: u32 = alloc_size.try_into().unwrap();
+ unsafe {
+ if !alloc.mark_writable(mapped_region_end.cast(), alloc_size_u32) {
+ return Err(FailedPageMapping);
+ }
+ // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory
+ // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and goes through the
+ // usual Ruby crash reporter.
+ std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E);
+ }
+ self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
+
+ self.current_write_page = Some(page_addr);
+ } else {
+ return Err(OutOfBounds);
+ }
+ }
+
+ // We have permission to write if we get here
+ unsafe { raw.write(byte) };
+
+ Ok(())
+ }
+
+ /// Make all the code in the region executable. Call this at the end of a write session.
+ /// See [Self] for usual usage flow.
+ pub fn mark_all_executable(&mut self) {
+ self.current_write_page = None;
+
+ let region_start = self.region_start;
+ let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+
+ // Make mapped region executable
+ self.allocator.mark_executable(region_start, mapped_region_bytes);
+ }
+}
+
+impl CodePtr {
+ /// Note that the raw pointer might be dangling if there haven't
+ /// been any writes to it through the [VirtualMemory] yet.
+ pub fn raw_ptr(self) -> *const u8 {
+ let CodePtr(ptr) = self;
+ return ptr;
+ }
+
+ /// Advance the CodePtr. Can return a dangling pointer.
+ pub fn add_bytes(self, bytes: usize) -> Self {
+ let CodePtr(raw) = self;
+ CodePtr(raw.wrapping_add(bytes))
+ }
+
+ pub fn into_i64(self) -> i64 {
+ let CodePtr(ptr) = self;
+ ptr as i64
+ }
+
+ pub fn into_usize(self) -> usize {
+ let CodePtr(ptr) = self;
+ ptr as usize
+ }
+}
+
+impl From<*mut u8> for CodePtr {
+ fn from(value: *mut u8) -> Self {
+ assert!(value as usize != 0);
+ return CodePtr(value);
+ }
+}
+
+/// Requires linking with CRuby to work
+#[cfg(not(test))]
+mod sys {
+ use crate::cruby::*;
+
+ /// Zero size! This just groups together syscalls that require linking with CRuby.
+ pub struct SystemAllocator;
+
+ type VoidPtr = *mut std::os::raw::c_void;
+
+ impl super::Allocator for SystemAllocator {
+ fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool {
+ unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) }
+ }
+
+ fn mark_executable(&mut self, ptr: *const u8, size: u32) {
+ unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
+ }
+ }
+}
+
+#[cfg(not(test))]
+pub(crate) use sys::*;
+
+
+#[cfg(test)]
+pub mod tests {
+ use crate::utils::IntoUsize;
+ use super::*;
+
+ // Tracks allocation requests and owns some fixed-size backing memory for requests.
+ // While testing we don't execute generated code.
+ pub struct TestingAllocator {
+ requests: Vec<AllocRequest>,
+ memory: Vec<u8>,
+ }
+
+ #[derive(Debug)]
+ enum AllocRequest {
+ MarkWritable{ start_idx: usize, length: usize },
+ MarkExecutable{ start_idx: usize, length: usize },
+ }
+ use AllocRequest::*;
+
+ impl TestingAllocator {
+ pub fn new(mem_size: usize) -> Self {
+ Self { requests: Vec::default(), memory: vec![0; mem_size] }
+ }
+
+ pub fn mem_start(&self) -> *const u8 {
+ self.memory.as_ptr()
+ }
+
+ // Verify that write_byte() bounds checks. Return `ptr` as an index.
+ fn bounds_check_request(&self, ptr: *const u8, size: u32) -> usize {
+ let mem_start = self.memory.as_ptr() as usize;
+ let index = ptr as usize - mem_start;
+
+ assert!(index < self.memory.len());
+ assert!(index + size.as_usize() <= self.memory.len());
+
+ index
+ }
+ }
+
+ // Bounds check and then record the request
+ impl super::Allocator for TestingAllocator {
+ fn mark_writable(&mut self, ptr: *const u8, length: u32) -> bool {
+ let index = self.bounds_check_request(ptr, length);
+ self.requests.push(MarkWritable { start_idx: index, length: length.as_usize() });
+
+ true
+ }
+
+ fn mark_executable(&mut self, ptr: *const u8, length: u32) {
+ let index = self.bounds_check_request(ptr, length);
+ self.requests.push(MarkExecutable { start_idx: index, length: length.as_usize() });
+
+ // We don't try to execute generated code in cfg(test)
+ // so no need to actually request executable memory.
+ }
+ }
+
+ // Fictional architecture where each page is 4 bytes long
+ const PAGE_SIZE: usize = 4;
+ fn new_dummy_virt_mem() -> VirtualMemory<TestingAllocator> {
+ let mem_size = PAGE_SIZE * 10;
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+
+ VirtualMemory::new(
+ alloc,
+ PAGE_SIZE.try_into().unwrap(),
+ mem_start as *mut u8,
+ mem_size,
+ )
+ }
+
+ #[test]
+ fn new_memory_is_initialized() {
+ let mut virt = new_dummy_virt_mem();
+
+ virt.write_byte(virt.start_ptr(), 1).unwrap();
+ assert!(
+ virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
+ "Entire page should be initialized",
+ );
+
+ // Skip a few pages
+ let three_pages = 3 * PAGE_SIZE;
+ virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap();
+ assert!(
+ virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
+ "Gaps between write requests should be filled",
+ );
+ }
+
+ #[test]
+ fn no_redundant_syscalls_when_writing_to_the_same_page() {
+ let mut virt = new_dummy_virt_mem();
+
+ virt.write_byte(virt.start_ptr(), 1).unwrap();
+ virt.write_byte(virt.start_ptr(), 0).unwrap();
+
+ assert!(
+ matches!(
+ virt.allocator.requests[..],
+ [MarkWritable { start_idx: 0, length: PAGE_SIZE }],
+ )
+ );
+ }
+
+ #[test]
+ fn bounds_checking() {
+ use super::WriteError::*;
+ use std::ptr;
+ let mut virt = new_dummy_virt_mem();
+
+ let null = CodePtr(ptr::null());
+ assert_eq!(Err(OutOfBounds), virt.write_byte(null, 0));
+
+ let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
+ assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
+
+ let end_of_addr_space = CodePtr(usize::MAX as _);
+ assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0));
+ }
+
+ #[test]
+ fn only_written_to_regions_become_executable() {
+ // ... so we catch attempts to read/write/execute never-written-to regions
+ const THREE_PAGES: usize = PAGE_SIZE * 3;
+ let mut virt = new_dummy_virt_mem();
+ let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2);
+ virt.write_byte(page_two_start, 1).unwrap();
+ virt.mark_all_executable();
+
+ assert!(virt.virtual_region_size() > THREE_PAGES);
+ assert!(
+ matches!(
+ virt.allocator.requests[..],
+ [
+ MarkWritable { start_idx: 0, length: THREE_PAGES },
+ MarkExecutable { start_idx: 0, length: THREE_PAGES },
+ ]
+ ),
+ );
+ }
+}