summaryrefslogtreecommitdiff
path: root/yjit.c
diff options
context:
space:
mode:
authorAlan Wu <XrXr@users.noreply.github.com>2022-06-14 10:23:13 -0400
committerGitHub <noreply@github.com>2022-06-14 10:23:13 -0400
commit9f09397bfe6762bf19ef47b2f60988e49b80560d (patch)
tree2be526b0bc34af44937eab15f31f131c85df6b03 /yjit.c
parent9b9cc8ad34fdecdede439f14c027c5eefef5541e (diff)
YJIT: On-demand executable memory allocation; faster boot (#5944)
This commit makes YJIT allocate memory for generated code gradually as needed. Previously, YJIT allocated all the memory it needed on boot in one go, leading to a higher than necessary resident set size (RSS) and to time spent on boot initializing the memory with a large memset(). Users should no longer need to search for a magic number to pass to `--yjit-exec-mem`, since physical memory consumption should now more accurately reflect the requirements of the workload.

YJIT now reserves a range of addresses on boot. This region starts out with no access permission at all, so buggy attempts to jump into the region crash just as they did before this change. To get this hardening at a finer granularity than the page size, we fill each page with trapping instructions when we first allocate physical memory for the page.

Most of the time applications don't need 256 MiB of executable code, so allocating on demand ends up doing less total work than before. Case in point: a simple `ruby --yjit-call-threshold=1 -eitself` takes about half as long after this change. In terms of memory consumption, here is a table to give a rough summary of the impact:

| Peak RSS in MiB | -eitself example | railsbench once |
| :-------------: | ---------------: | --------------: |
|     before      |              265 |             377 |
|      after      |               11 |             143 |
|     no YJIT     |               10 |             101 |

A new module is introduced to handle allocation bookkeeping. `CodePtr` is moved into the module since it has a close relationship with the new `VirtualMemory` struct. This new interface has a slightly smaller surface than before, in that marking a region as writable is no longer a public operation.
Notes
Notes: Merged-By: maximecb <maximecb@ruby-lang.org>
Diffstat (limited to 'yjit.c')
-rw-r--r--yjit.c51
1 files changed, 24 insertions, 27 deletions
diff --git a/yjit.c b/yjit.c
index 03930706ba..fe9098f0b5 100644
--- a/yjit.c
+++ b/yjit.c
@@ -56,7 +56,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
// types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
// the Rust side.
//
-// What's up with the long prefix? The "rb_" part is to apease `make leaked-globals`
+// What's up with the long prefix? The "rb_" part is to appease `make leaked-globals`
// which runs on upstream CI. The rationale for the check is unclear to Alan as
// we build with `-fvisibility=hidden` so only explicitly marked functions end
// up as public symbols in libruby.so. Perhaps the check is for the static
@@ -66,13 +66,13 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
// The "_yjit_" part is for trying to be informative. We might want different
// suffixes for symbols meant for Rust and symbols meant for broader CRuby.
-void
+bool
rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
{
if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
- rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n",
- mem_block, (unsigned long)mem_size, strerror(errno));
+ return false;
}
+ return true;
}
void
@@ -209,25 +209,29 @@ align_ptr(uint8_t *ptr, uint32_t multiple)
}
#endif
-// Allocate a block of executable memory
+// Address space reservation. Memory pages are mapped on an as needed basis.
+// See the Rust mm module for details.
uint8_t *
-rb_yjit_alloc_exec_mem(uint32_t mem_size)
+rb_yjit_reserve_addr_space(uint32_t mem_size)
{
#ifndef _WIN32
uint8_t *mem_block;
// On Linux
#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
+ uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE);
+ uint8_t *const cfunc_sample_addr = (void *)&rb_yjit_reserve_addr_space;
+ uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX;
// Align the requested address to page size
- uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
- uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size);
+ uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size);
+ // Probe for addresses close to this function using MAP_FIXED_NOREPLACE
+ // to improve odds of being in range for 32-bit relative call instructions.
do {
- // Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)req_addr,
+ mem_block = mmap(
+ req_addr,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
-1,
0
@@ -240,15 +244,15 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// +4MB
req_addr += 4 * 1024 * 1024;
- } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX);
+ } while (req_addr < probe_region_end);
// On MacOS and other platforms
#else
// Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)rb_yjit_alloc_exec_mem,
+ mem_block = mmap(
+ (void *)rb_yjit_reserve_addr_space,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
@@ -258,10 +262,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// Fallback
if (mem_block == MAP_FAILED) {
// Try again without the address hint (e.g., valgrind)
- mem_block = (uint8_t*)mmap(
+ mem_block = mmap(
NULL,
mem_size,
- PROT_READ | PROT_EXEC,
+ PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
@@ -270,17 +274,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size)
// Check that the memory mapping was successful
if (mem_block == MAP_FAILED) {
- perror("mmap call failed");
- exit(-1);
+ perror("ruby: yjit: mmap:");
+ rb_bug("mmap failed");
}
- // Fill the executable memory with PUSH DS (0x1E) so that
- // executing uninitialized memory will fault with #UD in
- // 64-bit mode.
- rb_yjit_mark_writable(mem_block, mem_size);
- memset(mem_block, 0x1E, mem_size);
- rb_yjit_mark_executable(mem_block, mem_size);
-
return mem_block;
#else
// Windows not supported for now