summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/tarball-test.yml14
-rw-r--r--.github/workflows/tarball-ubuntu.yml12
-rw-r--r--benchmark/string_codepoints.yml9
-rw-r--r--gc/mmtk/src/api.rs85
-rw-r--r--gc/mmtk/src/collection.rs13
-rw-r--r--gc/mmtk/src/heap/cpu_heap_trigger.rs370
-rw-r--r--gc/mmtk/src/heap/mod.rs5
-rw-r--r--hash.c8
-rw-r--r--insns.def7
-rw-r--r--internal/hash.h1
-rw-r--r--internal/rational.h4
-rw-r--r--internal/re.h1
-rw-r--r--numeric.c13
-rw-r--r--process.c1
-rw-r--r--rational.c32
-rw-r--r--re.c9
-rw-r--r--string.c9
-rw-r--r--test/ruby/test_float.rb39
-rw-r--r--test/ruby/test_numeric.rb8
-rw-r--r--zjit/bindgen/src/main.rs2
-rw-r--r--zjit/src/codegen.rs22
-rw-r--r--zjit/src/cruby_bindings.inc.rs6
22 files changed, 612 insertions, 58 deletions
diff --git a/.github/workflows/tarball-test.yml b/.github/workflows/tarball-test.yml
index db49977cbf..5adc2708ff 100644
--- a/.github/workflows/tarball-test.yml
+++ b/.github/workflows/tarball-test.yml
@@ -35,7 +35,18 @@ jobs:
BRANCH: master
outputs:
branch: ${{ env.BRANCH }}
+ skip: ${{ steps.skipping.outputs.skip }}
steps:
+ - id: skipping
+ run:
+ echo 'skip=true' >> $GITHUB_OUTPUT
+ if: >-
+ ${{(false
+ || contains(github.event.head_commit.message, '[DOC]')
+ || contains(github.event.pull_request.title, '[DOC]')
+ || contains(github.event.pull_request.labels.*.name, 'Documentation')
+ || (github.event.pull_request.user.login == 'dependabot[bot]')
+ )}}
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
@@ -55,6 +66,7 @@ jobs:
ubuntu:
needs: tarball
+ if: ${{ ! needs.tarball.outputs.skip }}
uses: ./.github/workflows/tarball-ubuntu.yml
with:
archname: snapshot-${{ needs.tarball.outputs.branch }}
@@ -63,6 +75,7 @@ jobs:
macos:
needs: tarball
+ if: ${{ ! needs.tarball.outputs.skip }}
uses: ./.github/workflows/tarball-macos.yml
with:
archname: snapshot-${{ needs.tarball.outputs.branch }}
@@ -71,6 +84,7 @@ jobs:
windows:
needs: tarball
+ if: ${{ ! needs.tarball.outputs.skip }}
uses: ./.github/workflows/tarball-windows.yml
with:
archname: snapshot-${{ needs.tarball.outputs.branch }}
diff --git a/.github/workflows/tarball-ubuntu.yml b/.github/workflows/tarball-ubuntu.yml
index 13606c7eee..f0e773b526 100644
--- a/.github/workflows/tarball-ubuntu.yml
+++ b/.github/workflows/tarball-ubuntu.yml
@@ -69,16 +69,16 @@ jobs:
[
Dir.home,
].each do |dir|
- Pathname(dir).each_child do |pn|
- st = pn.stat
+ Dir.each_child(dir) do |pn|
+ st = File.stat(pn)
if st.file?
- content = Digest::SHA1.hexdigest(pn.read)
+ content = Digest::SHA1.file(pn).hexdigest
elsif st.directory? && st.nlink <= 10
- content = pn.children.map(&:basename).map(&:to_s).sort
+ content = Dir.children(pn).sort
end
- out << [pn.to_s, "%o"%st.mode, st.nlink, st.uid, st.gid, st.size, content].to_s
+ out << [pn, "%o"%st.mode, st.nlink, st.uid, st.gid, st.size, content].to_s
rescue
- out << [pn.to_s, $!.inspect].to_s
+ out << [pn, $!.inspect].to_s
end
end
File.open(ARGV.shift, "w") do |io|
diff --git a/benchmark/string_codepoints.yml b/benchmark/string_codepoints.yml
new file mode 100644
index 0000000000..6a07db7ce1
--- /dev/null
+++ b/benchmark/string_codepoints.yml
@@ -0,0 +1,9 @@
+# Benchmarks String#codepoints on three inputs built in the prelude:
+# - mixed_ascii64:  63 "a" characters followed by one U+0100, repeated 2048 times
+# - mixed_ascii256: 255 "a" characters followed by one U+0100, repeated 512 times
+# - utf8_2byte:     U+0100 repeated 65536 times (no ASCII characters at all)
+prelude: |
+ mixed_ascii64 = ("a" * 63 + "\u{100}") * 2048
+ mixed_ascii256 = ("a" * 255 + "\u{100}") * 512
+ utf8_2byte = "\u{100}" * 65536
+
+benchmark:
+ codepoints_mixed_ascii64: mixed_ascii64.codepoints
+ codepoints_mixed_ascii256: mixed_ascii256.codepoints
+ codepoints_utf8_2byte: utf8_2byte.codepoints
diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs
index 1519d2b623..c0540fe0c8 100644
--- a/gc/mmtk/src/api.rs
+++ b/gc/mmtk/src/api.rs
@@ -14,7 +14,9 @@ use crate::abi::RubyBindingOptions;
use crate::abi::RubyUpcalls;
use crate::binding;
use crate::binding::RubyBinding;
+use crate::heap::CpuHeapTriggerConfig;
use crate::heap::RubyHeapTriggerConfig;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
use crate::heap::RUBY_HEAP_TRIGGER_CONFIG;
use crate::mmtk;
use crate::utils::default_heap_max;
@@ -131,6 +133,42 @@ fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCT
Some(GCTriggerSelector::Delegated)
}
+ "cpu" => {
+ // CPU-overhead-driven heap sizing based on Tavakolisomeh et al.,
+ // "Heap Size Adjustment with CPU Control", MPLR '23.
+ //
+ // Target is expressed as a percentage (0, 100) via
+ // `MMTK_GC_CPU_TARGET`. The paper recommends 15 for ZGC (a
+ // concurrent collector); we default to 5 for MMTk-Ruby. With
+ // MMTk's stop-the-world Immix, every percent of GC CPU is also
+ // a percent of wall-clock the mutator is blocked on, so a much
+ // smaller budget is appropriate. An empirical sweep across
+ // ruby-bench (railsbench, lobsters, psych-load, liquid-render,
+ // lee) found target=5 to be Pareto-optimal: ~6% geomean speedup
+ // vs. the `ruby` heap mode with effectively identical geomean
+ // peak RSS.
+ let target_percent = parse_float_env_var("MMTK_GC_CPU_TARGET", 5.0, 0.0, 100.0);
+ let window_size = parse_env_var::<usize>("MMTK_GC_CPU_WINDOW").unwrap_or(3);
+ let window_size = window_size.max(1);
+
+ let min_heap_pages = conversions::bytes_to_pages_up(heap_min);
+ let max_heap_pages = conversions::bytes_to_pages_up(heap_max);
+ // Start at the min heap size, as the other delegated triggers do.
+ // The control loop will adjust from here after the first GC cycle.
+ let initial_heap_pages = min_heap_pages;
+
+ CPU_HEAP_TRIGGER_CONFIG
+ .set(CpuHeapTriggerConfig {
+ min_heap_pages,
+ max_heap_pages,
+ initial_heap_pages,
+ target_gc_cpu: target_percent / 100.0,
+ window_size,
+ })
+ .unwrap_or_else(|_| panic!("CPU_HEAP_TRIGGER_CONFIG is already set"));
+
+ Some(GCTriggerSelector::Delegated)
+ }
_ => None,
})
.unwrap_or_else(make_dynamic)
@@ -446,11 +484,20 @@ pub extern "C" fn mmtk_heap_mode() -> *const u8 {
static FIXED_HEAP: &[u8] = b"fixed\0";
static DYNAMIC_HEAP: &[u8] = b"dynamic\0";
static RUBY_HEAP: &[u8] = b"ruby\0";
+ static CPU_HEAP: &[u8] = b"cpu\0";
match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(),
GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(),
- GCTriggerSelector::Delegated => RUBY_HEAP.as_ptr(),
+ GCTriggerSelector::Delegated => {
+ // Two delegated triggers exist; disambiguate via the populated
+ // config singleton.
+ if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+ CPU_HEAP.as_ptr()
+ } else {
+ RUBY_HEAP.as_ptr()
+ }
+ }
}
}
@@ -459,12 +506,18 @@ pub extern "C" fn mmtk_heap_min() -> usize {
match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
GCTriggerSelector::FixedHeapSize(_) => 0,
GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size,
- GCTriggerSelector::Delegated => conversions::pages_to_bytes(
- RUBY_HEAP_TRIGGER_CONFIG
- .get()
- .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
- .min_heap_pages,
- ),
+ GCTriggerSelector::Delegated => {
+ if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+ conversions::pages_to_bytes(cfg.min_heap_pages)
+ } else {
+ conversions::pages_to_bytes(
+ RUBY_HEAP_TRIGGER_CONFIG
+ .get()
+ .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+ .min_heap_pages,
+ )
+ }
+ }
}
}
@@ -473,12 +526,18 @@ pub extern "C" fn mmtk_heap_max() -> usize {
match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
GCTriggerSelector::FixedHeapSize(max_size) => max_size,
GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size,
- GCTriggerSelector::Delegated => conversions::pages_to_bytes(
- RUBY_HEAP_TRIGGER_CONFIG
- .get()
- .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
- .max_heap_pages,
- ),
+ GCTriggerSelector::Delegated => {
+ if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+ conversions::pages_to_bytes(cfg.max_heap_pages)
+ } else {
+ conversions::pages_to_bytes(
+ RUBY_HEAP_TRIGGER_CONFIG
+ .get()
+ .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+ .max_heap_pages,
+ )
+ }
+ }
}
}
diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs
index 28daa4f991..648efa4e27 100644
--- a/gc/mmtk/src/collection.rs
+++ b/gc/mmtk/src/collection.rs
@@ -1,7 +1,9 @@
use crate::abi::GCThreadTLS;
use crate::api::RubyMutator;
+use crate::heap::CpuHeapTrigger;
use crate::heap::RubyHeapTrigger;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
use crate::mmtk;
use crate::upcalls;
use crate::Ruby;
@@ -95,7 +97,16 @@ impl Collection<Ruby> for VMCollection {
}
fn create_gc_trigger() -> Box<dyn GCTriggerPolicy<Ruby>> {
- Box::new(RubyHeapTrigger::default())
+ // `GCTriggerSelector::Delegated` is currently used by two different
+ // heap modes: `ruby` (the Ruby-like free-slot ratio trigger) and `cpu`
+ // (the CPU-overhead trigger from Tavakolisomeh et al., MPLR '23).
+ // Which one is active is determined by which `OnceCell` config the
+ // `MMTK_HEAP_MODE` parser populated.
+ if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+ Box::new(CpuHeapTrigger::default())
+ } else {
+ Box::new(RubyHeapTrigger::default())
+ }
}
}
diff --git a/gc/mmtk/src/heap/cpu_heap_trigger.rs b/gc/mmtk/src/heap/cpu_heap_trigger.rs
new file mode 100644
index 0000000000..ef5a79fe9a
--- /dev/null
+++ b/gc/mmtk/src/heap/cpu_heap_trigger.rs
@@ -0,0 +1,370 @@
+//! A GC trigger that adjusts the heap size based on the CPU overhead of GC.
+//!
+//! This is an implementation of the heap sizing policy described in
+//! Tavakolisomeh, Shimchenko, Österlund, Bruno, Ferreira, Wrigstad,
+//! "Heap Size Adjustment with CPU Control", MPLR '23.
+//! <https://doi.org/10.1145/3617651.3622988>
+//!
+//! The idea: rather than letting heap size control GC frequency, let a
+//! user-supplied *target GC CPU overhead* control the heap size. After each GC
+//! cycle, we estimate the GC CPU overhead (wall-clock GC time as a fraction of
+//! the process CPU time elapsed since the previous cycle) and compare it to
+//! the target. If GC is over budget we grow the heap (reducing GC frequency);
+//! if it is under budget we shrink the heap (reclaiming memory at the cost of
+//! more frequent collections).
+//!
+//! ## Algorithm
+//!
+//! After each GC cycle we compute, using an average of the last `n` cycles:
+//!
+//! ```text
+//! GC_CPU = T_GC / T_APP (Eq. 1)
+//! overhead_error = GC_CPU - target (Eq. 2)
+//! sigmoid_error = 1 / (1 + e^(-overhead_error)) (Eq. 3)
+//! adjustment_factor = sigmoid_error + 0.5 (in (0.5, 1.5)) (Eq. 4)
+//! new_size = current_size * adjustment_factor (Eq. 5)
+//! ```
+//!
+//! where:
+//! - `T_GC` is the wall-clock duration of each GC cycle.
+//! - `T_APP` is the process CPU time elapsed between the end of the previous
+//!   sampled GC cycle and the end of the current one (sum of CPU time over all
+//!   threads — mutators, GC workers, compilers, etc.), read via
+//!   `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`. It therefore includes the CPU
+//!   time consumed by the current GC cycle itself.
+//!
+//! The final heap size is then clamped to the range
+//! `[max(1.1 * used, min_heap_pages), max_heap_pages]`, providing 10% headroom
+//! above current live memory to avoid triggering GC on an effectively-empty
+//! heap.
+//!
+//! ## Differences from the paper
+//!
+//! The paper targets ZGC, a concurrent generational collector. MMTk's Ruby
+//! binding currently ships stop-the-world collectors (Immix, MarkSweep). The
+//! paper's formula still applies: with a STW collector the process CPU time
+//! during GC closely tracks the wall-clock GC time, and mutator CPU time
+//! during the mutator phase is correctly attributed. For generational plans
+//! we skip nursery-only GCs, consistent with MemBalancer.
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use std::sync::Mutex;
+
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::heap::SpaceStats;
+use mmtk::Plan;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+use crate::Ruby;
+
+/// Process-wide configuration for [`CpuHeapTrigger`]. It is set once by the
+/// `cpu` heap-mode parser; whether it is populated is also how the rest of the
+/// binding tells the `cpu` delegated trigger apart from the `ruby` one.
+pub static CPU_HEAP_TRIGGER_CONFIG: OnceCell<CpuHeapTriggerConfig> = OnceCell::new();
+
+/// Configuration for the [`CpuHeapTrigger`].
+///
+/// All heap sizes are expressed in pages. The trigger only reads these values;
+/// it never modifies them.
+pub struct CpuHeapTriggerConfig {
+ /// Lower bound on heap size (in pages). The trigger will never shrink below
+ /// this value.
+ pub min_heap_pages: usize,
+ /// Upper bound on heap size (in pages). The trigger will never grow above
+ /// this value.
+ pub max_heap_pages: usize,
+ /// Initial heap size (in pages). Used as the starting value of the
+ /// trigger's target heap size.
+ pub initial_heap_pages: usize,
+ /// Target GC CPU overhead as a fraction of total process CPU time. For
+ /// example, `0.15` means the policy will try to keep GC CPU usage near 15%.
+ /// Valid range: `(0.0, 1.0)`. The `cpu` heap-mode parser derives it from
+ /// the `MMTK_GC_CPU_TARGET` percentage by dividing by 100.
+ pub target_gc_cpu: f64,
+ /// Number of recent GC cycles averaged together when computing the CPU
+ /// overhead signal. Smoothes out short-term fluctuations. The paper uses 3.
+ /// Expected to be at least 1 (the `cpu` heap-mode parser clamps it).
+ pub window_size: usize,
+}
+
+/// A single GC cycle's timing measurements.
+#[derive(Clone, Copy, Debug, Default)]
+struct GcSample {
+ /// Wall-clock seconds spent inside this GC cycle.
+ gc_seconds: f64,
+ /// Seconds of process CPU time elapsed between the end of the previous
+ /// sampled GC cycle and the end of this one. Because the clock is read in
+ /// `on_gc_end`, this covers mutator time plus whatever CPU time the
+ /// process consumed during this GC cycle itself.
+ app_cpu_seconds: f64,
+}
+
+/// Mutable timing state of a [`CpuHeapTrigger`]: the recent samples plus the
+/// two timestamps needed to build the next one.
+struct CpuHeapTriggerState {
+ /// Sliding window of the last `window_size` samples, stored oldest-first in
+ /// a `Vec`; the oldest entry is removed from the front when the window is
+ /// full.
+ samples: Vec<GcSample>,
+ /// Wall-clock time when the current GC cycle started. Set in `on_gc_start`
+ /// and taken (reset to `None`) in `on_gc_end`; it stays set when
+ /// `on_gc_end` returns early for a nursery GC.
+ gc_start_wall: Option<std::time::Instant>,
+ /// Process CPU time (seconds) recorded at the end of the previous sampled
+ /// GC cycle. `None` until the first such cycle completes, and also `None`
+ /// if the last clock query failed.
+ last_gc_end_cpu: Option<f64>,
+}
+
+impl CpuHeapTriggerState {
+    /// Creates an empty state: no samples, no GC in progress and no CPU-time
+    /// baseline.
+    fn new() -> Self {
+        Self {
+            samples: Vec::new(),
+            gc_start_wall: None,
+            last_gc_end_cpu: None,
+        }
+    }
+
+    /// Pushes a new sample, evicting the oldest entries so that at most
+    /// `window_size` samples are retained afterwards.
+    ///
+    /// A `window_size` of zero is treated as one: a window that can hold no
+    /// sample is meaningless, and evicting from an empty `Vec` would panic.
+    /// If the window is smaller than the number of samples already stored,
+    /// all surplus entries are dropped, not just one.
+    fn push_sample(&mut self, sample: GcSample, window_size: usize) {
+        let window_size = window_size.max(1);
+        if self.samples.len() >= window_size {
+            // Leave room for exactly one new entry.
+            let excess = self.samples.len() + 1 - window_size;
+            self.samples.drain(..excess);
+        }
+        self.samples.push(sample);
+    }
+
+    /// Returns the GC CPU overhead over the whole window, computed as the
+    /// total GC seconds divided by the total application CPU seconds (i.e. a
+    /// ratio of sums, not a mean of per-sample ratios).
+    ///
+    /// Returns `None` when the window is empty (e.g. after the first GC
+    /// cycle, which has no baseline for `app_cpu_seconds`) or when the total
+    /// application CPU time is not positive.
+    fn mean_gc_cpu(&self) -> Option<f64> {
+        if self.samples.is_empty() {
+            return None;
+        }
+        let total_gc: f64 = self.samples.iter().map(|s| s.gc_seconds).sum();
+        let total_app: f64 = self.samples.iter().map(|s| s.app_cpu_seconds).sum();
+        if total_app <= 0.0 {
+            return None;
+        }
+        Some(total_gc / total_app)
+    }
+}
+
+/// GC trigger policy that resizes the heap from measured GC CPU overhead.
+/// Holds the current target heap size and the timing state used to update it.
+pub struct CpuHeapTrigger {
+ /// Target heap size in pages. Updated at the end of each GC cycle.
+ target_heap_pages: AtomicUsize,
+ /// Mutable timing state. Wrapped in a `Mutex` because `on_gc_start` and
+ /// `on_gc_end` are the only mutation sites and they are not on an
+ /// allocation hot path; avoiding the complexity of lock-free state is
+ /// worth the trivial contention.
+ state: Mutex<CpuHeapTriggerState>,
+}
+
+impl Default for CpuHeapTrigger {
+    /// Builds a trigger whose target heap size starts at the configured
+    /// initial size and whose timing state is empty.
+    ///
+    /// Panics (via `get_config`) if the configuration has not been set yet.
+    fn default() -> Self {
+        let initial_pages = Self::get_config().initial_heap_pages;
+        let target_heap_pages = AtomicUsize::new(initial_pages);
+        let state = Mutex::new(CpuHeapTriggerState::new());
+        Self {
+            target_heap_pages,
+            state,
+        }
+    }
+}
+
+// `GCTriggerPolicy` implementation: the decision to collect is left to the
+// plan, while the target heap size is recomputed from GC/CPU timing at the end
+// of each sampled GC cycle.
+impl GCTriggerPolicy<Ruby> for CpuHeapTrigger {
+ /// Delegates the "is a collection required now?" decision to the plan.
+ fn is_gc_required(
+ &self,
+ space_full: bool,
+ space: Option<SpaceStats<Ruby>>,
+ plan: &dyn Plan<VM = Ruby>,
+ ) -> bool {
+ // Let the plan decide, matching the other triggers.
+ plan.collection_required(space_full, space)
+ }
+
+ /// Records the wall-clock time at which this GC cycle started.
+ fn on_gc_start(&self, _mmtk: &'static MMTK<Ruby>) {
+ let mut state = self.state.lock().unwrap();
+ state.gc_start_wall = Some(std::time::Instant::now());
+ }
+
+ /// Records a timing sample for the cycle that just finished and, when the
+ /// window yields an overhead value, stores a new target heap size.
+ fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) {
+ // Skip nursery-only GCs for generational plans. The heap resizing
+ // decision is driven by the (much more expensive) full collections
+ // where the signal-to-noise ratio is high enough to be useful.
+ // Returning here leaves `gc_start_wall` and `last_gc_end_cpu` untouched,
+ // so the next sampled cycle's CPU interval spans the skipped cycles.
+ if let Some(gen_plan) = mmtk.get_plan().generational() {
+ if gen_plan.is_current_gc_nursery() {
+ return;
+ }
+ }
+
+ let cfg = Self::get_config();
+ let gc_end_cpu = process_cpu_time_seconds();
+
+ let mut state = self.state.lock().unwrap();
+
+ // Duration of this GC cycle (wall clock).
+ let gc_seconds = state
+ .gc_start_wall
+ .take()
+ .map(|start| start.elapsed().as_secs_f64())
+ .unwrap_or(0.0);
+
+ // Process CPU time elapsed since the previous GC cycle ended. We
+ // require at least one previous end timestamp to produce a valid
+ // sample — without it we cannot compute `T_APP`.
+ if let (Some(last_end), Some(now)) = (state.last_gc_end_cpu, gc_end_cpu) {
+ let app_cpu_seconds = (now - last_end).max(0.0);
+ // Only record non-degenerate samples to avoid poisoning the window
+ // with zero-time entries from back-to-back GCs.
+ if app_cpu_seconds > 0.0 {
+ state.push_sample(
+ GcSample {
+ gc_seconds,
+ app_cpu_seconds,
+ },
+ cfg.window_size,
+ );
+ }
+ }
+ // Becomes `None` if the clock query failed, which suppresses the next
+ // cycle's sample as well.
+ state.last_gc_end_cpu = gc_end_cpu;
+
+ // Compute the new heap size only when we have samples to average over.
+ if let Some(gc_cpu) = state.mean_gc_cpu() {
+ // Drop the lock before doing the (relatively cheap) math and
+ // atomic update; nothing below needs the state.
+ drop(state);
+
+ let overhead_error = gc_cpu - cfg.target_gc_cpu; // Eq. (2)
+ let sigmoid_error = sigmoid(overhead_error); // Eq. (3)
+ let adjustment_factor = sigmoid_error + 0.5; // Eq. (4), range (0.5, 1.5)
+
+ let current = self.target_heap_pages.load(Ordering::Relaxed);
+ let suggested = ((current as f64) * adjustment_factor) as usize; // Eq. (5)
+
+ // Clamp:
+ // - upper bound: configured max
+ // - lower bound: max(1.1 * used, min) — 10% headroom above current
+ // live memory, so we never request a heap so small that GC is
+ // triggered immediately on return from this one.
+ let used = mmtk.get_plan().get_used_pages();
+ let floor = ((used as f64) * 1.1).ceil() as usize;
+ // The trailing `.min(max)` keeps `lower <= upper` for the clamp below.
+ let lower = floor.max(cfg.min_heap_pages).min(cfg.max_heap_pages);
+ let upper = cfg.max_heap_pages;
+ let new_target = suggested.clamp(lower, upper);
+
+ self.target_heap_pages.store(new_target, Ordering::Relaxed);
+
+ info!(
+ "CpuHeapTrigger: gc_cpu={:.4} target={:.4} factor={:.4} \
+ pages {} -> {} (used={}, clamp=[{}, {}])",
+ gc_cpu,
+ cfg.target_gc_cpu,
+ adjustment_factor,
+ current,
+ new_target,
+ used,
+ lower,
+ upper
+ );
+ }
+ }
+
+ /// The heap counts as full once the plan's reserved pages exceed the
+ /// current target.
+ fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool {
+ plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed)
+ }
+
+ /// Returns the current target heap size in pages.
+ fn get_current_heap_size_in_pages(&self) -> usize {
+ self.target_heap_pages.load(Ordering::Relaxed)
+ }
+
+ /// Returns the configured maximum heap size in pages.
+ fn get_max_heap_size_in_pages(&self) -> usize {
+ Self::get_config().max_heap_pages
+ }
+
+ /// Returns `true` while the current target is below the configured maximum.
+ fn can_heap_size_grow(&self) -> bool {
+ self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages
+ }
+}
+
+impl CpuHeapTrigger {
+    /// Returns the global trigger configuration.
+    ///
+    /// Panics if `CPU_HEAP_TRIGGER_CONFIG` has not been populated yet.
+    fn get_config<'b>() -> &'b CpuHeapTriggerConfig {
+        match CPU_HEAP_TRIGGER_CONFIG.get() {
+            Some(config) => config,
+            None => panic!("Attempt to use CPU_HEAP_TRIGGER_CONFIG before it is initialized"),
+        }
+    }
+}
+
+/// Logistic function `1 / (1 + e^(-x))`: exactly 0.5 at `x == 0`, approaching
+/// 0 for large negative `x` and 1 for large positive `x`.
+fn sigmoid(x: f64) -> f64 {
+    let decay = (-x).exp();
+    (1.0 + decay).recip()
+}
+
+/// Returns the CPU time consumed by the whole process (all threads), in
+/// seconds, as reported by `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`.
+///
+/// Yields `None` when the clock query reports failure, which should be
+/// essentially impossible on supported platforms.
+fn process_cpu_time_seconds() -> Option<f64> {
+    let mut now = libc::timespec {
+        tv_sec: 0,
+        tv_nsec: 0,
+    };
+    // SAFETY: `clock_gettime` writes exactly `sizeof(timespec)` bytes to the
+    // pointer we pass, which is a valid local stack allocation.
+    let status = unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut now) };
+    if status == 0 {
+        let whole_seconds = now.tv_sec as f64;
+        let fractional_seconds = (now.tv_nsec as f64) / 1_000_000_000.0;
+        Some(whole_seconds + fractional_seconds)
+    } else {
+        None
+    }
+}
+
+// Unit tests for the pure pieces of the trigger: the sigmoid, the adjustment
+// factor range, and the sample window bookkeeping. Nothing here touches MMTk
+// or the process clock.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // The sigmoid is 0.5 at zero, saturates towards 0 and 1, and is increasing.
+ #[test]
+ fn sigmoid_is_well_behaved() {
+ assert!((sigmoid(0.0) - 0.5).abs() < 1e-12);
+ assert!(sigmoid(-100.0) < 1e-9);
+ assert!(sigmoid(100.0) > 1.0 - 1e-9);
+ // Monotonic.
+ assert!(sigmoid(-1.0) < sigmoid(0.0));
+ assert!(sigmoid(0.0) < sigmoid(1.0));
+ }
+
+ // The per-cycle resize factor stays strictly inside (0.5, 1.5).
+ #[test]
+ fn adjustment_factor_is_within_paper_bounds() {
+ // Eq. (4): adjustment_factor = sigmoid(e) + 0.5 must lie in (0.5, 1.5).
+ for e in [-10.0_f64, -1.0, 0.0, 1.0, 10.0] {
+ let f = sigmoid(e) + 0.5;
+ assert!(f > 0.5 && f < 1.5, "factor {f} out of range for e={e}");
+ }
+ }
+
+ // The overhead is a ratio of window totals, not a mean of per-sample ratios.
+ #[test]
+ fn mean_gc_cpu_is_total_weighted() {
+ let mut state = CpuHeapTriggerState::new();
+ state.push_sample(
+ GcSample {
+ gc_seconds: 1.0,
+ app_cpu_seconds: 10.0,
+ },
+ 3,
+ );
+ state.push_sample(
+ GcSample {
+ gc_seconds: 3.0,
+ app_cpu_seconds: 10.0,
+ },
+ 3,
+ );
+ // (1 + 3) / (10 + 10) = 0.2
+ assert!((state.mean_gc_cpu().unwrap() - 0.2).abs() < 1e-12);
+ }
+
+ // Pushing past the window size keeps only the most recent samples.
+ #[test]
+ fn window_drops_oldest() {
+ let mut state = CpuHeapTriggerState::new();
+ for i in 0..5 {
+ state.push_sample(
+ GcSample {
+ gc_seconds: i as f64,
+ app_cpu_seconds: 1.0,
+ },
+ 3,
+ );
+ }
+ assert_eq!(state.samples.len(), 3);
+ // After pushing 0,1,2,3,4 with window 3, we should have [2,3,4].
+ assert_eq!(state.samples[0].gc_seconds, 2.0);
+ assert_eq!(state.samples[2].gc_seconds, 4.0);
+ }
+
+ // A fresh state has no samples, so no overhead value is available.
+ #[test]
+ fn no_sample_without_prior_gc() {
+ // First GC cycle cannot produce a sample (no previous end time). The
+ // push happens only when last_gc_end_cpu is Some.
+ let state = CpuHeapTriggerState::new();
+ assert!(state.mean_gc_cpu().is_none());
+ }
+}
diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs
index 6af7c1b2e5..05a35efb23 100644
--- a/gc/mmtk/src/heap/mod.rs
+++ b/gc/mmtk/src/heap/mod.rs
@@ -1,4 +1,9 @@
+mod cpu_heap_trigger;
mod ruby_heap_trigger;
+
+pub use cpu_heap_trigger::CpuHeapTrigger;
+pub use cpu_heap_trigger::CpuHeapTriggerConfig;
+pub use cpu_heap_trigger::CPU_HEAP_TRIGGER_CONFIG;
pub use ruby_heap_trigger::RubyHeapTrigger;
pub use ruby_heap_trigger::RubyHeapTriggerConfig;
pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG;
diff --git a/hash.c b/hash.c
index fbad278bee..7fae2f284d 100644
--- a/hash.c
+++ b/hash.c
@@ -5109,6 +5109,14 @@ rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
}
}
+/*
+ * Allocates a new hash sized for argc / 2 entries and bulk-inserts the argc
+ * values found at argv into it via rb_hash_bulk_insert().
+ * NOTE(review): argv is presumably laid out as alternating key/value pairs,
+ * as the argc / 2 size hint suggests -- confirm against rb_hash_bulk_insert().
+ */
+VALUE
+rb_hash_new_with_bulk_insert(long argc, const VALUE *argv)
+{
+    VALUE hash = rb_hash_new_with_size(argc / 2);
+
+    rb_hash_bulk_insert(argc, argv, hash);
+    return hash;
+}
+
static char **origenviron;
#ifdef _WIN32
#define GET_ENVIRON(e) ((e) = rb_w32_get_environ())
diff --git a/insns.def b/insns.def
index e32caef2dc..26a44299db 100644
--- a/insns.def
+++ b/insns.def
@@ -429,9 +429,7 @@ toregexp
// attr bool leaf = false;
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)cnt;
{
- const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt));
- val = rb_reg_new_ary(ary, (int)opt);
- rb_ary_clear(ary);
+ val = rb_reg_new_from_values(cnt, STACK_ADDR_FROM_TOP(cnt), (int)opt);
}
/* intern str to Symbol and push it. */
@@ -591,8 +589,7 @@ newhash
RUBY_DTRACE_CREATE_HOOK(HASH, num);
if (num) {
- val = rb_hash_new_with_size(num / 2);
- rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
+ val = rb_hash_new_with_bulk_insert(num, STACK_ADDR_FROM_TOP(num));
}
else {
val = rb_hash_new();
diff --git a/internal/hash.h b/internal/hash.h
index 6671cd496d..baf5af9abd 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -112,6 +112,7 @@ int rb_hash_stlike_foreach(VALUE hash, st_foreach_callback_func *func, st_data_t
RUBY_SYMBOL_EXPORT_END
VALUE rb_hash_new_with_size(st_index_t size);
+VALUE rb_hash_new_with_bulk_insert(long argc, const VALUE *argv);
VALUE rb_hash_resurrect(VALUE hash);
int rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval);
VALUE rb_hash_keys(VALUE hash);
diff --git a/internal/rational.h b/internal/rational.h
index f7e382af8c..6861a90130 100644
--- a/internal/rational.h
+++ b/internal/rational.h
@@ -38,7 +38,9 @@ VALUE rb_rational_cmp(VALUE self, VALUE other);
VALUE rb_rational_pow(VALUE self, VALUE other);
VALUE rb_rational_floor(VALUE self, int ndigits);
VALUE rb_numeric_quo(VALUE x, VALUE y);
-VALUE rb_flo_round_by_rational(int argc, VALUE *argv, VALUE num);
+VALUE rb_flo_round_by_rational(VALUE num, int ndigits, enum ruby_num_rounding_mode mode);
+VALUE rb_flo_ceil_by_rational(VALUE num, int ndigits);
+VALUE rb_flo_floor_by_rational(VALUE num, int ndigits);
VALUE rb_float_numerator(VALUE x);
VALUE rb_float_denominator(VALUE x);
diff --git a/internal/re.h b/internal/re.h
index da165e4756..3ad364a1a6 100644
--- a/internal/re.h
+++ b/internal/re.h
@@ -69,6 +69,7 @@ VALUE rb_backref_set_string(VALUE string, long pos, long len);
void rb_match_unbusy(VALUE);
int rb_match_count(VALUE match);
VALUE rb_reg_new_ary(VALUE ary, int options);
+VALUE rb_reg_new_from_values(long cnt, const VALUE *elements, int opt);
VALUE rb_reg_last_defined(VALUE match);
#define ARG_REG_OPTION_MASK \
diff --git a/numeric.c b/numeric.c
index 175bd7cfa0..f4dc0f8927 100644
--- a/numeric.c
+++ b/numeric.c
@@ -75,6 +75,8 @@
#define DBL_EPSILON 2.2204460492503131e-16
#endif
+/* pow(10, ndigits) may not be accurate once ndigits reaches DBL_DIG; callers
+ * fall back to the Rational-based implementations when this is false. */
+#define ACCURATE_POW10(ndigits) ((ndigits) < DBL_DIG)
+
#ifndef USE_RB_INFINITY
#elif !defined(WORDS_BIGENDIAN) /* BYTE_ORDER == LITTLE_ENDIAN */
const union bytesequence4_or_float rb_infinity = {{0x00, 0x00, 0x80, 0x7f}};
@@ -2018,6 +2020,9 @@ rb_float_floor(VALUE num, int ndigits)
if (float_round_overflow(ndigits, binexp)) return num;
if (number > 0.0 && float_round_underflow(ndigits, binexp))
return DBL2NUM(0.0);
+ if (!ACCURATE_POW10(ndigits)) {
+ return rb_flo_floor_by_rational(num, ndigits);
+ }
f = pow(10, ndigits);
mul = floor(number * f);
res = (mul + 1) / f;
@@ -2226,6 +2231,9 @@ rb_float_ceil(VALUE num, int ndigits)
if (float_round_overflow(ndigits, binexp)) return num;
if (number < 0.0 && float_round_underflow(ndigits, binexp))
return DBL2NUM(0.0);
+ if (!ACCURATE_POW10(ndigits)) {
+ return rb_flo_ceil_by_rational(num, ndigits);
+ }
f = pow(10, ndigits);
f = ceil(number * f) / f;
return DBL2NUM(f);
@@ -2490,9 +2498,8 @@ flo_round(int argc, VALUE *argv, VALUE num)
frexp(number, &binexp);
if (float_round_overflow(ndigits, binexp)) return num;
if (float_round_underflow(ndigits, binexp)) return DBL2NUM(0);
- if (ndigits > 14) {
- /* In this case, pow(10, ndigits) may not be accurate. */
- return rb_flo_round_by_rational(argc, argv, num);
+ if (!ACCURATE_POW10(ndigits)) {
+ return rb_flo_round_by_rational(num, ndigits, mode);
}
f = pow(10, ndigits);
x = ROUND_CALL(mode, round, (number, f));
diff --git a/process.c b/process.c
index 126e36ee8d..cef1b09bd0 100644
--- a/process.c
+++ b/process.c
@@ -2888,7 +2888,6 @@ void
rb_execarg_parent_end(VALUE execarg_obj)
{
execarg_parent_end(execarg_obj);
- RB_GC_GUARD(execarg_obj);
}
static void
diff --git a/rational.c b/rational.c
index d6214451b4..b031838d69 100644
--- a/rational.c
+++ b/rational.c
@@ -1374,10 +1374,12 @@ nurat_round_half_even(VALUE self)
return num;
}
+static VALUE f_round_n(VALUE self, VALUE n, VALUE (*func)(VALUE)) ;
+
static VALUE
f_round_common(int argc, VALUE *argv, VALUE self, VALUE (*func)(VALUE))
{
- VALUE n, b, s;
+ VALUE n;
if (rb_check_arity(argc, 0, 1) == 0)
return (*func)(self);
@@ -1387,6 +1389,14 @@ f_round_common(int argc, VALUE *argv, VALUE self, VALUE (*func)(VALUE))
if (!k_integer_p(n))
rb_raise(rb_eTypeError, "not an integer");
+ return f_round_n(self, n, func);
+}
+
+static VALUE
+f_round_n(VALUE self, VALUE n, VALUE (*func)(VALUE))
+{
+ VALUE b, s;
+
b = f_expt10(n);
s = rb_rational_mul(self, b);
@@ -1417,8 +1427,7 @@ rb_rational_floor(VALUE self, int ndigits)
return nurat_floor(self);
}
else {
- VALUE n = INT2NUM(ndigits);
- return f_round_common(1, &n, self, nurat_floor);
+ return f_round_n(self, INT2NUM(ndigits), nurat_floor);
}
}
@@ -1561,9 +1570,22 @@ nurat_round_n(int argc, VALUE *argv, VALUE self)
}
VALUE
-rb_flo_round_by_rational(int argc, VALUE *argv, VALUE num)
+rb_flo_round_by_rational(VALUE num, int ndigits, enum ruby_num_rounding_mode mode)
+{
+ VALUE (*round_func)(VALUE) = ROUND_FUNC(mode, nurat_round);
+ return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), round_func));
+}
+
+VALUE
+rb_flo_ceil_by_rational(VALUE num, int ndigits)
+{
+ return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), nurat_ceil));
+}
+
+VALUE
+rb_flo_floor_by_rational(VALUE num, int ndigits)
{
- return nurat_to_f(nurat_round_n(argc, argv, float_to_r(num)));
+ return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), nurat_floor));
}
static double
diff --git a/re.c b/re.c
index b778fa08f3..ec337cd21c 100644
--- a/re.c
+++ b/re.c
@@ -3529,6 +3529,15 @@ rb_reg_new_ary(VALUE ary, int opt)
}
VALUE
+rb_reg_new_from_values(long cnt, const VALUE *elements, int opt)
+{
+ const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, elements);
+ VALUE val = rb_reg_new_ary(ary, (int)opt);
+ rb_ary_clear(ary);
+ return val;
+}
+
+VALUE
rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
{
VALUE re = rb_reg_alloc();
diff --git a/string.c b/string.c
index eb249662db..a59340adfd 100644
--- a/string.c
+++ b/string.c
@@ -9772,6 +9772,7 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
unsigned int c;
const char *ptr, *end;
rb_encoding *enc;
+ int enc_asciicompat;
if (single_byte_optimizable(str))
return rb_str_enumerate_bytes(str, ary);
@@ -9780,9 +9781,15 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
enc = STR_ENC_GET(str);
+ enc_asciicompat = rb_enc_asciicompat(enc);
while (ptr < end) {
- c = rb_enc_codepoint_len(ptr, end, &n, enc);
+ /* Fast path: ASCII byte in an ASCII-compatible encoding is its own codepoint;
+ * skip rb_enc_codepoint_len and return the byte directly.
+ */
+ n = 1;
+ c = (enc_asciicompat && ISASCII(*ptr)) ?
+ (unsigned char)*ptr : rb_enc_codepoint_len(ptr, end, &n, enc);
ENUM_ELEM(ary, UINT2NUM(c));
ptr += n;
}
diff --git a/test/ruby/test_float.rb b/test/ruby/test_float.rb
index d0d180593a..c01e8bb80b 100644
--- a/test/ruby/test_float.rb
+++ b/test/ruby/test_float.rb
@@ -492,6 +492,22 @@ class TestFloat < Test::Unit::TestCase
assert_equal(-1.26, -1.255.round(2))
end
+ def test_round_ndigits
+   # 0.5 must round to itself for every ndigits from 1 through 31.
+   bug14635 = "[ruby-core:86323]"
+   half = 0.5
+   (0...31).each do |i|
+     assert_equal(0.5, half.round(i+1), bug14635 + " (argument: #{i+1})")
+   end
+ end
+
+ def test_round_with_precision_min
+   # Near the smallest normal float, round(n) must agree with either
+   # floor(n) or ceil(n).
+   smallest = Float::MIN
+   (0..3).each do |n|
+     n -= Float::MIN_10_EXP
+     rounded = smallest.round(n)
+     candidates = [smallest.floor(n), smallest.ceil(n)]
+     assert_include(candidates, rounded, "round(#{n})")
+   end
+ end
+
def test_round_half_even_with_precision
assert_equal(767573.18759, 767573.1875850001.round(5, half: :even))
assert_equal(767573.18758, 767573.187585.round(5, half: :even))
@@ -536,6 +552,16 @@ class TestFloat < Test::Unit::TestCase
assert_equal(-100000000000000000000000000000000000000000000000000, -1.0.floor(-50), "[Bug #20654]")
end
+ def test_floor_with_precision_min
+   # Near the smallest normal float, floor(n) must not exceed the value itself
+   # and must not fall below the coarser floor(n-1).
+   min = Float::MIN
+   (0..3).each do |n|
+     n -= Float::MIN_10_EXP
+     f = min.floor(n)
+     assert_operator(f, :<=, min, "floor(#{n})")
+     # The failure message previously said "ceil", a copy-paste slip.
+     assert_operator(f, :>=, min.floor(n-1), "floor(#{n})")
+   end
+ end
+
def test_ceil_with_precision
assert_equal(+0.1, +0.001.ceil(1))
assert_equal(-0.0, -0.001.ceil(1))
@@ -567,6 +593,19 @@ class TestFloat < Test::Unit::TestCase
assert_equal(100000000000000000000000000000000000000000000000000, 1.0.ceil(-50), "[Bug #20654]")
end
+ def test_ceil_with_precision_min
+   min = Float::MIN
+   # For every precision below -MIN_10_EXP, the ceiling of the smallest normal
+   # float is expected to equal 10**-n.
+   (0...(-Float::MIN_10_EXP)).each do |n|
+     assert_equal(10.pow(-n), min.ceil(n))
+   end
+   # At and just past -MIN_10_EXP, ceil(n) must stay at or above the value
+   # itself and must not exceed the coarser ceil(n-1).
+   (0..3).each do |n|
+     n -= Float::MIN_10_EXP
+     ceiled = min.ceil(n)
+     assert_operator(ceiled, :>=, min, "ceil(#{n})")
+     assert_operator(ceiled, :<=, min.ceil(n-1), "ceil(#{n})")
+   end
+ end
+
def test_truncate_with_precision
assert_equal(1.100, 1.111.truncate(1))
assert_equal(1.110, 1.111.truncate(2))
diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb
index 35496ac875..b272b89921 100644
--- a/test/ruby/test_numeric.rb
+++ b/test/ruby/test_numeric.rb
@@ -206,14 +206,6 @@ class TestNumeric < Test::Unit::TestCase
assert_nil(a <=> :foo)
end
- def test_float_round_ndigits
- bug14635 = "[ruby-core:86323]"
- f = 0.5
- 31.times do |i|
- assert_equal(0.5, f.round(i+1), bug14635 + " (argument: #{i+1})")
- end
- end
-
def test_floor_ceil_round_truncate
a = Class.new(Numeric) do
def to_f; 1.5; end
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index 573eb37a72..2cde74facd 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -123,6 +123,7 @@ fn main() {
.allowlist_function("rb_hash_aset")
.allowlist_function("rb_hash_aref")
.allowlist_function("rb_hash_bulk_insert")
+ .allowlist_function("rb_hash_new_with_bulk_insert")
.allowlist_function("rb_hash_stlike_lookup")
.allowlist_function("rb_ary_new_capa")
.allowlist_function("rb_ary_store")
@@ -215,6 +216,7 @@ fn main() {
.allowlist_function("rb_reg_match_last")
.allowlist_function("rb_reg_nth_match")
.allowlist_function("rb_reg_new_ary")
+ .allowlist_function("rb_reg_new_from_values")
.allowlist_var("ARG_ENCODING_FIXED")
.allowlist_var("ARG_ENCODING_NONE")
.allowlist_var("ONIG_OPTION_IGNORECASE")
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index f1ef17d794..4eee769315 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -2111,19 +2111,17 @@ fn gen_new_hash(
elements: Vec<Opnd>,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
-
- let cap: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long");
- let new_hash = asm_ccall!(asm, rb_hash_new_with_size, lir::Opnd::Imm(cap));
+ if elements.is_empty() {
+ gen_prepare_leaf_call_with_gc(asm, state);
+ asm_ccall!(asm, rb_hash_new,)
+ } else {
+ gen_prepare_non_leaf_call(jit, asm, state);
- if !elements.is_empty() {
let argv = gen_push_opnds(asm, &elements);
- asm_ccall!(asm, rb_hash_bulk_insert, elements.len().into(), argv, new_hash);
-
+ let hash = asm_ccall!(asm, rb_hash_new_with_bulk_insert, elements.len().into(), argv);
gen_pop_opnds(asm, &elements);
+ hash
}
-
- new_hash
}
/// Compile a new range instruction
@@ -3401,11 +3399,7 @@ fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec
gen_prepare_non_leaf_call(jit, asm, state);
let first_opnd_ptr = gen_push_opnds(asm, &values);
-
- let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
- let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into());
- asm_ccall!(asm, rb_ary_clear, tmp_ary);
-
+ let result = asm_ccall!(asm, rb_reg_new_from_values, values.len().into(), first_opnd_ptr, opt.into());
gen_pop_opnds(asm, &values);
result
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index c61e61edd1..5a7c3de606 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -2060,6 +2060,11 @@ unsafe extern "C" {
pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE;
pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
+ pub fn rb_reg_new_from_values(
+ cnt: ::std::os::raw::c_long,
+ elements: *const VALUE,
+ opt: ::std::os::raw::c_int,
+ ) -> VALUE;
pub fn rb_ary_tmp_new_from_values(
arg1: VALUE,
arg2: ::std::os::raw::c_long,
@@ -2132,6 +2137,7 @@ unsafe extern "C" {
arg: st_data_t,
) -> ::std::os::raw::c_int;
pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
+ pub fn rb_hash_new_with_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE) -> VALUE;
pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
pub fn rb_hash_stlike_lookup(
hash: VALUE,