diff options
| -rw-r--r-- | .github/workflows/tarball-test.yml | 14 | ||||
| -rw-r--r-- | .github/workflows/tarball-ubuntu.yml | 12 | ||||
| -rw-r--r-- | benchmark/string_codepoints.yml | 9 | ||||
| -rw-r--r-- | gc/mmtk/src/api.rs | 85 | ||||
| -rw-r--r-- | gc/mmtk/src/collection.rs | 13 | ||||
| -rw-r--r-- | gc/mmtk/src/heap/cpu_heap_trigger.rs | 370 | ||||
| -rw-r--r-- | gc/mmtk/src/heap/mod.rs | 5 | ||||
| -rw-r--r-- | hash.c | 8 | ||||
| -rw-r--r-- | insns.def | 7 | ||||
| -rw-r--r-- | internal/hash.h | 1 | ||||
| -rw-r--r-- | internal/rational.h | 4 | ||||
| -rw-r--r-- | internal/re.h | 1 | ||||
| -rw-r--r-- | numeric.c | 13 | ||||
| -rw-r--r-- | process.c | 1 | ||||
| -rw-r--r-- | rational.c | 32 | ||||
| -rw-r--r-- | re.c | 9 | ||||
| -rw-r--r-- | string.c | 9 | ||||
| -rw-r--r-- | test/ruby/test_float.rb | 39 | ||||
| -rw-r--r-- | test/ruby/test_numeric.rb | 8 | ||||
| -rw-r--r-- | zjit/bindgen/src/main.rs | 2 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 22 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 6 |
22 files changed, 612 insertions, 58 deletions
diff --git a/.github/workflows/tarball-test.yml b/.github/workflows/tarball-test.yml index db49977cbf..5adc2708ff 100644 --- a/.github/workflows/tarball-test.yml +++ b/.github/workflows/tarball-test.yml @@ -35,7 +35,18 @@ jobs: BRANCH: master outputs: branch: ${{ env.BRANCH }} + skip: ${{ steps.skipping.outputs.skip }} steps: + - id: skipping + run: + echo 'skip=true' >> $GITHUB_OUTPUT + if: >- + ${{(false + || contains(github.event.head_commit.message, '[DOC]') + || contains(github.event.pull_request.title, '[DOC]') + || contains(github.event.pull_request.labels.*.name, 'Documentation') + || (github.event.pull_request.user.login == 'dependabot[bot]') + )}} - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 @@ -55,6 +66,7 @@ jobs: ubuntu: needs: tarball + if: ${{ ! needs.tarball.outputs.skip }} uses: ./.github/workflows/tarball-ubuntu.yml with: archname: snapshot-${{ needs.tarball.outputs.branch }} @@ -63,6 +75,7 @@ jobs: macos: needs: tarball + if: ${{ ! needs.tarball.outputs.skip }} uses: ./.github/workflows/tarball-macos.yml with: archname: snapshot-${{ needs.tarball.outputs.branch }} @@ -71,6 +84,7 @@ jobs: windows: needs: tarball + if: ${{ ! needs.tarball.outputs.skip }} uses: ./.github/workflows/tarball-windows.yml with: archname: snapshot-${{ needs.tarball.outputs.branch }} diff --git a/.github/workflows/tarball-ubuntu.yml b/.github/workflows/tarball-ubuntu.yml index 13606c7eee..f0e773b526 100644 --- a/.github/workflows/tarball-ubuntu.yml +++ b/.github/workflows/tarball-ubuntu.yml @@ -69,16 +69,16 @@ jobs: [ Dir.home, ].each do |dir| - Pathname(dir).each_child do |pn| - st = pn.stat + Dir.each_child(dir) do |pn| + st = File.stat(pn) if st.file? - content = Digest::SHA1.hexdigest(pn.read) + content = Digest::SHA1.file(pn).hexdigest elsif st.directory? 
&& st.nlink <= 10 - content = pn.children.map(&:basename).map(&:to_s).sort + content = Dir.children(pn).sort end - out << [pn.to_s, "%o"%st.mode, st.nlink, st.uid, st.gid, st.size, content].to_s + out << [pn, "%o"%st.mode, st.nlink, st.uid, st.gid, st.size, content].to_s rescue - out << [pn.to_s, $!.inspect].to_s + out << [pn, $!.inspect].to_s end end File.open(ARGV.shift, "w") do |io| diff --git a/benchmark/string_codepoints.yml b/benchmark/string_codepoints.yml new file mode 100644 index 0000000000..6a07db7ce1 --- /dev/null +++ b/benchmark/string_codepoints.yml @@ -0,0 +1,9 @@ +prelude: | + mixed_ascii64 = ("a" * 63 + "\u{100}") * 2048 + mixed_ascii256 = ("a" * 255 + "\u{100}") * 512 + utf8_2byte = "\u{100}" * 65536 + +benchmark: + codepoints_mixed_ascii64: mixed_ascii64.codepoints + codepoints_mixed_ascii256: mixed_ascii256.codepoints + codepoints_utf8_2byte: utf8_2byte.codepoints diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index 1519d2b623..c0540fe0c8 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -14,7 +14,9 @@ use crate::abi::RubyBindingOptions; use crate::abi::RubyUpcalls; use crate::binding; use crate::binding::RubyBinding; +use crate::heap::CpuHeapTriggerConfig; use crate::heap::RubyHeapTriggerConfig; +use crate::heap::CPU_HEAP_TRIGGER_CONFIG; use crate::heap::RUBY_HEAP_TRIGGER_CONFIG; use crate::mmtk; use crate::utils::default_heap_max; @@ -131,6 +133,42 @@ fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCT Some(GCTriggerSelector::Delegated) } + "cpu" => { + // CPU-overhead-driven heap sizing based on Tavakolisomeh et al., + // "Heap Size Adjustment with CPU Control", MPLR '23. + // + // Target is expressed as a percentage (0, 100) via + // `MMTK_GC_CPU_TARGET`. The paper recommends 15 for ZGC (a + // concurrent collector); we default to 5 for MMTk-Ruby. 
With + // MMTk's stop-the-world Immix, every percent of GC CPU is also + // a percent of wall-clock the mutator is blocked on, so a much + // smaller budget is appropriate. An empirical sweep across + // ruby-bench (railsbench, lobsters, psych-load, liquid-render, + // lee) found target=5 to be Pareto-optimal: ~6% geomean speedup + // vs. the `ruby` heap mode with effectively identical geomean + // peak RSS. + let target_percent = parse_float_env_var("MMTK_GC_CPU_TARGET", 5.0, 0.0, 100.0); + let window_size = parse_env_var::<usize>("MMTK_GC_CPU_WINDOW").unwrap_or(3); + let window_size = window_size.max(1); + + let min_heap_pages = conversions::bytes_to_pages_up(heap_min); + let max_heap_pages = conversions::bytes_to_pages_up(heap_max); + // Start at the min heap size, as the other delegated triggers do. + // The control loop will adjust from here after the first GC cycle. + let initial_heap_pages = min_heap_pages; + + CPU_HEAP_TRIGGER_CONFIG + .set(CpuHeapTriggerConfig { + min_heap_pages, + max_heap_pages, + initial_heap_pages, + target_gc_cpu: target_percent / 100.0, + window_size, + }) + .unwrap_or_else(|_| panic!("CPU_HEAP_TRIGGER_CONFIG is already set")); + + Some(GCTriggerSelector::Delegated) + } _ => None, }) .unwrap_or_else(make_dynamic) @@ -446,11 +484,20 @@ pub extern "C" fn mmtk_heap_mode() -> *const u8 { static FIXED_HEAP: &[u8] = b"fixed\0"; static DYNAMIC_HEAP: &[u8] = b"dynamic\0"; static RUBY_HEAP: &[u8] = b"ruby\0"; + static CPU_HEAP: &[u8] = b"cpu\0"; match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(), GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(), - GCTriggerSelector::Delegated => RUBY_HEAP.as_ptr(), + GCTriggerSelector::Delegated => { + // Two delegated triggers exist; disambiguate via the populated + // config singleton. 
+ if CPU_HEAP_TRIGGER_CONFIG.get().is_some() { + CPU_HEAP.as_ptr() + } else { + RUBY_HEAP.as_ptr() + } + } } } @@ -459,12 +506,18 @@ pub extern "C" fn mmtk_heap_min() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => 0, GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size, - GCTriggerSelector::Delegated => conversions::pages_to_bytes( - RUBY_HEAP_TRIGGER_CONFIG - .get() - .expect("RUBY_HEAP_TRIGGER_CONFIG not set") - .min_heap_pages, - ), + GCTriggerSelector::Delegated => { + if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() { + conversions::pages_to_bytes(cfg.min_heap_pages) + } else { + conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .min_heap_pages, + ) + } + } } } @@ -473,12 +526,18 @@ pub extern "C" fn mmtk_heap_max() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(max_size) => max_size, GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size, - GCTriggerSelector::Delegated => conversions::pages_to_bytes( - RUBY_HEAP_TRIGGER_CONFIG - .get() - .expect("RUBY_HEAP_TRIGGER_CONFIG not set") - .max_heap_pages, - ), + GCTriggerSelector::Delegated => { + if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() { + conversions::pages_to_bytes(cfg.max_heap_pages) + } else { + conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .max_heap_pages, + ) + } + } } } diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 28daa4f991..648efa4e27 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -1,7 +1,9 @@ use crate::abi::GCThreadTLS; use crate::api::RubyMutator; +use crate::heap::CpuHeapTrigger; use crate::heap::RubyHeapTrigger; +use crate::heap::CPU_HEAP_TRIGGER_CONFIG; use crate::mmtk; use crate::upcalls; use crate::Ruby; @@ -95,7 +97,16 @@ impl Collection<Ruby> for 
VMCollection { } fn create_gc_trigger() -> Box<dyn GCTriggerPolicy<Ruby>> { - Box::new(RubyHeapTrigger::default()) + // `GCTriggerSelector::Delegated` is currently used by two different + // heap modes: `ruby` (the Ruby-like free-slot ratio trigger) and `cpu` + // (the CPU-overhead trigger from Tavakolisomeh et al., MPLR '23). + // Which one is active is determined by which `OnceCell` config the + // `MMTK_HEAP_MODE` parser populated. + if CPU_HEAP_TRIGGER_CONFIG.get().is_some() { + Box::new(CpuHeapTrigger::default()) + } else { + Box::new(RubyHeapTrigger::default()) + } } } diff --git a/gc/mmtk/src/heap/cpu_heap_trigger.rs b/gc/mmtk/src/heap/cpu_heap_trigger.rs new file mode 100644 index 0000000000..ef5a79fe9a --- /dev/null +++ b/gc/mmtk/src/heap/cpu_heap_trigger.rs @@ -0,0 +1,370 @@ +//! A GC trigger that adjusts the heap size based on the CPU overhead of GC. +//! +//! This is an implementation of the heap sizing policy described in +//! Tavakolisomeh, Shimchenko, Österlund, Bruno, Ferreira, Wrigstad, +//! "Heap Size Adjustment with CPU Control", MPLR '23. +//! <https://doi.org/10.1145/3617651.3622988> +//! +//! The idea: rather than letting heap size control GC frequency, let a +//! user-supplied *target GC CPU overhead* control the heap size. After each GC +//! cycle, we measure the GC CPU overhead (fraction of process CPU time spent +//! in GC) and compare it to the target. If GC is over budget we grow the heap +//! (reducing GC frequency); if it is under budget we shrink the heap (trading +//! memory for more frequent collections). +//! +//! ## Algorithm +//! +//! After each GC cycle we compute, using an average of the last `n` cycles: +//! +//! ```text +//! GC_CPU = T_GC / T_APP (Eq. 1) +//! overhead_error = GC_CPU - target (Eq. 2) +//! sigmoid_error = 1 / (1 + e^(-overhead_error)) (Eq. 3) +//! adjustment_factor = sigmoid_error + 0.5 (in (0.5, 1.5)) (Eq. 4) +//! new_size = current_size * adjustment_factor (Eq. 5) +//! ``` +//! +//! where: +//! 
- `T_GC` is the wall-clock duration of each GC cycle. +//! - `T_APP` is process CPU time elapsed between consecutive GC cycles (sum of +//! CPU time over all threads — mutators, GC workers, compilers, etc.), read +//! via `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`. +//! +//! The final heap size is then clamped to the range +//! `[max(1.1 * used, min_heap_pages), max_heap_pages]` (the lower bound is +//! itself capped at `max_heap_pages`), providing 10% headroom above the pages +//! in use after the collection so that we never request a heap so small that +//! GC is triggered again immediately. +//! +//! ## Differences from the paper +//! +//! The paper targets ZGC, a concurrent generational collector. MMTk's Ruby +//! binding currently ships stop-the-world collectors (Immix, MarkSweep). The +//! paper's formula still applies: with a STW collector the process CPU time +//! during GC closely tracks the wall-clock GC time, and mutator CPU time +//! during the mutator phase is correctly attributed. For generational plans +//! we skip nursery-only GCs, consistent with MemBalancer. + +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; +use std::sync::Mutex; + +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::heap::SpaceStats; +use mmtk::Plan; +use mmtk::MMTK; +use once_cell::sync::OnceCell; + +use crate::Ruby; + +pub static CPU_HEAP_TRIGGER_CONFIG: OnceCell<CpuHeapTriggerConfig> = OnceCell::new(); + +/// Configuration for the [`CpuHeapTrigger`]. +pub struct CpuHeapTriggerConfig { + /// Lower bound on heap size (in pages). The trigger will never shrink below + /// this value. + pub min_heap_pages: usize, + /// Upper bound on heap size (in pages). The trigger will never grow above + /// this value. + pub max_heap_pages: usize, + /// Initial heap size (in pages). + pub initial_heap_pages: usize, + /// Target GC CPU overhead as a fraction of total process CPU time. For + /// example, `0.15` means the policy will try to keep GC CPU usage near 15%. + /// Valid range: `(0.0, 1.0)`.
+ pub target_gc_cpu: f64, + /// Number of recent GC cycles averaged together when computing the CPU + /// overhead signal. Smooths out short-term fluctuations. The paper uses 3. + pub window_size: usize, +} + +/// A single GC cycle's timing measurements. +#[derive(Clone, Copy, Debug, Default)] +struct GcSample { + /// Wall-clock seconds spent inside this GC cycle. + gc_seconds: f64, + /// Seconds of process CPU time elapsed between the end of the previous GC + /// cycle and the end of this one. The clock is process-wide, so this + /// covers mutator time as well as the CPU time consumed by this GC cycle. + app_cpu_seconds: f64, +} + +struct CpuHeapTriggerState { + /// Sliding window of the last `window_size` samples, oldest first. Kept in + /// a plain `Vec`; the front element is removed when the window is full. + samples: Vec<GcSample>, + /// Wall-clock time when the current GC cycle started. `None` when no GC is + /// in progress. + gc_start_wall: Option<std::time::Instant>, + /// Process CPU time (seconds) recorded at the end of the previous GC + /// cycle. `None` until the first cycle completes. + last_gc_end_cpu: Option<f64>, +} + +impl CpuHeapTriggerState { + fn new() -> Self { + Self { + samples: Vec::new(), + gc_start_wall: None, + last_gc_end_cpu: None, + } + } + + /// Pushes a new sample, dropping the oldest when the window is full. + fn push_sample(&mut self, sample: GcSample, window_size: usize) { + if self.samples.len() >= window_size { + self.samples.remove(0); + } + self.samples.push(sample); + } + + /// Returns the GC CPU overhead over the window, computed as total GC + /// seconds divided by total application CPU seconds (a time-weighted + /// ratio rather than a mean of per-cycle ratios). Returns `None` while no + /// sample has been recorded (as on the first GC cycle, which has no + /// baseline for `app_cpu_seconds`) or when the summed application CPU + /// time is not positive.
+ fn mean_gc_cpu(&self) -> Option<f64> { + if self.samples.is_empty() { + return None; + } + let total_gc: f64 = self.samples.iter().map(|s| s.gc_seconds).sum(); + let total_app: f64 = self.samples.iter().map(|s| s.app_cpu_seconds).sum(); + if total_app <= 0.0 { + return None; + } + Some(total_gc / total_app) + } +} + +pub struct CpuHeapTrigger { + /// Target heap size in pages. Updated at the end of each GC cycle. + target_heap_pages: AtomicUsize, + /// Mutable timing state. Wrapped in a `Mutex` because `on_gc_start` and + /// `on_gc_end` are the only mutation sites and they are not on an + /// allocation hot path; avoiding the complexity of lock-free state is + /// worth the trivial contention. + state: Mutex<CpuHeapTriggerState>, +} + +impl Default for CpuHeapTrigger { + fn default() -> Self { + let cfg = Self::get_config(); + Self { + target_heap_pages: AtomicUsize::new(cfg.initial_heap_pages), + state: Mutex::new(CpuHeapTriggerState::new()), + } + } +} + +impl GCTriggerPolicy<Ruby> for CpuHeapTrigger { + fn is_gc_required( + &self, + space_full: bool, + space: Option<SpaceStats<Ruby>>, + plan: &dyn Plan<VM = Ruby>, + ) -> bool { + // Let the plan decide, matching the other triggers. + plan.collection_required(space_full, space) + } + + fn on_gc_start(&self, _mmtk: &'static MMTK<Ruby>) { + let mut state = self.state.lock().unwrap(); + state.gc_start_wall = Some(std::time::Instant::now()); + } + + fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) { + // Skip nursery-only GCs for generational plans. The heap resizing + // decision is driven by the (much more expensive) full collections + // where the signal-to-noise ratio is high enough to be useful. + if let Some(gen_plan) = mmtk.get_plan().generational() { + if gen_plan.is_current_gc_nursery() { + return; + } + } + + let cfg = Self::get_config(); + let gc_end_cpu = process_cpu_time_seconds(); + + let mut state = self.state.lock().unwrap(); + + // Duration of this GC cycle (wall clock). 
+ let gc_seconds = state + .gc_start_wall + .take() + .map(|start| start.elapsed().as_secs_f64()) + .unwrap_or(0.0); + + // Process CPU time elapsed since the previous GC cycle ended. We + // require at least one previous end timestamp to produce a valid + // sample — without it we cannot compute `T_APP`. + if let (Some(last_end), Some(now)) = (state.last_gc_end_cpu, gc_end_cpu) { + let app_cpu_seconds = (now - last_end).max(0.0); + // Only record non-degenerate samples to avoid poisoning the window + // with zero-time entries from back-to-back GCs. + if app_cpu_seconds > 0.0 { + state.push_sample( + GcSample { + gc_seconds, + app_cpu_seconds, + }, + cfg.window_size, + ); + } + } + state.last_gc_end_cpu = gc_end_cpu; + + // Compute the new heap size only when we have samples to average over. + if let Some(gc_cpu) = state.mean_gc_cpu() { + // Drop the lock before doing the (relatively cheap) math and + // atomic update; nothing below needs the state. + drop(state); + + let overhead_error = gc_cpu - cfg.target_gc_cpu; // Eq. (2) + let sigmoid_error = sigmoid(overhead_error); // Eq. (3) + let adjustment_factor = sigmoid_error + 0.5; // Eq. (4), range (0.5, 1.5) + + let current = self.target_heap_pages.load(Ordering::Relaxed); + let suggested = ((current as f64) * adjustment_factor) as usize; // Eq. (5) + + // Clamp: + // - upper bound: configured max + // - lower bound: max(1.1 * used, min) — 10% headroom above current + // live memory, so we never request a heap so small that GC is + // triggered immediately on return from this one. 
+ let used = mmtk.get_plan().get_used_pages(); + let floor = ((used as f64) * 1.1).ceil() as usize; + let lower = floor.max(cfg.min_heap_pages).min(cfg.max_heap_pages); + let upper = cfg.max_heap_pages; + let new_target = suggested.clamp(lower, upper); + + self.target_heap_pages.store(new_target, Ordering::Relaxed); + + info!( + "CpuHeapTrigger: gc_cpu={:.4} target={:.4} factor={:.4} \ + pages {} -> {} (used={}, clamp=[{}, {}])", + gc_cpu, + cfg.target_gc_cpu, + adjustment_factor, + current, + new_target, + used, + lower, + upper + ); + } + } + + fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool { + plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_current_heap_size_in_pages(&self) -> usize { + self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_max_heap_size_in_pages(&self) -> usize { + Self::get_config().max_heap_pages + } + + fn can_heap_size_grow(&self) -> bool { + self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages + } +} + +impl CpuHeapTrigger { + fn get_config<'b>() -> &'b CpuHeapTriggerConfig { + CPU_HEAP_TRIGGER_CONFIG + .get() + .expect("Attempt to use CPU_HEAP_TRIGGER_CONFIG before it is initialized") + } +} + +/// Standard logistic sigmoid. Returns 0.5 when x == 0, asymptotes to 0 and 1. +fn sigmoid(x: f64) -> f64 { + 1.0 / (1.0 + (-x).exp()) +} + +/// Reads the process-wide CPU time as a floating-point number of seconds, +/// summed across all threads of this process. Returns `None` if the clock +/// query fails (which should be essentially impossible on supported +/// platforms). +fn process_cpu_time_seconds() -> Option<f64> { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + // SAFETY: `clock_gettime` writes exactly `sizeof(timespec)` bytes to the + // pointer we pass, which is a valid local stack allocation. 
+ let rc = unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut ts) }; + if rc != 0 { + return None; + } + Some((ts.tv_sec as f64) + (ts.tv_nsec as f64) / 1_000_000_000.0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sigmoid_is_well_behaved() { + assert!((sigmoid(0.0) - 0.5).abs() < 1e-12); + assert!(sigmoid(-100.0) < 1e-9); + assert!(sigmoid(100.0) > 1.0 - 1e-9); + // Monotonic. + assert!(sigmoid(-1.0) < sigmoid(0.0)); + assert!(sigmoid(0.0) < sigmoid(1.0)); + } + + #[test] + fn adjustment_factor_is_within_paper_bounds() { + // Eq. (4): adjustment_factor = sigmoid(e) + 0.5 must lie in (0.5, 1.5). + for e in [-10.0_f64, -1.0, 0.0, 1.0, 10.0] { + let f = sigmoid(e) + 0.5; + assert!(f > 0.5 && f < 1.5, "factor {f} out of range for e={e}"); + } + } + + #[test] + fn mean_gc_cpu_is_total_weighted() { + let mut state = CpuHeapTriggerState::new(); + state.push_sample( + GcSample { + gc_seconds: 1.0, + app_cpu_seconds: 10.0, + }, + 3, + ); + state.push_sample( + GcSample { + gc_seconds: 3.0, + app_cpu_seconds: 10.0, + }, + 3, + ); + // (1 + 3) / (10 + 10) = 0.2 + assert!((state.mean_gc_cpu().unwrap() - 0.2).abs() < 1e-12); + } + + #[test] + fn window_drops_oldest() { + let mut state = CpuHeapTriggerState::new(); + for i in 0..5 { + state.push_sample( + GcSample { + gc_seconds: i as f64, + app_cpu_seconds: 1.0, + }, + 3, + ); + } + assert_eq!(state.samples.len(), 3); + // After pushing 0,1,2,3,4 with window 3, we should have [2,3,4]. + assert_eq!(state.samples[0].gc_seconds, 2.0); + assert_eq!(state.samples[2].gc_seconds, 4.0); + } + + #[test] + fn no_sample_without_prior_gc() { + // First GC cycle cannot produce a sample (no previous end time). The + // push happens only when last_gc_end_cpu is Some. 
+ let state = CpuHeapTriggerState::new(); + assert!(state.mean_gc_cpu().is_none()); + } +} diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs index 6af7c1b2e5..05a35efb23 100644 --- a/gc/mmtk/src/heap/mod.rs +++ b/gc/mmtk/src/heap/mod.rs @@ -1,4 +1,9 @@ +mod cpu_heap_trigger; mod ruby_heap_trigger; + +pub use cpu_heap_trigger::CpuHeapTrigger; +pub use cpu_heap_trigger::CpuHeapTriggerConfig; +pub use cpu_heap_trigger::CPU_HEAP_TRIGGER_CONFIG; pub use ruby_heap_trigger::RubyHeapTrigger; pub use ruby_heap_trigger::RubyHeapTriggerConfig; pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG; @@ -5109,6 +5109,14 @@ rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) } } +VALUE +rb_hash_new_with_bulk_insert(long argc, const VALUE *argv) +{ + VALUE val = rb_hash_new_with_size(argc / 2); + rb_hash_bulk_insert(argc, argv, val); + return val; +} + static char **origenviron; #ifdef _WIN32 #define GET_ENVIRON(e) ((e) = rb_w32_get_environ()) @@ -429,9 +429,7 @@ toregexp // attr bool leaf = false; // attr rb_snum_t sp_inc = 1 - (rb_snum_t)cnt; { - const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt)); - val = rb_reg_new_ary(ary, (int)opt); - rb_ary_clear(ary); + val = rb_reg_new_from_values(cnt, STACK_ADDR_FROM_TOP(cnt), (int)opt); } /* intern str to Symbol and push it. 
*/ @@ -591,8 +589,7 @@ newhash RUBY_DTRACE_CREATE_HOOK(HASH, num); if (num) { - val = rb_hash_new_with_size(num / 2); - rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); + val = rb_hash_new_with_bulk_insert(num, STACK_ADDR_FROM_TOP(num)); } else { val = rb_hash_new(); diff --git a/internal/hash.h b/internal/hash.h index 6671cd496d..baf5af9abd 100644 --- a/internal/hash.h +++ b/internal/hash.h @@ -112,6 +112,7 @@ int rb_hash_stlike_foreach(VALUE hash, st_foreach_callback_func *func, st_data_t RUBY_SYMBOL_EXPORT_END VALUE rb_hash_new_with_size(st_index_t size); +VALUE rb_hash_new_with_bulk_insert(long argc, const VALUE *argv); VALUE rb_hash_resurrect(VALUE hash); int rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval); VALUE rb_hash_keys(VALUE hash); diff --git a/internal/rational.h b/internal/rational.h index f7e382af8c..6861a90130 100644 --- a/internal/rational.h +++ b/internal/rational.h @@ -38,7 +38,9 @@ VALUE rb_rational_cmp(VALUE self, VALUE other); VALUE rb_rational_pow(VALUE self, VALUE other); VALUE rb_rational_floor(VALUE self, int ndigits); VALUE rb_numeric_quo(VALUE x, VALUE y); -VALUE rb_flo_round_by_rational(int argc, VALUE *argv, VALUE num); +VALUE rb_flo_round_by_rational(VALUE num, int ndigits, enum ruby_num_rounding_mode mode); +VALUE rb_flo_ceil_by_rational(VALUE num, int ndigits); +VALUE rb_flo_floor_by_rational(VALUE num, int ndigits); VALUE rb_float_numerator(VALUE x); VALUE rb_float_denominator(VALUE x); diff --git a/internal/re.h b/internal/re.h index da165e4756..3ad364a1a6 100644 --- a/internal/re.h +++ b/internal/re.h @@ -69,6 +69,7 @@ VALUE rb_backref_set_string(VALUE string, long pos, long len); void rb_match_unbusy(VALUE); int rb_match_count(VALUE match); VALUE rb_reg_new_ary(VALUE ary, int options); +VALUE rb_reg_new_from_values(long cnt, const VALUE *elements, int opt); VALUE rb_reg_last_defined(VALUE match); #define ARG_REG_OPTION_MASK \ @@ -75,6 +75,8 @@ #define DBL_EPSILON 2.2204460492503131e-16 #endif +#define 
ACCURATE_POW10(ndigits) ((ndigits) < DBL_DIG) + #ifndef USE_RB_INFINITY #elif !defined(WORDS_BIGENDIAN) /* BYTE_ORDER == LITTLE_ENDIAN */ const union bytesequence4_or_float rb_infinity = {{0x00, 0x00, 0x80, 0x7f}}; @@ -2018,6 +2020,9 @@ rb_float_floor(VALUE num, int ndigits) if (float_round_overflow(ndigits, binexp)) return num; if (number > 0.0 && float_round_underflow(ndigits, binexp)) return DBL2NUM(0.0); + if (!ACCURATE_POW10(ndigits)) { + return rb_flo_floor_by_rational(num, ndigits); + } f = pow(10, ndigits); mul = floor(number * f); res = (mul + 1) / f; @@ -2226,6 +2231,9 @@ rb_float_ceil(VALUE num, int ndigits) if (float_round_overflow(ndigits, binexp)) return num; if (number < 0.0 && float_round_underflow(ndigits, binexp)) return DBL2NUM(0.0); + if (!ACCURATE_POW10(ndigits)) { + return rb_flo_ceil_by_rational(num, ndigits); + } f = pow(10, ndigits); f = ceil(number * f) / f; return DBL2NUM(f); @@ -2490,9 +2498,8 @@ flo_round(int argc, VALUE *argv, VALUE num) frexp(number, &binexp); if (float_round_overflow(ndigits, binexp)) return num; if (float_round_underflow(ndigits, binexp)) return DBL2NUM(0); - if (ndigits > 14) { - /* In this case, pow(10, ndigits) may not be accurate. 
*/ - return rb_flo_round_by_rational(argc, argv, num); + if (!ACCURATE_POW10(ndigits)) { + return rb_flo_round_by_rational(num, ndigits, mode); } f = pow(10, ndigits); x = ROUND_CALL(mode, round, (number, f)); @@ -2888,7 +2888,6 @@ void rb_execarg_parent_end(VALUE execarg_obj) { execarg_parent_end(execarg_obj); - RB_GC_GUARD(execarg_obj); } static void diff --git a/rational.c b/rational.c index d6214451b4..b031838d69 100644 --- a/rational.c +++ b/rational.c @@ -1374,10 +1374,12 @@ nurat_round_half_even(VALUE self) return num; } +static VALUE f_round_n(VALUE self, VALUE n, VALUE (*func)(VALUE)) ; + static VALUE f_round_common(int argc, VALUE *argv, VALUE self, VALUE (*func)(VALUE)) { - VALUE n, b, s; + VALUE n; if (rb_check_arity(argc, 0, 1) == 0) return (*func)(self); @@ -1387,6 +1389,14 @@ f_round_common(int argc, VALUE *argv, VALUE self, VALUE (*func)(VALUE)) if (!k_integer_p(n)) rb_raise(rb_eTypeError, "not an integer"); + return f_round_n(self, n, func); +} + +static VALUE +f_round_n(VALUE self, VALUE n, VALUE (*func)(VALUE)) +{ + VALUE b, s; + b = f_expt10(n); s = rb_rational_mul(self, b); @@ -1417,8 +1427,7 @@ rb_rational_floor(VALUE self, int ndigits) return nurat_floor(self); } else { - VALUE n = INT2NUM(ndigits); - return f_round_common(1, &n, self, nurat_floor); + return f_round_n(self, INT2NUM(ndigits), nurat_floor); } } @@ -1561,9 +1570,22 @@ nurat_round_n(int argc, VALUE *argv, VALUE self) } VALUE -rb_flo_round_by_rational(int argc, VALUE *argv, VALUE num) +rb_flo_round_by_rational(VALUE num, int ndigits, enum ruby_num_rounding_mode mode) +{ + VALUE (*round_func)(VALUE) = ROUND_FUNC(mode, nurat_round); + return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), round_func)); +} + +VALUE +rb_flo_ceil_by_rational(VALUE num, int ndigits) +{ + return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), nurat_ceil)); +} + +VALUE +rb_flo_floor_by_rational(VALUE num, int ndigits) { - return nurat_to_f(nurat_round_n(argc, argv, float_to_r(num))); + 
return nurat_to_f(f_round_n(float_to_r(num), INT2NUM(ndigits), nurat_floor)); } static double @@ -3529,6 +3529,15 @@ rb_reg_new_ary(VALUE ary, int opt) } VALUE +rb_reg_new_from_values(long cnt, const VALUE *elements, int opt) +{ + const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, elements); + VALUE val = rb_reg_new_ary(ary, (int)opt); + rb_ary_clear(ary); + return val; +} + +VALUE rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options) { VALUE re = rb_reg_alloc(); @@ -9772,6 +9772,7 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary) unsigned int c; const char *ptr, *end; rb_encoding *enc; + int enc_asciicompat; if (single_byte_optimizable(str)) return rb_str_enumerate_bytes(str, ary); @@ -9780,9 +9781,15 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary) ptr = RSTRING_PTR(str); end = RSTRING_END(str); enc = STR_ENC_GET(str); + enc_asciicompat = rb_enc_asciicompat(enc); while (ptr < end) { - c = rb_enc_codepoint_len(ptr, end, &n, enc); + /* Fast path: ASCII byte in an ASCII-compatible encoding is its own codepoint; + * skip rb_enc_codepoint_len and return the byte directly. + */ + n = 1; + c = (enc_asciicompat && ISASCII(*ptr)) ? 
+ (unsigned char)*ptr : rb_enc_codepoint_len(ptr, end, &n, enc); ENUM_ELEM(ary, UINT2NUM(c)); ptr += n; } diff --git a/test/ruby/test_float.rb b/test/ruby/test_float.rb index d0d180593a..c01e8bb80b 100644 --- a/test/ruby/test_float.rb +++ b/test/ruby/test_float.rb @@ -492,6 +492,22 @@ class TestFloat < Test::Unit::TestCase assert_equal(-1.26, -1.255.round(2)) end + def test_round_ndigits + bug14635 = "[ruby-core:86323]" + f = 0.5 + 31.times do |i| + assert_equal(0.5, f.round(i+1), bug14635 + " (argument: #{i+1})") + end + end + + def test_round_with_precision_min + (0..3).each do |n| + n -= Float::MIN_10_EXP + f = Float::MIN.round(n) + assert_include([Float::MIN.floor(n), Float::MIN.ceil(n)], f, "round(#{n})") + end + end + def test_round_half_even_with_precision assert_equal(767573.18759, 767573.1875850001.round(5, half: :even)) assert_equal(767573.18758, 767573.187585.round(5, half: :even)) @@ -536,6 +552,16 @@ class TestFloat < Test::Unit::TestCase assert_equal(-100000000000000000000000000000000000000000000000000, -1.0.floor(-50), "[Bug #20654]") end + def test_floor_with_precision_min + min = Float::MIN + (0..3).each do |n| + n -= Float::MIN_10_EXP + f = min.floor(n) + assert_operator(f, :<=, Float::MIN, "floor(#{n})") + assert_operator(f, :>=, Float::MIN.floor(n-1), "ceil(#{n})") + end + end + def test_ceil_with_precision assert_equal(+0.1, +0.001.ceil(1)) assert_equal(-0.0, -0.001.ceil(1)) @@ -567,6 +593,19 @@ class TestFloat < Test::Unit::TestCase assert_equal(100000000000000000000000000000000000000000000000000, 1.0.ceil(-50), "[Bug #20654]") end + def test_ceil_with_precision_min + min = Float::MIN + (-Float::MIN_10_EXP).times do |n| + assert_equal(10.pow(-n), min.ceil(n)) + end + (0..3).each do |n| + n -= Float::MIN_10_EXP + f = min.ceil(n) + assert_operator(f, :>=, Float::MIN, "ceil(#{n})") + assert_operator(f, :<=, Float::MIN.ceil(n-1), "ceil(#{n})") + end + end + def test_truncate_with_precision assert_equal(1.100, 1.111.truncate(1)) assert_equal(1.110, 
1.111.truncate(2)) diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb index 35496ac875..b272b89921 100644 --- a/test/ruby/test_numeric.rb +++ b/test/ruby/test_numeric.rb @@ -206,14 +206,6 @@ class TestNumeric < Test::Unit::TestCase assert_nil(a <=> :foo) end - def test_float_round_ndigits - bug14635 = "[ruby-core:86323]" - f = 0.5 - 31.times do |i| - assert_equal(0.5, f.round(i+1), bug14635 + " (argument: #{i+1})") - end - end - def test_floor_ceil_round_truncate a = Class.new(Numeric) do def to_f; 1.5; end diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 573eb37a72..2cde74facd 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -123,6 +123,7 @@ fn main() { .allowlist_function("rb_hash_aset") .allowlist_function("rb_hash_aref") .allowlist_function("rb_hash_bulk_insert") + .allowlist_function("rb_hash_new_with_bulk_insert") .allowlist_function("rb_hash_stlike_lookup") .allowlist_function("rb_ary_new_capa") .allowlist_function("rb_ary_store") @@ -215,6 +216,7 @@ fn main() { .allowlist_function("rb_reg_match_last") .allowlist_function("rb_reg_nth_match") .allowlist_function("rb_reg_new_ary") + .allowlist_function("rb_reg_new_from_values") .allowlist_var("ARG_ENCODING_FIXED") .allowlist_var("ARG_ENCODING_NONE") .allowlist_var("ONIG_OPTION_IGNORECASE") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index f1ef17d794..4eee769315 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -2111,19 +2111,17 @@ fn gen_new_hash( elements: Vec<Opnd>, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); - - let cap: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); - let new_hash = asm_ccall!(asm, rb_hash_new_with_size, lir::Opnd::Imm(cap)); + if elements.is_empty() { + gen_prepare_leaf_call_with_gc(asm, state); + asm_ccall!(asm, rb_hash_new,) + } else { + gen_prepare_non_leaf_call(jit, asm, state); - if !elements.is_empty() { let argv = 
gen_push_opnds(asm, &elements); - asm_ccall!(asm, rb_hash_bulk_insert, elements.len().into(), argv, new_hash); - + let hash = asm_ccall!(asm, rb_hash_new_with_bulk_insert, elements.len().into(), argv); gen_pop_opnds(asm, &elements); + hash } - - new_hash } /// Compile a new range instruction @@ -3401,11 +3399,7 @@ fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec gen_prepare_non_leaf_call(jit, asm, state); let first_opnd_ptr = gen_push_opnds(asm, &values); - - let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr); - let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into()); - asm_ccall!(asm, rb_ary_clear, tmp_ary); - + let result = asm_ccall!(asm, rb_reg_new_from_values, values.len().into(), first_opnd_ptr, opt.into()); gen_pop_opnds(asm, &values); result diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index c61e61edd1..5a7c3de606 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2060,6 +2060,11 @@ unsafe extern "C" { pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE; pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE; pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE; + pub fn rb_reg_new_from_values( + cnt: ::std::os::raw::c_long, + elements: *const VALUE, + opt: ::std::os::raw::c_int, + ) -> VALUE; pub fn rb_ary_tmp_new_from_values( arg1: VALUE, arg2: ::std::os::raw::c_long, @@ -2132,6 +2137,7 @@ unsafe extern "C" { arg: st_data_t, ) -> ::std::os::raw::c_int; pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE; + pub fn rb_hash_new_with_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE) -> VALUE; pub fn rb_hash_resurrect(hash: VALUE) -> VALUE; pub fn rb_hash_stlike_lookup( hash: VALUE, |
