summaryrefslogtreecommitdiff
path: root/zjit/src/distribution.rs
diff options
context:
space:
mode:
Diffstat (limited to 'zjit/src/distribution.rs')
-rw-r--r--zjit/src/distribution.rs282
1 files changed, 282 insertions, 0 deletions
diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs
new file mode 100644
index 0000000000..aa4667b939
--- /dev/null
+++ b/zjit/src/distribution.rs
@@ -0,0 +1,282 @@
+//! Type frequency distribution tracker.
+
+use crate::options::NumProfiles;
+
+/// This implementation was inspired by the type feedback module from Google's S6, which was
+/// written in C++ for use with Python. This is a new implementation in Rust created for use with
+/// Ruby instead of Python.
+#[derive(Debug, Clone)]
+pub struct Distribution<T: Copy + PartialEq + Default, const N: usize> {
+ /// buckets and counts have the same length
+ /// `buckets[0]` is always the most common item
+ buckets: [T; N],
+ counts: [NumProfiles; N],
+ /// if there is no more room, increment the fallback
+ other: NumProfiles,
+ // TODO(max): Add count disparity, which can help determine when to reset the distribution
+}
+
+impl<T: Copy + PartialEq + Default, const N: usize> Distribution<T, N> {
+ pub fn new() -> Self {
+ Self { buckets: [Default::default(); N], counts: [0; N], other: 0 }
+ }
+
+ pub fn observe(&mut self, item: T) {
+ for (bucket, count) in self.buckets.iter_mut().zip(self.counts.iter_mut()) {
+ if *bucket == item || *count == 0 {
+ *bucket = item;
+ *count = count.saturating_add(1);
+ // Keep the most frequent item at the front
+ self.bubble_up();
+ return;
+ }
+ }
+ self.other = self.other.saturating_add(1);
+ }
+
+ /// Keep the highest counted bucket at index 0
+ fn bubble_up(&mut self) {
+ if N == 0 { return; }
+ let max_index = self.counts.into_iter().enumerate().max_by_key(|(_, val)| *val).unwrap().0;
+ if max_index != 0 {
+ self.counts.swap(0, max_index);
+ self.buckets.swap(0, max_index);
+ }
+ }
+
+ pub fn each_item(&self) -> impl Iterator<Item = T> + '_ {
+ self.buckets.iter().zip(self.counts.iter())
+ .filter_map(|(&bucket, &count)| if count > 0 { Some(bucket) } else { None })
+ }
+
+ pub fn each_item_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
+ self.buckets.iter_mut().zip(self.counts.iter())
+ .filter_map(|(bucket, &count)| if count > 0 { Some(bucket) } else { None })
+ }
+}
+
+#[derive(PartialEq, Debug, Clone, Copy)]
+enum DistributionKind {
+ /// No types seen
+ Empty,
+ /// One type seen
+ Monomorphic,
+ /// Between 2 and (fixed) N types seen
+ Polymorphic,
+ /// Polymorphic, but with a significant skew towards one type
+ SkewedPolymorphic,
+ /// More than N types seen with no clear winner
+ Megamorphic,
+ /// Megamorphic, but with a significant skew towards one type
+ SkewedMegamorphic,
+}
+
+#[derive(Debug, Clone)]
+pub struct DistributionSummary<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> {
+ kind: DistributionKind,
+ buckets: [T; N],
+ // TODO(max): Determine if we need some notion of stability
+}
+
+const SKEW_THRESHOLD: f64 = 0.75;
+
+impl<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> DistributionSummary<T, N> {
+ pub fn new(dist: &Distribution<T, N>) -> Self {
+ #[cfg(debug_assertions)]
+ {
+ let first_count = dist.counts[0];
+ for &count in &dist.counts[1..] {
+ assert!(first_count >= count, "First count should be the largest");
+ }
+ }
+ let num_seen = dist.counts.iter().map(|&c| usize::from(c)).sum::<usize>() + usize::from(dist.other);
+ let kind = if dist.other == 0 {
+ // Seen <= N types total
+ if dist.counts[0] == 0 {
+ DistributionKind::Empty
+ } else if dist.counts[1] == 0 {
+ DistributionKind::Monomorphic
+ } else if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
+ DistributionKind::SkewedPolymorphic
+ } else {
+ DistributionKind::Polymorphic
+ }
+ } else {
+ // Seen > N types total; considered megamorphic
+ if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
+ DistributionKind::SkewedMegamorphic
+ } else {
+ DistributionKind::Megamorphic
+ }
+ };
+ Self { kind, buckets: dist.buckets }
+ }
+
+ pub fn is_monomorphic(&self) -> bool {
+ self.kind == DistributionKind::Monomorphic
+ }
+
+ pub fn is_polymorphic(&self) -> bool {
+ self.kind == DistributionKind::Polymorphic
+ }
+
+ pub fn is_skewed_polymorphic(&self) -> bool {
+ self.kind == DistributionKind::SkewedPolymorphic
+ }
+
+ pub fn is_megamorphic(&self) -> bool {
+ self.kind == DistributionKind::Megamorphic
+ }
+
+ pub fn is_skewed_megamorphic(&self) -> bool {
+ self.kind == DistributionKind::SkewedMegamorphic
+ }
+
+ pub fn bucket(&self, idx: usize) -> T {
+ assert!(idx < N, "index {idx} out of bounds for buckets[{N}]");
+ self.buckets[idx]
+ }
+
+ pub fn buckets(&self) -> &[T] {
+ &self.buckets
+ }
+}
+
+#[cfg(test)]
+mod distribution_tests {
+ use super::*;
+
+ #[test]
+ fn start_empty() {
+ let dist = Distribution::<usize, 4>::new();
+ assert_eq!(dist.other, 0);
+ assert!(dist.counts.iter().all(|&b| b == 0));
+ }
+
+ #[test]
+ fn observe_adds_record() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ assert_eq!(dist.buckets[0], 10);
+ assert_eq!(dist.counts[0], 1);
+ assert_eq!(dist.other, 0);
+ }
+
+ #[test]
+ fn observe_increments_record() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(10);
+ assert_eq!(dist.buckets[0], 10);
+ assert_eq!(dist.counts[0], 2);
+ assert_eq!(dist.other, 0);
+ }
+
+ #[test]
+ fn observe_two() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(10);
+ dist.observe(11);
+ dist.observe(11);
+ dist.observe(11);
+ assert_eq!(dist.buckets[0], 11);
+ assert_eq!(dist.counts[0], 3);
+ assert_eq!(dist.buckets[1], 10);
+ assert_eq!(dist.counts[1], 2);
+ assert_eq!(dist.other, 0);
+ }
+
+ #[test]
+ fn observe_with_max_increments_other() {
+ let mut dist = Distribution::<usize, 0>::new();
+ dist.observe(10);
+ assert!(dist.buckets.is_empty());
+ assert!(dist.counts.is_empty());
+ assert_eq!(dist.other, 1);
+ }
+
+ #[test]
+ fn empty_distribution_returns_empty_summary() {
+ let dist = Distribution::<usize, 4>::new();
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::Empty);
+ }
+
+ #[test]
+ fn monomorphic_distribution_returns_monomorphic_summary() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(10);
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::Monomorphic);
+ assert_eq!(summary.buckets[0], 10);
+ }
+
+ #[test]
+ fn polymorphic_distribution_returns_polymorphic_summary() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(11);
+ dist.observe(11);
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::Polymorphic);
+ assert_eq!(summary.buckets[0], 11);
+ assert_eq!(summary.buckets[1], 10);
+ }
+
+ #[test]
+ fn skewed_polymorphic_distribution_returns_skewed_polymorphic_summary() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(11);
+ dist.observe(11);
+ dist.observe(11);
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::SkewedPolymorphic);
+ assert_eq!(summary.buckets[0], 11);
+ assert_eq!(summary.buckets[1], 10);
+ }
+
+ #[test]
+ fn megamorphic_distribution_returns_megamorphic_summary() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(11);
+ dist.observe(12);
+ dist.observe(13);
+ dist.observe(14);
+ dist.observe(11);
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::Megamorphic);
+ assert_eq!(summary.buckets[0], 11);
+ }
+
+ #[test]
+ fn skewed_megamorphic_distribution_returns_skewed_megamorphic_summary() {
+ let mut dist = Distribution::<usize, 4>::new();
+ dist.observe(10);
+ dist.observe(11);
+ dist.observe(11);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(12);
+ dist.observe(13);
+ dist.observe(14);
+ let summary = DistributionSummary::new(&dist);
+ assert_eq!(summary.kind, DistributionKind::SkewedMegamorphic);
+ assert_eq!(summary.buckets[0], 12);
+ }
+}