# path: root/benchmark/string_memsearch.yml
# blob: cde363289a5187c8b84df7d410b366a3895c49d2 (plain)
# Shared setup for the scripts under `benchmark:`. Every local variable
# defined here is referenced by name from those scripts, so the names must
# stay stable.
prelude: |
  # Haystacks of various sizes (256, 4096 and 65536 bytes), made only of "a"
  small_hay  = "a" * 256
  medium_hay = "a" * 4096
  large_hay  = "a" * 65536

  # Short needles (2-8 bytes) that exercise rb_memsearch_ss.
  # All of them start with "x", which never occurs in the all-"a" haystacks.
  needle_2 = "xy"
  needle_4 = "xyzw"
  needle_8 = "xyzwabcd"

  # Needle whose first byte is absent from the haystack (memchr fast-path)
  # vs needle whose first byte is common (rolling hash comparison).
  # Both are 4 bytes long and neither occurs in an all-"a" haystack.
  first_byte_absent  = "x" + "a" * 3
  first_byte_common  = "a" + "x" * 3

  # Haystack with match at the end.
  # 4094 + 2 = 4096 bytes, the same length as medium_hay and hay_match_start,
  # so the start/end cases differ only in where the match sits.
  hay_match_end = "a" * 4094 + "xy"

  # Haystack with match at the start (2 + 4094 = 4096 bytes)
  hay_match_start = "xy" + "a" * 4094

  # Mixed content haystack (more realistic):
  # ten repetitions of 1000 bytes of "abcdefghij" followed by a single "z"
  mixed_hay = (("abcdefghij" * 100) + "z") * 10

# Timed scripts. Each entry performs one substring search on a haystack and
# needle built in `prelude`, so that only the search itself is measured.
benchmark:
  # === First byte absent from haystack (biggest win for rolling hash) ===
  # Needle "xaaa" against all-"a" haystacks of increasing size; never matches.
  index_first_byte_absent_small: |
    small_hay.index(first_byte_absent)
  index_first_byte_absent_medium: |
    medium_hay.index(first_byte_absent)
  index_first_byte_absent_large: |
    large_hay.index(first_byte_absent)

  # === First byte common in haystack (stresses comparison loop) ===
  # Needle "axxx": its first byte equals every haystack byte; never matches.
  index_first_byte_common_small: |
    small_hay.index(first_byte_common)
  index_first_byte_common_medium: |
    medium_hay.index(first_byte_common)
  index_first_byte_common_large: |
    large_hay.index(first_byte_common)

  # === Needle length variations (all absent) ===
  # 2-, 4- and 8-byte needles against the same 4096-byte haystack.
  index_needle_2_absent: |
    medium_hay.index(needle_2)
  index_needle_4_absent: |
    medium_hay.index(needle_4)
  index_needle_8_absent: |
    medium_hay.index(needle_8)

  # === Match at end of haystack ===
  # "xy" is the last two bytes, so the whole haystack is scanned first.
  index_match_at_end: |
    hay_match_end.index(needle_2)

  # === Match at start of haystack ===
  # "xy" is found at offset 0.
  index_match_at_start: |
    hay_match_start.index(needle_2)

  # === include? (same code path) ===
  include_first_byte_absent: |
    medium_hay.include?(first_byte_absent)
  include_first_byte_common: |
    medium_hay.include?(first_byte_common)

  # === byteindex ===
  byteindex_first_byte_absent: |
    medium_hay.byteindex(first_byte_absent)
  byteindex_first_byte_common: |
    medium_hay.byteindex(first_byte_common)

  # === Mixed/realistic haystack ===
  # "xyzw" never occurs in mixed_hay (it contains only a-j and z).
  index_mixed_absent: |
    mixed_hay.index(needle_4)
  # "ijab" first occurs at offset 8, where one "abcdefghij" run meets the next.
  # The literal is written with `.freeze` so that no new String is allocated
  # on every iteration; like the other entries, only the search is timed.
  index_mixed_present: |
    mixed_hay.index("ijab".freeze)