blob: cde363289a5187c8b84df7d410b366a3895c49d2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# Substring-search benchmarks for String#index, String#include? and
# String#byteindex with short needles.
#
# `prelude` runs once as setup; each entry under `benchmark` is the snippet
# that gets timed. Every needle and haystack is built in the prelude so the
# timed snippets contain nothing but the search call itself.
prelude: |
  # Haystacks of various sizes
  small_hay = "a" * 256
  medium_hay = "a" * 4096
  large_hay = "a" * 65536
  # Short needles (2-8 bytes) that exercise rb_memsearch_ss
  needle_2 = "xy"
  needle_4 = "xyzw"
  needle_8 = "xyzwabcd"
  # Needle whose first byte is absent from the haystack (memchr fast-path)
  # vs needle whose first byte is common (rolling hash comparison)
  first_byte_absent = "x" + "a" * 3
  first_byte_common = "a" + "x" * 3
  # Haystack with match at the end (4095 filler bytes, then the needle)
  hay_match_end = "a" * 4095 + "xy"
  # Haystack with match at the start (the needle, then 4094 filler bytes)
  hay_match_start = "xy" + "a" * 4094
  # Mixed content haystack (more realistic)
  mixed_hay = (("abcdefghij" * 100) + "z") * 10
  # Needle that occurs in mixed_hay (first at offset 8, spanning two
  # consecutive "abcdefghij" repetitions). Defined here rather than inline
  # in the benchmark so the timed snippet does not also build a new String
  # literal on every iteration, matching how every other needle is supplied.
  needle_mixed_present = "ijab"
benchmark:
  # === First byte absent from haystack (biggest win for rolling hash) ===
  index_first_byte_absent_small: |
    small_hay.index(first_byte_absent)
  index_first_byte_absent_medium: |
    medium_hay.index(first_byte_absent)
  index_first_byte_absent_large: |
    large_hay.index(first_byte_absent)
  # === First byte common in haystack (stresses comparison loop) ===
  index_first_byte_common_small: |
    small_hay.index(first_byte_common)
  index_first_byte_common_medium: |
    medium_hay.index(first_byte_common)
  index_first_byte_common_large: |
    large_hay.index(first_byte_common)
  # === Needle length variations (all absent) ===
  index_needle_2_absent: |
    medium_hay.index(needle_2)
  index_needle_4_absent: |
    medium_hay.index(needle_4)
  index_needle_8_absent: |
    medium_hay.index(needle_8)
  # === Match at end of haystack ===
  index_match_at_end: |
    hay_match_end.index(needle_2)
  # === Match at start of haystack ===
  index_match_at_start: |
    hay_match_start.index(needle_2)
  # === include? (same code path) ===
  include_first_byte_absent: |
    medium_hay.include?(first_byte_absent)
  include_first_byte_common: |
    medium_hay.include?(first_byte_common)
  # === byteindex ===
  byteindex_first_byte_absent: |
    medium_hay.byteindex(first_byte_absent)
  byteindex_first_byte_common: |
    medium_hay.byteindex(first_byte_common)
  # === Mixed/realistic haystack ===
  index_mixed_absent: |
    mixed_hay.index(needle_4)
  index_mixed_present: |
    mixed_hay.index(needle_mixed_present)
|