summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author Jean Boussier <jean.boussier@gmail.com> 2026-01-18 10:33:54 +0100
committer Jean Boussier <jean.boussier@gmail.com> 2026-01-18 16:31:31 +0100
commit 6cd4549060a608d8a7e5ee0dde2c4b69b08d7f6e (patch)
tree 17cd606e1d3ecd918d00c515126f29b0b3456a3e /string.c
parent d1dc4bdb2fe7f16e6da78c0930353e4a5031465a (diff)
Optimize File.join common use case
`File.join` is a hotspot for common libraries such as Zeitwerk and Bootsnap. It has a fairly flexible signature, but 99% of the time it's called with just two (or a small number of) UTF-8 strings.

If we optimistically optimize for that use case we can cut down a large number of type and encoding checks, significantly speeding up the method.

The one remaining expensive check we could try to optimize is `str_null_check`. Given it's common to use the same base string for joining, we could memoize it. Also we could precompute it for literal strings.

```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-18T12:10:38Z spedup-file-join 069bab58d4) +PRISM [arm64-darwin25]
warming up....

|              |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|two_strings   |      2.475M|    9.444M|
|              |           -|     3.82x|
|many_strings  |    551.975k|    2.346M|
|              |           -|     4.25x|
|array         |    514.946k|  522.034k|
|              |           -|     1.01x|
|mixed         |    621.236k|  633.189k|
|              |           -|     1.02x|
```
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 47
1 files changed, 16 insertions, 31 deletions
diff --git a/string.c b/string.c
index 2d74c46a36..cfadabd379 100644
--- a/string.c
+++ b/string.c
@@ -146,27 +146,7 @@ VALUE rb_cSymbol;
RSTRING(str)->len = (n); \
} while (0)
-static inline bool
-str_encindex_fastpath(int encindex)
-{
- // The overwhelming majority of strings are in one of these 3 encodings.
- switch (encindex) {
- case ENCINDEX_ASCII_8BIT:
- case ENCINDEX_UTF_8:
- case ENCINDEX_US_ASCII:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool
-str_enc_fastpath(VALUE str)
-{
- return str_encindex_fastpath(ENCODING_GET_INLINED(str));
-}
-
-#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
+#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
@@ -960,7 +940,7 @@ static inline bool
rb_enc_str_asciicompat(VALUE str)
{
int encindex = ENCODING_GET_INLINED(str);
- return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
+ return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
}
int
@@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str)
rb_raise(rb_eTypeError, "not encoding capable object");
}
- if (RB_LIKELY(str_encindex_fastpath(encindex))) {
+ if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
return;
}
@@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w)
{
char *s = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
- rb_encoding *enc = rb_enc_get(str);
- const int minlen = rb_enc_mbminlen(enc);
+ int minlen = 1;
+
+ if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+ rb_encoding *enc = rb_enc_get(str);
+ minlen = rb_enc_mbminlen(enc);
- if (minlen > 1) {
- *w = 1;
- if (str_null_char(s, len, minlen, enc)) {
- return NULL;
+ if (minlen > 1) {
+ *w = 1;
+ if (str_null_char(s, len, minlen, enc)) {
+ return NULL;
+ }
+ return str_fill_term(str, s, len, minlen);
}
- return str_fill_term(str, s, len, minlen);
}
+
*w = 0;
if (!s || memchr(s, 0, len)) {
return NULL;
@@ -3765,7 +3750,7 @@ rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
- if (str_enc_fastpath(str)) {
+ if (rb_str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
// If RHS is 7bit we can do simple concatenation