Optimize File.join common use case

`File.join` is a hotspot for common libraries such as Zeitwerk and Bootsnap.

It has a fairly flexible signature, but 99% of the time it's called with just two (or a small number of) UTF-8 strings. If we optimistically optimize for that use case, we can cut down a large number of type and encoding checks, significantly speeding up the method.

The one remaining expensive check we could try to optimize is `str_null_check`. Given it's common to use the same base string for joining, we could memoize it. Also we could precompute it for literal strings.

```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-18T12:10:38Z spedup-file-join 069bab58d4) +PRISM [arm64-darwin25]
warming up....

|              |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|two_strings   |      2.475M|    9.444M|
|              |           -|     3.82x|
|many_strings  |    551.975k|    2.346M|
|              |           -|     4.25x|
|array         |    514.946k|   522.034k|
|              |           -|     1.01x|
|mixed         |    621.236k|   633.189k|
|              |           -|     1.02x|
```
author: Jean Boussier <jean.boussier@gmail.com> 2026-01-18 10:33:54 +0100
committer: Jean Boussier <jean.boussier@gmail.com> 2026-01-18 16:31:31 +0100
commit: 6cd4549060a608d8a7e5ee0dde2c4b69b08d7f6e (patch)
tree: 17cd606e1d3ecd918d00c515126f29b0b3456a3e /string.c
parent: d1dc4bdb2fe7f16e6da78c0930353e4a5031465a (diff)
1 files changed, 16 insertions, 31 deletions
diff --git a/string.c b/string.c
index 2d74c46a36..cfadabd379 100644
--- a/string.c
+++ b/string.c
@@ -146,27 +146,7 @@ VALUE rb_cSymbol;
     RSTRING(str)->len = (n); \
 } while (0)
 
-static inline bool
-str_encindex_fastpath(int encindex)
-{
-    // The overwhelming majority of strings are in one of these 3 encodings.
-    switch (encindex) {
-      case ENCINDEX_ASCII_8BIT:
-      case ENCINDEX_UTF_8:
-      case ENCINDEX_US_ASCII:
-        return true;
-      default:
-        return false;
-    }
-}
-
-static inline bool
-str_enc_fastpath(VALUE str)
-{
-    return str_encindex_fastpath(ENCODING_GET_INLINED(str));
-}
-
-#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
+#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
 #define TERM_FILL(ptr, termlen) do {\
     char *const term_fill_ptr = (ptr);\
     const int term_fill_len = (termlen);\
@@ -960,7 +940,7 @@ static inline bool
 rb_enc_str_asciicompat(VALUE str)
 {
     int encindex = ENCODING_GET_INLINED(str);
-    return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
+    return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
 }
 
 int
@@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str)
         rb_raise(rb_eTypeError, "not encoding capable object");
     }
 
-    if (RB_LIKELY(str_encindex_fastpath(encindex))) {
+    if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
         return;
     }
 
@@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w)
 {
     char *s = RSTRING_PTR(str);
     long len = RSTRING_LEN(str);
-    rb_encoding *enc = rb_enc_get(str);
-    const int minlen = rb_enc_mbminlen(enc);
+    int minlen = 1;
+
+    if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+        rb_encoding *enc = rb_enc_get(str);
+        minlen = rb_enc_mbminlen(enc);
 
-    if (minlen > 1) {
-        *w = 1;
-        if (str_null_char(s, len, minlen, enc)) {
-            return NULL;
+        if (minlen > 1) {
+            *w = 1;
+            if (str_null_char(s, len, minlen, enc)) {
+                return NULL;
+            }
+            return str_fill_term(str, s, len, minlen);
         }
-        return str_fill_term(str, s, len, minlen);
     }
+
     *w = 0;
     if (!s || memchr(s, 0, len)) {
         return NULL;
@@ -3765,7 +3750,7 @@ rb_str_buf_append(VALUE str, VALUE str2)
 {
     int str2_cr = rb_enc_str_coderange(str2);
 
-    if (str_enc_fastpath(str)) {
+    if (rb_str_enc_fastpath(str)) {
         switch (str2_cr) {
           case ENC_CODERANGE_7BIT:
             // If RHS is 7bit we can do simple concatenation
author	Jean Boussier <jean.boussier@gmail.com>	2026-01-18 10:33:54 +0100
committer	Jean Boussier <jean.boussier@gmail.com>	2026-01-18 16:31:31 +0100
commit	6cd4549060a608d8a7e5ee0dde2c4b69b08d7f6e (patch)
tree	17cd606e1d3ecd918d00c515126f29b0b3456a3e /string.c
parent	d1dc4bdb2fe7f16e6da78c0930353e4a5031465a (diff)