summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2026-01-18 10:33:54 +0100
committerJean Boussier <jean.boussier@gmail.com>2026-01-18 16:31:31 +0100
commit6cd4549060a608d8a7e5ee0dde2c4b69b08d7f6e (patch)
tree17cd606e1d3ecd918d00c515126f29b0b3456a3e
parentd1dc4bdb2fe7f16e6da78c0930353e4a5031465a (diff)
Optimize File.join common use case
`File.join` is a hotspot for common libraries such as Zeitwerk and Bootsnap. It has a fairly flexible signature, but 99% of the time it's called with just two (or a small number of) UTF-8 strings. If we optimistically optimize for that use case we can cut down a large number of type and encoding checks, significantly speeding up the method. The one remaining expensive check we could try to optimize is `str_null_check`. Given it's common to use the same base string for joining, we could memoize it. Also we could precompute it for literal strings. ``` compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25] built-ruby: ruby 4.1.0dev (2026-01-18T12:10:38Z spedup-file-join 069bab58d4) +PRISM [arm64-darwin25] warming up.... | |compare-ruby|built-ruby| |:-------------|-----------:|---------:| |two_strings | 2.475M| 9.444M| | | -| 3.82x| |many_strings | 551.975k| 2.346M| | | -| 4.25x| |array | 514.946k| 522.034k| | | -| 1.01x| |mixed | 621.236k| 633.189k| | | -| 1.02x| ```
-rw-r--r--benchmark/file_join.yml7
-rw-r--r--depend41
-rw-r--r--ext/-test-/stack/depend1
-rw-r--r--ext/-test-/string/depend3
-rw-r--r--ext/objspace/depend1
-rw-r--r--ext/ripper/depend1
-rw-r--r--ext/socket/depend15
-rw-r--r--file.c99
-rw-r--r--internal/string.h21
-rw-r--r--string.c47
10 files changed, 197 insertions, 39 deletions
diff --git a/benchmark/file_join.yml b/benchmark/file_join.yml
new file mode 100644
index 0000000000..845257cf1e
--- /dev/null
+++ b/benchmark/file_join.yml
@@ -0,0 +1,7 @@
+prelude: |
+ # frozen_string_literal: true
+benchmark:
+ two_strings: File.join(__FILE__, "path")
+ many_strings: File.join(__FILE__, "path", "a", "b", "c", "d")
+ array: File.join([__FILE__, "path", "a", "b", "c", "d"])
+ mixed: File.join(__FILE__, "path", "a", "b", ["c", "d"])
diff --git a/depend b/depend
index cfafc77703..7372902666 100644
--- a/depend
+++ b/depend
@@ -799,6 +799,7 @@ box.$(OBJEXT): {$(VPATH)}constant.h
box.$(OBJEXT): {$(VPATH)}darray.h
box.$(OBJEXT): {$(VPATH)}debug_counter.h
box.$(OBJEXT): {$(VPATH)}defines.h
+box.$(OBJEXT): {$(VPATH)}encindex.h
box.$(OBJEXT): {$(VPATH)}encoding.h
box.$(OBJEXT): {$(VPATH)}eval_intern.h
box.$(OBJEXT): {$(VPATH)}id.h
@@ -1250,6 +1251,7 @@ class.$(OBJEXT): {$(VPATH)}config.h
class.$(OBJEXT): {$(VPATH)}constant.h
class.$(OBJEXT): {$(VPATH)}debug_counter.h
class.$(OBJEXT): {$(VPATH)}defines.h
+class.$(OBJEXT): {$(VPATH)}encindex.h
class.$(OBJEXT): {$(VPATH)}encoding.h
class.$(OBJEXT): {$(VPATH)}id.h
class.$(OBJEXT): {$(VPATH)}id_table.h
@@ -1449,6 +1451,7 @@ compar.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
compar.$(OBJEXT): {$(VPATH)}compar.c
compar.$(OBJEXT): {$(VPATH)}config.h
compar.$(OBJEXT): {$(VPATH)}defines.h
+compar.$(OBJEXT): {$(VPATH)}encindex.h
compar.$(OBJEXT): {$(VPATH)}encoding.h
compar.$(OBJEXT): {$(VPATH)}id.h
compar.$(OBJEXT): {$(VPATH)}intern.h
@@ -1921,6 +1924,7 @@ complex.$(OBJEXT): {$(VPATH)}config.h
complex.$(OBJEXT): {$(VPATH)}constant.h
complex.$(OBJEXT): {$(VPATH)}debug_counter.h
complex.$(OBJEXT): {$(VPATH)}defines.h
+complex.$(OBJEXT): {$(VPATH)}encindex.h
complex.$(OBJEXT): {$(VPATH)}encoding.h
complex.$(OBJEXT): {$(VPATH)}id.h
complex.$(OBJEXT): {$(VPATH)}id_table.h
@@ -2126,6 +2130,7 @@ concurrent_set.$(OBJEXT): {$(VPATH)}concurrent_set.c
concurrent_set.$(OBJEXT): {$(VPATH)}config.h
concurrent_set.$(OBJEXT): {$(VPATH)}debug_counter.h
concurrent_set.$(OBJEXT): {$(VPATH)}defines.h
+concurrent_set.$(OBJEXT): {$(VPATH)}encindex.h
concurrent_set.$(OBJEXT): {$(VPATH)}encoding.h
concurrent_set.$(OBJEXT): {$(VPATH)}id.h
concurrent_set.$(OBJEXT): {$(VPATH)}id_table.h
@@ -2364,6 +2369,7 @@ cont.$(OBJEXT): {$(VPATH)}constant.h
cont.$(OBJEXT): {$(VPATH)}cont.c
cont.$(OBJEXT): {$(VPATH)}debug_counter.h
cont.$(OBJEXT): {$(VPATH)}defines.h
+cont.$(OBJEXT): {$(VPATH)}encindex.h
cont.$(OBJEXT): {$(VPATH)}encoding.h
cont.$(OBJEXT): {$(VPATH)}eval_intern.h
cont.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@@ -4906,6 +4912,7 @@ enumerator.$(OBJEXT): {$(VPATH)}config.h
enumerator.$(OBJEXT): {$(VPATH)}constant.h
enumerator.$(OBJEXT): {$(VPATH)}debug_counter.h
enumerator.$(OBJEXT): {$(VPATH)}defines.h
+enumerator.$(OBJEXT): {$(VPATH)}encindex.h
enumerator.$(OBJEXT): {$(VPATH)}encoding.h
enumerator.$(OBJEXT): {$(VPATH)}enumerator.c
enumerator.$(OBJEXT): {$(VPATH)}id.h
@@ -5126,6 +5133,7 @@ error.$(OBJEXT): {$(VPATH)}config.h
error.$(OBJEXT): {$(VPATH)}constant.h
error.$(OBJEXT): {$(VPATH)}debug_counter.h
error.$(OBJEXT): {$(VPATH)}defines.h
+error.$(OBJEXT): {$(VPATH)}encindex.h
error.$(OBJEXT): {$(VPATH)}encoding.h
error.$(OBJEXT): {$(VPATH)}error.c
error.$(OBJEXT): {$(VPATH)}id.h
@@ -5373,6 +5381,7 @@ eval.$(OBJEXT): {$(VPATH)}config.h
eval.$(OBJEXT): {$(VPATH)}constant.h
eval.$(OBJEXT): {$(VPATH)}debug_counter.h
eval.$(OBJEXT): {$(VPATH)}defines.h
+eval.$(OBJEXT): {$(VPATH)}encindex.h
eval.$(OBJEXT): {$(VPATH)}encoding.h
eval.$(OBJEXT): {$(VPATH)}eval.c
eval.$(OBJEXT): {$(VPATH)}eval_error.c
@@ -5584,6 +5593,7 @@ file.$(OBJEXT): $(top_srcdir)/internal/array.h
file.$(OBJEXT): $(top_srcdir)/internal/class.h
file.$(OBJEXT): $(top_srcdir)/internal/compilers.h
file.$(OBJEXT): $(top_srcdir)/internal/dir.h
+file.$(OBJEXT): $(top_srcdir)/internal/encoding.h
file.$(OBJEXT): $(top_srcdir)/internal/error.h
file.$(OBJEXT): $(top_srcdir)/internal/file.h
file.$(OBJEXT): $(top_srcdir)/internal/gc.h
@@ -5865,6 +5875,7 @@ gc.$(OBJEXT): {$(VPATH)}darray.h
gc.$(OBJEXT): {$(VPATH)}debug.h
gc.$(OBJEXT): {$(VPATH)}debug_counter.h
gc.$(OBJEXT): {$(VPATH)}defines.h
+gc.$(OBJEXT): {$(VPATH)}encindex.h
gc.$(OBJEXT): {$(VPATH)}encoding.h
gc.$(OBJEXT): {$(VPATH)}eval_intern.h
gc.$(OBJEXT): {$(VPATH)}gc.c
@@ -6373,6 +6384,7 @@ hash.$(OBJEXT): {$(VPATH)}config.h
hash.$(OBJEXT): {$(VPATH)}constant.h
hash.$(OBJEXT): {$(VPATH)}debug_counter.h
hash.$(OBJEXT): {$(VPATH)}defines.h
+hash.$(OBJEXT): {$(VPATH)}encindex.h
hash.$(OBJEXT): {$(VPATH)}encoding.h
hash.$(OBJEXT): {$(VPATH)}hash.c
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
@@ -7203,6 +7215,7 @@ io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
io_buffer.$(OBJEXT): {$(VPATH)}config.h
io_buffer.$(OBJEXT): {$(VPATH)}defines.h
+io_buffer.$(OBJEXT): {$(VPATH)}encindex.h
io_buffer.$(OBJEXT): {$(VPATH)}encoding.h
io_buffer.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
io_buffer.$(OBJEXT): {$(VPATH)}id.h
@@ -7454,6 +7467,7 @@ iseq.$(OBJEXT): {$(VPATH)}config.h
iseq.$(OBJEXT): {$(VPATH)}constant.h
iseq.$(OBJEXT): {$(VPATH)}debug_counter.h
iseq.$(OBJEXT): {$(VPATH)}defines.h
+iseq.$(OBJEXT): {$(VPATH)}encindex.h
iseq.$(OBJEXT): {$(VPATH)}encoding.h
iseq.$(OBJEXT): {$(VPATH)}eval_intern.h
iseq.$(OBJEXT): {$(VPATH)}id.h
@@ -7702,6 +7716,7 @@ jit.$(OBJEXT): {$(VPATH)}config.h
jit.$(OBJEXT): {$(VPATH)}constant.h
jit.$(OBJEXT): {$(VPATH)}debug_counter.h
jit.$(OBJEXT): {$(VPATH)}defines.h
+jit.$(OBJEXT): {$(VPATH)}encindex.h
jit.$(OBJEXT): {$(VPATH)}encoding.h
jit.$(OBJEXT): {$(VPATH)}id.h
jit.$(OBJEXT): {$(VPATH)}id_table.h
@@ -7956,6 +7971,7 @@ load.$(OBJEXT): {$(VPATH)}constant.h
load.$(OBJEXT): {$(VPATH)}darray.h
load.$(OBJEXT): {$(VPATH)}defines.h
load.$(OBJEXT): {$(VPATH)}dln.h
+load.$(OBJEXT): {$(VPATH)}encindex.h
load.$(OBJEXT): {$(VPATH)}encoding.h
load.$(OBJEXT): {$(VPATH)}eval_intern.h
load.$(OBJEXT): {$(VPATH)}id.h
@@ -9979,6 +9995,7 @@ numeric.$(OBJEXT): {$(VPATH)}builtin.h
numeric.$(OBJEXT): {$(VPATH)}config.h
numeric.$(OBJEXT): {$(VPATH)}constant.h
numeric.$(OBJEXT): {$(VPATH)}defines.h
+numeric.$(OBJEXT): {$(VPATH)}encindex.h
numeric.$(OBJEXT): {$(VPATH)}encoding.h
numeric.$(OBJEXT): {$(VPATH)}id.h
numeric.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10200,6 +10217,7 @@ object.$(OBJEXT): {$(VPATH)}config.h
object.$(OBJEXT): {$(VPATH)}constant.h
object.$(OBJEXT): {$(VPATH)}debug_counter.h
object.$(OBJEXT): {$(VPATH)}defines.h
+object.$(OBJEXT): {$(VPATH)}encindex.h
object.$(OBJEXT): {$(VPATH)}encoding.h
object.$(OBJEXT): {$(VPATH)}id.h
object.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10418,6 +10436,7 @@ pack.$(OBJEXT): {$(VPATH)}builtin.h
pack.$(OBJEXT): {$(VPATH)}config.h
pack.$(OBJEXT): {$(VPATH)}constant.h
pack.$(OBJEXT): {$(VPATH)}defines.h
+pack.$(OBJEXT): {$(VPATH)}encindex.h
pack.$(OBJEXT): {$(VPATH)}encoding.h
pack.$(OBJEXT): {$(VPATH)}id.h
pack.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10644,6 +10663,7 @@ parse.$(OBJEXT): {$(VPATH)}config.h
parse.$(OBJEXT): {$(VPATH)}constant.h
parse.$(OBJEXT): {$(VPATH)}defines.h
parse.$(OBJEXT): {$(VPATH)}defs/keywords
+parse.$(OBJEXT): {$(VPATH)}encindex.h
parse.$(OBJEXT): {$(VPATH)}encoding.h
parse.$(OBJEXT): {$(VPATH)}id.h
parse.$(OBJEXT): {$(VPATH)}id_table.h
@@ -12125,6 +12145,7 @@ proc.$(OBJEXT): {$(VPATH)}config.h
proc.$(OBJEXT): {$(VPATH)}constant.h
proc.$(OBJEXT): {$(VPATH)}debug_counter.h
proc.$(OBJEXT): {$(VPATH)}defines.h
+proc.$(OBJEXT): {$(VPATH)}encindex.h
proc.$(OBJEXT): {$(VPATH)}encoding.h
proc.$(OBJEXT): {$(VPATH)}eval_intern.h
proc.$(OBJEXT): {$(VPATH)}id.h
@@ -12356,6 +12377,7 @@ process.$(OBJEXT): {$(VPATH)}constant.h
process.$(OBJEXT): {$(VPATH)}debug_counter.h
process.$(OBJEXT): {$(VPATH)}defines.h
process.$(OBJEXT): {$(VPATH)}dln.h
+process.$(OBJEXT): {$(VPATH)}encindex.h
process.$(OBJEXT): {$(VPATH)}encoding.h
process.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
process.$(OBJEXT): {$(VPATH)}hrtime.h
@@ -12585,6 +12607,7 @@ ractor.$(OBJEXT): {$(VPATH)}config.h
ractor.$(OBJEXT): {$(VPATH)}constant.h
ractor.$(OBJEXT): {$(VPATH)}debug_counter.h
ractor.$(OBJEXT): {$(VPATH)}defines.h
+ractor.$(OBJEXT): {$(VPATH)}encindex.h
ractor.$(OBJEXT): {$(VPATH)}encoding.h
ractor.$(OBJEXT): {$(VPATH)}eval_intern.h
ractor.$(OBJEXT): {$(VPATH)}id.h
@@ -13018,6 +13041,7 @@ range.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
range.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
range.$(OBJEXT): {$(VPATH)}config.h
range.$(OBJEXT): {$(VPATH)}defines.h
+range.$(OBJEXT): {$(VPATH)}encindex.h
range.$(OBJEXT): {$(VPATH)}encoding.h
range.$(OBJEXT): {$(VPATH)}id.h
range.$(OBJEXT): {$(VPATH)}id_table.h
@@ -14688,6 +14712,7 @@ ruby.$(OBJEXT): {$(VPATH)}constant.h
ruby.$(OBJEXT): {$(VPATH)}debug_counter.h
ruby.$(OBJEXT): {$(VPATH)}defines.h
ruby.$(OBJEXT): {$(VPATH)}dln.h
+ruby.$(OBJEXT): {$(VPATH)}encindex.h
ruby.$(OBJEXT): {$(VPATH)}encoding.h
ruby.$(OBJEXT): {$(VPATH)}eval_intern.h
ruby.$(OBJEXT): {$(VPATH)}id.h
@@ -14896,6 +14921,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ruby_parser.$(OBJEXT): {$(VPATH)}config.h
ruby_parser.$(OBJEXT): {$(VPATH)}defines.h
+ruby_parser.$(OBJEXT): {$(VPATH)}encindex.h
ruby_parser.$(OBJEXT): {$(VPATH)}encoding.h
ruby_parser.$(OBJEXT): {$(VPATH)}intern.h
ruby_parser.$(OBJEXT): {$(VPATH)}internal.h
@@ -15306,6 +15332,7 @@ set.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
set.$(OBJEXT): {$(VPATH)}config.h
set.$(OBJEXT): {$(VPATH)}constant.h
set.$(OBJEXT): {$(VPATH)}defines.h
+set.$(OBJEXT): {$(VPATH)}encindex.h
set.$(OBJEXT): {$(VPATH)}encoding.h
set.$(OBJEXT): {$(VPATH)}id.h
set.$(OBJEXT): {$(VPATH)}id_table.h
@@ -15678,6 +15705,7 @@ shape.$(OBJEXT): {$(VPATH)}config.h
shape.$(OBJEXT): {$(VPATH)}constant.h
shape.$(OBJEXT): {$(VPATH)}debug_counter.h
shape.$(OBJEXT): {$(VPATH)}defines.h
+shape.$(OBJEXT): {$(VPATH)}encindex.h
shape.$(OBJEXT): {$(VPATH)}encoding.h
shape.$(OBJEXT): {$(VPATH)}id.h
shape.$(OBJEXT): {$(VPATH)}id_table.h
@@ -15892,6 +15920,7 @@ signal.$(OBJEXT): {$(VPATH)}config.h
signal.$(OBJEXT): {$(VPATH)}constant.h
signal.$(OBJEXT): {$(VPATH)}debug_counter.h
signal.$(OBJEXT): {$(VPATH)}defines.h
+signal.$(OBJEXT): {$(VPATH)}encindex.h
signal.$(OBJEXT): {$(VPATH)}encoding.h
signal.$(OBJEXT): {$(VPATH)}eval_intern.h
signal.$(OBJEXT): {$(VPATH)}id.h
@@ -16101,6 +16130,7 @@ sprintf.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
sprintf.$(OBJEXT): {$(VPATH)}config.h
sprintf.$(OBJEXT): {$(VPATH)}constant.h
sprintf.$(OBJEXT): {$(VPATH)}defines.h
+sprintf.$(OBJEXT): {$(VPATH)}encindex.h
sprintf.$(OBJEXT): {$(VPATH)}encoding.h
sprintf.$(OBJEXT): {$(VPATH)}id.h
sprintf.$(OBJEXT): {$(VPATH)}id_table.h
@@ -16457,6 +16487,7 @@ strftime.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
strftime.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
strftime.$(OBJEXT): {$(VPATH)}config.h
strftime.$(OBJEXT): {$(VPATH)}defines.h
+strftime.$(OBJEXT): {$(VPATH)}encindex.h
strftime.$(OBJEXT): {$(VPATH)}encoding.h
strftime.$(OBJEXT): {$(VPATH)}intern.h
strftime.$(OBJEXT): {$(VPATH)}internal.h
@@ -16925,6 +16956,7 @@ struct.$(OBJEXT): {$(VPATH)}config.h
struct.$(OBJEXT): {$(VPATH)}constant.h
struct.$(OBJEXT): {$(VPATH)}debug_counter.h
struct.$(OBJEXT): {$(VPATH)}defines.h
+struct.$(OBJEXT): {$(VPATH)}encindex.h
struct.$(OBJEXT): {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}id.h
struct.$(OBJEXT): {$(VPATH)}id_table.h
@@ -17141,6 +17173,7 @@ symbol.$(OBJEXT): {$(VPATH)}constant.h
symbol.$(OBJEXT): {$(VPATH)}darray.h
symbol.$(OBJEXT): {$(VPATH)}debug_counter.h
symbol.$(OBJEXT): {$(VPATH)}defines.h
+symbol.$(OBJEXT): {$(VPATH)}encindex.h
symbol.$(OBJEXT): {$(VPATH)}encoding.h
symbol.$(OBJEXT): {$(VPATH)}id.c
symbol.$(OBJEXT): {$(VPATH)}id.h
@@ -17398,6 +17431,7 @@ thread.$(OBJEXT): {$(VPATH)}constant.h
thread.$(OBJEXT): {$(VPATH)}debug.h
thread.$(OBJEXT): {$(VPATH)}debug_counter.h
thread.$(OBJEXT): {$(VPATH)}defines.h
+thread.$(OBJEXT): {$(VPATH)}encindex.h
thread.$(OBJEXT): {$(VPATH)}encoding.h
thread.$(OBJEXT): {$(VPATH)}eval_intern.h
thread.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@@ -17628,6 +17662,7 @@ time.$(OBJEXT): {$(VPATH)}builtin.h
time.$(OBJEXT): {$(VPATH)}config.h
time.$(OBJEXT): {$(VPATH)}constant.h
time.$(OBJEXT): {$(VPATH)}defines.h
+time.$(OBJEXT): {$(VPATH)}encindex.h
time.$(OBJEXT): {$(VPATH)}encoding.h
time.$(OBJEXT): {$(VPATH)}id.h
time.$(OBJEXT): {$(VPATH)}id_table.h
@@ -17830,6 +17865,7 @@ transcode.$(OBJEXT): {$(VPATH)}config.h
transcode.$(OBJEXT): {$(VPATH)}constant.h
transcode.$(OBJEXT): {$(VPATH)}debug_counter.h
transcode.$(OBJEXT): {$(VPATH)}defines.h
+transcode.$(OBJEXT): {$(VPATH)}encindex.h
transcode.$(OBJEXT): {$(VPATH)}encoding.h
transcode.$(OBJEXT): {$(VPATH)}id.h
transcode.$(OBJEXT): {$(VPATH)}id_table.h
@@ -18211,6 +18247,7 @@ variable.$(OBJEXT): {$(VPATH)}config.h
variable.$(OBJEXT): {$(VPATH)}constant.h
variable.$(OBJEXT): {$(VPATH)}debug_counter.h
variable.$(OBJEXT): {$(VPATH)}defines.h
+variable.$(OBJEXT): {$(VPATH)}encindex.h
variable.$(OBJEXT): {$(VPATH)}encoding.h
variable.$(OBJEXT): {$(VPATH)}id.h
variable.$(OBJEXT): {$(VPATH)}id_table.h
@@ -18687,6 +18724,7 @@ vm.$(OBJEXT): {$(VPATH)}constant.h
vm.$(OBJEXT): {$(VPATH)}debug_counter.h
vm.$(OBJEXT): {$(VPATH)}defines.h
vm.$(OBJEXT): {$(VPATH)}defs/opt_operand.def
+vm.$(OBJEXT): {$(VPATH)}encindex.h
vm.$(OBJEXT): {$(VPATH)}encoding.h
vm.$(OBJEXT): {$(VPATH)}eval_intern.h
vm.$(OBJEXT): {$(VPATH)}id.h
@@ -18951,6 +18989,7 @@ vm_backtrace.$(OBJEXT): {$(VPATH)}constant.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug_counter.h
vm_backtrace.$(OBJEXT): {$(VPATH)}defines.h
+vm_backtrace.$(OBJEXT): {$(VPATH)}encindex.h
vm_backtrace.$(OBJEXT): {$(VPATH)}encoding.h
vm_backtrace.$(OBJEXT): {$(VPATH)}eval_intern.h
vm_backtrace.$(OBJEXT): {$(VPATH)}id.h
@@ -20087,6 +20126,7 @@ yjit.$(OBJEXT): {$(VPATH)}constant.h
yjit.$(OBJEXT): {$(VPATH)}debug.h
yjit.$(OBJEXT): {$(VPATH)}debug_counter.h
yjit.$(OBJEXT): {$(VPATH)}defines.h
+yjit.$(OBJEXT): {$(VPATH)}encindex.h
yjit.$(OBJEXT): {$(VPATH)}encoding.h
yjit.$(OBJEXT): {$(VPATH)}id.h
yjit.$(OBJEXT): {$(VPATH)}id_table.h
@@ -20342,6 +20382,7 @@ zjit.$(OBJEXT): {$(VPATH)}constant.h
zjit.$(OBJEXT): {$(VPATH)}debug.h
zjit.$(OBJEXT): {$(VPATH)}debug_counter.h
zjit.$(OBJEXT): {$(VPATH)}defines.h
+zjit.$(OBJEXT): {$(VPATH)}encindex.h
zjit.$(OBJEXT): {$(VPATH)}encoding.h
zjit.$(OBJEXT): {$(VPATH)}id.h
zjit.$(OBJEXT): {$(VPATH)}id_table.h
diff --git a/ext/-test-/stack/depend b/ext/-test-/stack/depend
index 31571c882e..77e93bb201 100644
--- a/ext/-test-/stack/depend
+++ b/ext/-test-/stack/depend
@@ -172,6 +172,7 @@ stack.o: $(hdrdir)/ruby/oniguruma.h
stack.o: $(hdrdir)/ruby/ruby.h
stack.o: $(hdrdir)/ruby/st.h
stack.o: $(hdrdir)/ruby/subst.h
+stack.o: $(top_srcdir)/encindex.h
stack.o: $(top_srcdir)/internal/compilers.h
stack.o: $(top_srcdir)/internal/string.h
stack.o: stack.c
diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend
index de6e775acc..478ae3b82b 100644
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@@ -172,6 +172,7 @@ capacity.o: $(hdrdir)/ruby/oniguruma.h
capacity.o: $(hdrdir)/ruby/ruby.h
capacity.o: $(hdrdir)/ruby/st.h
capacity.o: $(hdrdir)/ruby/subst.h
+capacity.o: $(top_srcdir)/encindex.h
capacity.o: $(top_srcdir)/internal/compilers.h
capacity.o: $(top_srcdir)/internal/string.h
capacity.o: capacity.c
@@ -679,6 +680,7 @@ cstr.o: $(hdrdir)/ruby/oniguruma.h
cstr.o: $(hdrdir)/ruby/ruby.h
cstr.o: $(hdrdir)/ruby/st.h
cstr.o: $(hdrdir)/ruby/subst.h
+cstr.o: $(top_srcdir)/encindex.h
cstr.o: $(top_srcdir)/internal.h
cstr.o: $(top_srcdir)/internal/compilers.h
cstr.o: $(top_srcdir)/internal/string.h
@@ -1535,6 +1537,7 @@ fstring.o: $(hdrdir)/ruby/oniguruma.h
fstring.o: $(hdrdir)/ruby/ruby.h
fstring.o: $(hdrdir)/ruby/st.h
fstring.o: $(hdrdir)/ruby/subst.h
+fstring.o: $(top_srcdir)/encindex.h
fstring.o: $(top_srcdir)/internal/compilers.h
fstring.o: $(top_srcdir)/internal/string.h
fstring.o: fstring.c
diff --git a/ext/objspace/depend b/ext/objspace/depend
index 04b26eb6c2..d9dfc0c42b 100644
--- a/ext/objspace/depend
+++ b/ext/objspace/depend
@@ -602,6 +602,7 @@ objspace_dump.o: $(top_srcdir)/ccan/list/list.h
objspace_dump.o: $(top_srcdir)/ccan/str/str.h
objspace_dump.o: $(top_srcdir)/constant.h
objspace_dump.o: $(top_srcdir)/debug_counter.h
+objspace_dump.o: $(top_srcdir)/encindex.h
objspace_dump.o: $(top_srcdir)/id_table.h
objspace_dump.o: $(top_srcdir)/internal.h
objspace_dump.o: $(top_srcdir)/internal/array.h
diff --git a/ext/ripper/depend b/ext/ripper/depend
index bd2de75906..944da25ee9 100644
--- a/ext/ripper/depend
+++ b/ext/ripper/depend
@@ -578,6 +578,7 @@ ripper.o: $(top_srcdir)/ccan/container_of/container_of.h
ripper.o: $(top_srcdir)/ccan/list/list.h
ripper.o: $(top_srcdir)/ccan/str/str.h
ripper.o: $(top_srcdir)/constant.h
+ripper.o: $(top_srcdir)/encindex.h
ripper.o: $(top_srcdir)/id_table.h
ripper.o: $(top_srcdir)/internal.h
ripper.o: $(top_srcdir)/internal/array.h
diff --git a/ext/socket/depend b/ext/socket/depend
index 3573dc45e2..77f6239a3d 100644
--- a/ext/socket/depend
+++ b/ext/socket/depend
@@ -193,6 +193,7 @@ ancdata.o: $(top_srcdir)/ccan/check_type/check_type.h
ancdata.o: $(top_srcdir)/ccan/container_of/container_of.h
ancdata.o: $(top_srcdir)/ccan/list/list.h
ancdata.o: $(top_srcdir)/ccan/str/str.h
+ancdata.o: $(top_srcdir)/encindex.h
ancdata.o: $(top_srcdir)/id_table.h
ancdata.o: $(top_srcdir)/internal.h
ancdata.o: $(top_srcdir)/internal/array.h
@@ -408,6 +409,7 @@ basicsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
basicsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
basicsocket.o: $(top_srcdir)/ccan/list/list.h
basicsocket.o: $(top_srcdir)/ccan/str/str.h
+basicsocket.o: $(top_srcdir)/encindex.h
basicsocket.o: $(top_srcdir)/id_table.h
basicsocket.o: $(top_srcdir)/internal.h
basicsocket.o: $(top_srcdir)/internal/array.h
@@ -623,6 +625,7 @@ constants.o: $(top_srcdir)/ccan/check_type/check_type.h
constants.o: $(top_srcdir)/ccan/container_of/container_of.h
constants.o: $(top_srcdir)/ccan/list/list.h
constants.o: $(top_srcdir)/ccan/str/str.h
+constants.o: $(top_srcdir)/encindex.h
constants.o: $(top_srcdir)/id_table.h
constants.o: $(top_srcdir)/internal.h
constants.o: $(top_srcdir)/internal/array.h
@@ -839,6 +842,7 @@ ifaddr.o: $(top_srcdir)/ccan/check_type/check_type.h
ifaddr.o: $(top_srcdir)/ccan/container_of/container_of.h
ifaddr.o: $(top_srcdir)/ccan/list/list.h
ifaddr.o: $(top_srcdir)/ccan/str/str.h
+ifaddr.o: $(top_srcdir)/encindex.h
ifaddr.o: $(top_srcdir)/id_table.h
ifaddr.o: $(top_srcdir)/internal.h
ifaddr.o: $(top_srcdir)/internal/array.h
@@ -1054,6 +1058,7 @@ init.o: $(top_srcdir)/ccan/check_type/check_type.h
init.o: $(top_srcdir)/ccan/container_of/container_of.h
init.o: $(top_srcdir)/ccan/list/list.h
init.o: $(top_srcdir)/ccan/str/str.h
+init.o: $(top_srcdir)/encindex.h
init.o: $(top_srcdir)/id_table.h
init.o: $(top_srcdir)/internal.h
init.o: $(top_srcdir)/internal/array.h
@@ -1269,6 +1274,7 @@ ipsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
ipsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
ipsocket.o: $(top_srcdir)/ccan/list/list.h
ipsocket.o: $(top_srcdir)/ccan/str/str.h
+ipsocket.o: $(top_srcdir)/encindex.h
ipsocket.o: $(top_srcdir)/id_table.h
ipsocket.o: $(top_srcdir)/internal.h
ipsocket.o: $(top_srcdir)/internal/array.h
@@ -1484,6 +1490,7 @@ option.o: $(top_srcdir)/ccan/check_type/check_type.h
option.o: $(top_srcdir)/ccan/container_of/container_of.h
option.o: $(top_srcdir)/ccan/list/list.h
option.o: $(top_srcdir)/ccan/str/str.h
+option.o: $(top_srcdir)/encindex.h
option.o: $(top_srcdir)/id_table.h
option.o: $(top_srcdir)/internal.h
option.o: $(top_srcdir)/internal/array.h
@@ -1699,6 +1706,7 @@ raddrinfo.o: $(top_srcdir)/ccan/check_type/check_type.h
raddrinfo.o: $(top_srcdir)/ccan/container_of/container_of.h
raddrinfo.o: $(top_srcdir)/ccan/list/list.h
raddrinfo.o: $(top_srcdir)/ccan/str/str.h
+raddrinfo.o: $(top_srcdir)/encindex.h
raddrinfo.o: $(top_srcdir)/id_table.h
raddrinfo.o: $(top_srcdir)/internal.h
raddrinfo.o: $(top_srcdir)/internal/array.h
@@ -1914,6 +1922,7 @@ socket.o: $(top_srcdir)/ccan/check_type/check_type.h
socket.o: $(top_srcdir)/ccan/container_of/container_of.h
socket.o: $(top_srcdir)/ccan/list/list.h
socket.o: $(top_srcdir)/ccan/str/str.h
+socket.o: $(top_srcdir)/encindex.h
socket.o: $(top_srcdir)/id_table.h
socket.o: $(top_srcdir)/internal.h
socket.o: $(top_srcdir)/internal/array.h
@@ -2129,6 +2138,7 @@ sockssocket.o: $(top_srcdir)/ccan/check_type/check_type.h
sockssocket.o: $(top_srcdir)/ccan/container_of/container_of.h
sockssocket.o: $(top_srcdir)/ccan/list/list.h
sockssocket.o: $(top_srcdir)/ccan/str/str.h
+sockssocket.o: $(top_srcdir)/encindex.h
sockssocket.o: $(top_srcdir)/id_table.h
sockssocket.o: $(top_srcdir)/internal.h
sockssocket.o: $(top_srcdir)/internal/array.h
@@ -2344,6 +2354,7 @@ tcpserver.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpserver.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpserver.o: $(top_srcdir)/ccan/list/list.h
tcpserver.o: $(top_srcdir)/ccan/str/str.h
+tcpserver.o: $(top_srcdir)/encindex.h
tcpserver.o: $(top_srcdir)/id_table.h
tcpserver.o: $(top_srcdir)/internal.h
tcpserver.o: $(top_srcdir)/internal/array.h
@@ -2559,6 +2570,7 @@ tcpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpsocket.o: $(top_srcdir)/ccan/list/list.h
tcpsocket.o: $(top_srcdir)/ccan/str/str.h
+tcpsocket.o: $(top_srcdir)/encindex.h
tcpsocket.o: $(top_srcdir)/id_table.h
tcpsocket.o: $(top_srcdir)/internal.h
tcpsocket.o: $(top_srcdir)/internal/array.h
@@ -2774,6 +2786,7 @@ udpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
udpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
udpsocket.o: $(top_srcdir)/ccan/list/list.h
udpsocket.o: $(top_srcdir)/ccan/str/str.h
+udpsocket.o: $(top_srcdir)/encindex.h
udpsocket.o: $(top_srcdir)/id_table.h
udpsocket.o: $(top_srcdir)/internal.h
udpsocket.o: $(top_srcdir)/internal/array.h
@@ -2989,6 +3002,7 @@ unixserver.o: $(top_srcdir)/ccan/check_type/check_type.h
unixserver.o: $(top_srcdir)/ccan/container_of/container_of.h
unixserver.o: $(top_srcdir)/ccan/list/list.h
unixserver.o: $(top_srcdir)/ccan/str/str.h
+unixserver.o: $(top_srcdir)/encindex.h
unixserver.o: $(top_srcdir)/id_table.h
unixserver.o: $(top_srcdir)/internal.h
unixserver.o: $(top_srcdir)/internal/array.h
@@ -3204,6 +3218,7 @@ unixsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
unixsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
unixsocket.o: $(top_srcdir)/ccan/list/list.h
unixsocket.o: $(top_srcdir)/ccan/str/str.h
+unixsocket.o: $(top_srcdir)/encindex.h
unixsocket.o: $(top_srcdir)/id_table.h
unixsocket.o: $(top_srcdir)/internal.h
unixsocket.o: $(top_srcdir)/internal/array.h
diff --git a/file.c b/file.c
index 89294ea009..8658b30677 100644
--- a/file.c
+++ b/file.c
@@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp;
#include "internal.h"
#include "internal/compilers.h"
#include "internal/dir.h"
+#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/file.h"
#include "internal/io.h"
@@ -3713,6 +3714,22 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc)
return (char *)path;
}
+static char *
+single_byte_chompdirsep(const char *path, const char *end)
+{
+ while (path < end) {
+ if (isdirsep(*path)) {
+ const char *last = path++;
+ while (path < end && isdirsep(*path)) path++;
+ if (path >= end) return (char *)last;
+ }
+ else {
+ path++;
+ }
+ }
+ return (char *)path;
+}
+
char *
rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
{
@@ -3723,7 +3740,7 @@ rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
static rb_encoding *
fs_enc_check(VALUE path1, VALUE path2)
{
- rb_encoding *enc = rb_enc_check(path1, path2);
+ rb_encoding *enc = rb_enc_check_str(path1, path2);
int encidx = rb_enc_to_index(enc);
if (encidx == ENCINDEX_US_ASCII) {
encidx = rb_enc_get_index(path1);
@@ -4651,7 +4668,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
return resolved;
}
-static VALUE rb_file_join(VALUE ary);
+static VALUE rb_file_join(long argc, VALUE *args);
#ifndef HAVE_REALPATH
static VALUE
@@ -4692,7 +4709,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum
unresolved_path = rb_str_dup_frozen(path);
if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) {
- unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path));
+ VALUE paths[2] = {basedir, unresolved_path};
+ unresolved_path = rb_file_join(2, paths);
}
if (origenc) unresolved_path = TO_OSPATH(unresolved_path);
@@ -5255,15 +5273,17 @@ rb_file_s_split(VALUE klass, VALUE path)
return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef));
}
+static VALUE rb_file_join_ary(VALUE ary);
+
static VALUE
file_inspect_join(VALUE ary, VALUE arg, int recur)
{
if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array");
- return rb_file_join(arg);
+ return rb_file_join_ary(arg);
}
static VALUE
-rb_file_join(VALUE ary)
+rb_file_join_ary(VALUE ary)
{
long len, i;
VALUE result, tmp;
@@ -5328,6 +5348,69 @@ rb_file_join(VALUE ary)
return result;
}
+static inline VALUE
+rb_file_join_fastpath(long argc, VALUE *args)
+{
+ long size = argc;
+
+ long i;
+ for (i = 0; i < argc; i++) {
+ VALUE tmp = args[i];
+ if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) {
+ size += RSTRING_LEN(tmp);
+ }
+ else {
+ return 0;
+ }
+ }
+
+ VALUE result = rb_str_buf_new(size);
+
+ StringValueCStr(args[0]);
+ int encidx = ENCODING_GET_INLINED(args[0]);
+ ENCODING_SET_INLINED(result, encidx);
+ rb_str_buf_append(result, args[0]);
+
+ const char *name = RSTRING_PTR(result);
+ for (i = 1; i < argc; i++) {
+ VALUE tmp = args[i];
+ StringValueCStr(tmp);
+ long len = RSTRING_LEN(result);
+
+ const char *tail = single_byte_chompdirsep(name, name + len);
+ if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
+ rb_str_set_len(result, tail - name);
+ }
+ else if (!*tail) {
+ rb_str_cat(result, "/", 1);
+ }
+
+ if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) {
+ rb_encoding *new_enc = fs_enc_check(result, tmp);
+ rb_enc_associate(result, new_enc);
+ encidx = rb_enc_to_index(new_enc);
+ }
+
+ rb_str_buf_append(result, tmp);
+ }
+
+ return result;
+}
+
+static inline VALUE
+rb_file_join(long argc, VALUE *args)
+{
+ if (RB_UNLIKELY(argc == 0)) {
+ return rb_str_new(0, 0);
+ }
+
+ VALUE result = rb_file_join_fastpath(argc, args);
+ if (RB_LIKELY(result)) {
+ return result;
+ }
+
+ return rb_file_join_ary(rb_ary_new_from_values(argc, args));
+}
/*
* call-seq:
* File.join(string, ...) -> string
@@ -5340,9 +5423,9 @@ rb_file_join(VALUE ary)
*/
static VALUE
-rb_file_s_join(VALUE klass, VALUE args)
+rb_file_s_join(int argc, VALUE *argv, VALUE klass)
{
- return rb_file_join(args);
+ return rb_file_join(argc, argv);
}
#if defined(HAVE_TRUNCATE)
@@ -7584,7 +7667,7 @@ Init_File(void)
/* separates directory parts in path */
rb_define_const(rb_cFile, "SEPARATOR", separator);
rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
- rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);
+ rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1);
#ifdef DOSISH
/* platform specific alternative separator */
diff --git a/internal/string.h b/internal/string.h
index d6fea62061..ea81db7ed3 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -14,6 +14,7 @@
#include "ruby/internal/stdbool.h" /* for bool */
#include "ruby/encoding.h" /* for rb_encoding */
#include "ruby/ruby.h" /* for VALUE */
+#include "encindex.h"
#define STR_SHARED FL_USER0 /* = ELTS_SHARED */
#define STR_NOEMBED FL_USER1
@@ -29,6 +30,26 @@ enum ruby_rstring_private_flags {
# undef rb_fstring_cstr
#endif
+static inline bool
+rb_str_encindex_fastpath(int encindex)
+{
+ // The overwhelming majority of strings are in one of these 3 encodings.
+ switch (encindex) {
+ case ENCINDEX_ASCII_8BIT:
+ case ENCINDEX_UTF_8:
+ case ENCINDEX_US_ASCII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+rb_str_enc_fastpath(VALUE str)
+{
+ return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
+}
+
/* string.c */
VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE);
diff --git a/string.c b/string.c
index 2d74c46a36..cfadabd379 100644
--- a/string.c
+++ b/string.c
@@ -146,27 +146,7 @@ VALUE rb_cSymbol;
RSTRING(str)->len = (n); \
} while (0)
-static inline bool
-str_encindex_fastpath(int encindex)
-{
- // The overwhelming majority of strings are in one of these 3 encodings.
- switch (encindex) {
- case ENCINDEX_ASCII_8BIT:
- case ENCINDEX_UTF_8:
- case ENCINDEX_US_ASCII:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool
-str_enc_fastpath(VALUE str)
-{
- return str_encindex_fastpath(ENCODING_GET_INLINED(str));
-}
-
-#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
+#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
@@ -960,7 +940,7 @@ static inline bool
rb_enc_str_asciicompat(VALUE str)
{
int encindex = ENCODING_GET_INLINED(str);
- return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
+ return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
}
int
@@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str)
rb_raise(rb_eTypeError, "not encoding capable object");
}
- if (RB_LIKELY(str_encindex_fastpath(encindex))) {
+ if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
return;
}
@@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w)
{
char *s = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
- rb_encoding *enc = rb_enc_get(str);
- const int minlen = rb_enc_mbminlen(enc);
+ int minlen = 1;
+
+ if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+ rb_encoding *enc = rb_enc_get(str);
+ minlen = rb_enc_mbminlen(enc);
- if (minlen > 1) {
- *w = 1;
- if (str_null_char(s, len, minlen, enc)) {
- return NULL;
+ if (minlen > 1) {
+ *w = 1;
+ if (str_null_char(s, len, minlen, enc)) {
+ return NULL;
+ }
+ return str_fill_term(str, s, len, minlen);
}
- return str_fill_term(str, s, len, minlen);
}
+
*w = 0;
if (!s || memchr(s, 0, len)) {
return NULL;
@@ -3765,7 +3750,7 @@ rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
- if (str_enc_fastpath(str)) {
+ if (rb_str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
// If RHS is 7bit we can do simple concatenation