diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2026-01-18 10:33:54 +0100 |
|---|---|---|
| committer | Jean Boussier <jean.boussier@gmail.com> | 2026-01-18 16:31:31 +0100 |
| commit | 6cd4549060a608d8a7e5ee0dde2c4b69b08d7f6e (patch) | |
| tree | 17cd606e1d3ecd918d00c515126f29b0b3456a3e | |
| parent | d1dc4bdb2fe7f16e6da78c0930353e4a5031465a (diff) | |
Optimize File.join common use case
`File.join` is a hotspot for common libraries such as Zeitwerk
and Bootsnap. It has a fairly flexible signature, but 99% of
the time it's called with just two (or a small number of) UTF-8 strings.
If we add an optimistic fast path for that use case we can cut down a large
number of type and encoding checks, significantly speeding up the method.
The one remaining expensive check we could try to optimize is `str_null_check`.
Given it's common to use the same base string for joining, we could memoize the result of that check on the string.
We could also precompute it for literal strings.
```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-18T12:10:38Z spedup-file-join 069bab58d4) +PRISM [arm64-darwin25]
warming up....
| |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|two_strings | 2.475M| 9.444M|
| | -| 3.82x|
|many_strings | 551.975k| 2.346M|
| | -| 4.25x|
|array | 514.946k| 522.034k|
| | -| 1.01x|
|mixed | 621.236k| 633.189k|
| | -| 1.02x|
```
| -rw-r--r-- | benchmark/file_join.yml | 7 | ||||
| -rw-r--r-- | depend | 41 | ||||
| -rw-r--r-- | ext/-test-/stack/depend | 1 | ||||
| -rw-r--r-- | ext/-test-/string/depend | 3 | ||||
| -rw-r--r-- | ext/objspace/depend | 1 | ||||
| -rw-r--r-- | ext/ripper/depend | 1 | ||||
| -rw-r--r-- | ext/socket/depend | 15 | ||||
| -rw-r--r-- | file.c | 99 | ||||
| -rw-r--r-- | internal/string.h | 21 | ||||
| -rw-r--r-- | string.c | 47 |
10 files changed, 197 insertions, 39 deletions
diff --git a/benchmark/file_join.yml b/benchmark/file_join.yml new file mode 100644 index 0000000000..845257cf1e --- /dev/null +++ b/benchmark/file_join.yml @@ -0,0 +1,7 @@ +prelude: | + # frozen_string_literal: true +benchmark: + two_strings: File.join(__FILE__, "path") + many_strings: File.join(__FILE__, "path", "a", "b", "c", "d") + array: File.join([__FILE__, "path", "a", "b", "c", "d"]) + mixed: File.join(__FILE__, "path", "a", "b", ["c", "d"]) @@ -799,6 +799,7 @@ box.$(OBJEXT): {$(VPATH)}constant.h box.$(OBJEXT): {$(VPATH)}darray.h box.$(OBJEXT): {$(VPATH)}debug_counter.h box.$(OBJEXT): {$(VPATH)}defines.h +box.$(OBJEXT): {$(VPATH)}encindex.h box.$(OBJEXT): {$(VPATH)}encoding.h box.$(OBJEXT): {$(VPATH)}eval_intern.h box.$(OBJEXT): {$(VPATH)}id.h @@ -1250,6 +1251,7 @@ class.$(OBJEXT): {$(VPATH)}config.h class.$(OBJEXT): {$(VPATH)}constant.h class.$(OBJEXT): {$(VPATH)}debug_counter.h class.$(OBJEXT): {$(VPATH)}defines.h +class.$(OBJEXT): {$(VPATH)}encindex.h class.$(OBJEXT): {$(VPATH)}encoding.h class.$(OBJEXT): {$(VPATH)}id.h class.$(OBJEXT): {$(VPATH)}id_table.h @@ -1449,6 +1451,7 @@ compar.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h compar.$(OBJEXT): {$(VPATH)}compar.c compar.$(OBJEXT): {$(VPATH)}config.h compar.$(OBJEXT): {$(VPATH)}defines.h +compar.$(OBJEXT): {$(VPATH)}encindex.h compar.$(OBJEXT): {$(VPATH)}encoding.h compar.$(OBJEXT): {$(VPATH)}id.h compar.$(OBJEXT): {$(VPATH)}intern.h @@ -1921,6 +1924,7 @@ complex.$(OBJEXT): {$(VPATH)}config.h complex.$(OBJEXT): {$(VPATH)}constant.h complex.$(OBJEXT): {$(VPATH)}debug_counter.h complex.$(OBJEXT): {$(VPATH)}defines.h +complex.$(OBJEXT): {$(VPATH)}encindex.h complex.$(OBJEXT): {$(VPATH)}encoding.h complex.$(OBJEXT): {$(VPATH)}id.h complex.$(OBJEXT): {$(VPATH)}id_table.h @@ -2126,6 +2130,7 @@ concurrent_set.$(OBJEXT): {$(VPATH)}concurrent_set.c concurrent_set.$(OBJEXT): {$(VPATH)}config.h concurrent_set.$(OBJEXT): {$(VPATH)}debug_counter.h concurrent_set.$(OBJEXT): {$(VPATH)}defines.h +concurrent_set.$(OBJEXT): 
{$(VPATH)}encindex.h concurrent_set.$(OBJEXT): {$(VPATH)}encoding.h concurrent_set.$(OBJEXT): {$(VPATH)}id.h concurrent_set.$(OBJEXT): {$(VPATH)}id_table.h @@ -2364,6 +2369,7 @@ cont.$(OBJEXT): {$(VPATH)}constant.h cont.$(OBJEXT): {$(VPATH)}cont.c cont.$(OBJEXT): {$(VPATH)}debug_counter.h cont.$(OBJEXT): {$(VPATH)}defines.h +cont.$(OBJEXT): {$(VPATH)}encindex.h cont.$(OBJEXT): {$(VPATH)}encoding.h cont.$(OBJEXT): {$(VPATH)}eval_intern.h cont.$(OBJEXT): {$(VPATH)}fiber/scheduler.h @@ -4906,6 +4912,7 @@ enumerator.$(OBJEXT): {$(VPATH)}config.h enumerator.$(OBJEXT): {$(VPATH)}constant.h enumerator.$(OBJEXT): {$(VPATH)}debug_counter.h enumerator.$(OBJEXT): {$(VPATH)}defines.h +enumerator.$(OBJEXT): {$(VPATH)}encindex.h enumerator.$(OBJEXT): {$(VPATH)}encoding.h enumerator.$(OBJEXT): {$(VPATH)}enumerator.c enumerator.$(OBJEXT): {$(VPATH)}id.h @@ -5126,6 +5133,7 @@ error.$(OBJEXT): {$(VPATH)}config.h error.$(OBJEXT): {$(VPATH)}constant.h error.$(OBJEXT): {$(VPATH)}debug_counter.h error.$(OBJEXT): {$(VPATH)}defines.h +error.$(OBJEXT): {$(VPATH)}encindex.h error.$(OBJEXT): {$(VPATH)}encoding.h error.$(OBJEXT): {$(VPATH)}error.c error.$(OBJEXT): {$(VPATH)}id.h @@ -5373,6 +5381,7 @@ eval.$(OBJEXT): {$(VPATH)}config.h eval.$(OBJEXT): {$(VPATH)}constant.h eval.$(OBJEXT): {$(VPATH)}debug_counter.h eval.$(OBJEXT): {$(VPATH)}defines.h +eval.$(OBJEXT): {$(VPATH)}encindex.h eval.$(OBJEXT): {$(VPATH)}encoding.h eval.$(OBJEXT): {$(VPATH)}eval.c eval.$(OBJEXT): {$(VPATH)}eval_error.c @@ -5584,6 +5593,7 @@ file.$(OBJEXT): $(top_srcdir)/internal/array.h file.$(OBJEXT): $(top_srcdir)/internal/class.h file.$(OBJEXT): $(top_srcdir)/internal/compilers.h file.$(OBJEXT): $(top_srcdir)/internal/dir.h +file.$(OBJEXT): $(top_srcdir)/internal/encoding.h file.$(OBJEXT): $(top_srcdir)/internal/error.h file.$(OBJEXT): $(top_srcdir)/internal/file.h file.$(OBJEXT): $(top_srcdir)/internal/gc.h @@ -5865,6 +5875,7 @@ gc.$(OBJEXT): {$(VPATH)}darray.h gc.$(OBJEXT): {$(VPATH)}debug.h gc.$(OBJEXT): 
{$(VPATH)}debug_counter.h gc.$(OBJEXT): {$(VPATH)}defines.h +gc.$(OBJEXT): {$(VPATH)}encindex.h gc.$(OBJEXT): {$(VPATH)}encoding.h gc.$(OBJEXT): {$(VPATH)}eval_intern.h gc.$(OBJEXT): {$(VPATH)}gc.c @@ -6373,6 +6384,7 @@ hash.$(OBJEXT): {$(VPATH)}config.h hash.$(OBJEXT): {$(VPATH)}constant.h hash.$(OBJEXT): {$(VPATH)}debug_counter.h hash.$(OBJEXT): {$(VPATH)}defines.h +hash.$(OBJEXT): {$(VPATH)}encindex.h hash.$(OBJEXT): {$(VPATH)}encoding.h hash.$(OBJEXT): {$(VPATH)}hash.c hash.$(OBJEXT): {$(VPATH)}hash.rbinc @@ -7203,6 +7215,7 @@ io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h io_buffer.$(OBJEXT): {$(VPATH)}config.h io_buffer.$(OBJEXT): {$(VPATH)}defines.h +io_buffer.$(OBJEXT): {$(VPATH)}encindex.h io_buffer.$(OBJEXT): {$(VPATH)}encoding.h io_buffer.$(OBJEXT): {$(VPATH)}fiber/scheduler.h io_buffer.$(OBJEXT): {$(VPATH)}id.h @@ -7454,6 +7467,7 @@ iseq.$(OBJEXT): {$(VPATH)}config.h iseq.$(OBJEXT): {$(VPATH)}constant.h iseq.$(OBJEXT): {$(VPATH)}debug_counter.h iseq.$(OBJEXT): {$(VPATH)}defines.h +iseq.$(OBJEXT): {$(VPATH)}encindex.h iseq.$(OBJEXT): {$(VPATH)}encoding.h iseq.$(OBJEXT): {$(VPATH)}eval_intern.h iseq.$(OBJEXT): {$(VPATH)}id.h @@ -7702,6 +7716,7 @@ jit.$(OBJEXT): {$(VPATH)}config.h jit.$(OBJEXT): {$(VPATH)}constant.h jit.$(OBJEXT): {$(VPATH)}debug_counter.h jit.$(OBJEXT): {$(VPATH)}defines.h +jit.$(OBJEXT): {$(VPATH)}encindex.h jit.$(OBJEXT): {$(VPATH)}encoding.h jit.$(OBJEXT): {$(VPATH)}id.h jit.$(OBJEXT): {$(VPATH)}id_table.h @@ -7956,6 +7971,7 @@ load.$(OBJEXT): {$(VPATH)}constant.h load.$(OBJEXT): {$(VPATH)}darray.h load.$(OBJEXT): {$(VPATH)}defines.h load.$(OBJEXT): {$(VPATH)}dln.h +load.$(OBJEXT): {$(VPATH)}encindex.h load.$(OBJEXT): {$(VPATH)}encoding.h load.$(OBJEXT): {$(VPATH)}eval_intern.h load.$(OBJEXT): {$(VPATH)}id.h @@ -9979,6 +9995,7 @@ numeric.$(OBJEXT): {$(VPATH)}builtin.h numeric.$(OBJEXT): {$(VPATH)}config.h numeric.$(OBJEXT): {$(VPATH)}constant.h numeric.$(OBJEXT): 
{$(VPATH)}defines.h +numeric.$(OBJEXT): {$(VPATH)}encindex.h numeric.$(OBJEXT): {$(VPATH)}encoding.h numeric.$(OBJEXT): {$(VPATH)}id.h numeric.$(OBJEXT): {$(VPATH)}id_table.h @@ -10200,6 +10217,7 @@ object.$(OBJEXT): {$(VPATH)}config.h object.$(OBJEXT): {$(VPATH)}constant.h object.$(OBJEXT): {$(VPATH)}debug_counter.h object.$(OBJEXT): {$(VPATH)}defines.h +object.$(OBJEXT): {$(VPATH)}encindex.h object.$(OBJEXT): {$(VPATH)}encoding.h object.$(OBJEXT): {$(VPATH)}id.h object.$(OBJEXT): {$(VPATH)}id_table.h @@ -10418,6 +10436,7 @@ pack.$(OBJEXT): {$(VPATH)}builtin.h pack.$(OBJEXT): {$(VPATH)}config.h pack.$(OBJEXT): {$(VPATH)}constant.h pack.$(OBJEXT): {$(VPATH)}defines.h +pack.$(OBJEXT): {$(VPATH)}encindex.h pack.$(OBJEXT): {$(VPATH)}encoding.h pack.$(OBJEXT): {$(VPATH)}id.h pack.$(OBJEXT): {$(VPATH)}id_table.h @@ -10644,6 +10663,7 @@ parse.$(OBJEXT): {$(VPATH)}config.h parse.$(OBJEXT): {$(VPATH)}constant.h parse.$(OBJEXT): {$(VPATH)}defines.h parse.$(OBJEXT): {$(VPATH)}defs/keywords +parse.$(OBJEXT): {$(VPATH)}encindex.h parse.$(OBJEXT): {$(VPATH)}encoding.h parse.$(OBJEXT): {$(VPATH)}id.h parse.$(OBJEXT): {$(VPATH)}id_table.h @@ -12125,6 +12145,7 @@ proc.$(OBJEXT): {$(VPATH)}config.h proc.$(OBJEXT): {$(VPATH)}constant.h proc.$(OBJEXT): {$(VPATH)}debug_counter.h proc.$(OBJEXT): {$(VPATH)}defines.h +proc.$(OBJEXT): {$(VPATH)}encindex.h proc.$(OBJEXT): {$(VPATH)}encoding.h proc.$(OBJEXT): {$(VPATH)}eval_intern.h proc.$(OBJEXT): {$(VPATH)}id.h @@ -12356,6 +12377,7 @@ process.$(OBJEXT): {$(VPATH)}constant.h process.$(OBJEXT): {$(VPATH)}debug_counter.h process.$(OBJEXT): {$(VPATH)}defines.h process.$(OBJEXT): {$(VPATH)}dln.h +process.$(OBJEXT): {$(VPATH)}encindex.h process.$(OBJEXT): {$(VPATH)}encoding.h process.$(OBJEXT): {$(VPATH)}fiber/scheduler.h process.$(OBJEXT): {$(VPATH)}hrtime.h @@ -12585,6 +12607,7 @@ ractor.$(OBJEXT): {$(VPATH)}config.h ractor.$(OBJEXT): {$(VPATH)}constant.h ractor.$(OBJEXT): {$(VPATH)}debug_counter.h ractor.$(OBJEXT): {$(VPATH)}defines.h 
+ractor.$(OBJEXT): {$(VPATH)}encindex.h ractor.$(OBJEXT): {$(VPATH)}encoding.h ractor.$(OBJEXT): {$(VPATH)}eval_intern.h ractor.$(OBJEXT): {$(VPATH)}id.h @@ -13018,6 +13041,7 @@ range.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h range.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h range.$(OBJEXT): {$(VPATH)}config.h range.$(OBJEXT): {$(VPATH)}defines.h +range.$(OBJEXT): {$(VPATH)}encindex.h range.$(OBJEXT): {$(VPATH)}encoding.h range.$(OBJEXT): {$(VPATH)}id.h range.$(OBJEXT): {$(VPATH)}id_table.h @@ -14688,6 +14712,7 @@ ruby.$(OBJEXT): {$(VPATH)}constant.h ruby.$(OBJEXT): {$(VPATH)}debug_counter.h ruby.$(OBJEXT): {$(VPATH)}defines.h ruby.$(OBJEXT): {$(VPATH)}dln.h +ruby.$(OBJEXT): {$(VPATH)}encindex.h ruby.$(OBJEXT): {$(VPATH)}encoding.h ruby.$(OBJEXT): {$(VPATH)}eval_intern.h ruby.$(OBJEXT): {$(VPATH)}id.h @@ -14896,6 +14921,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h ruby_parser.$(OBJEXT): {$(VPATH)}config.h ruby_parser.$(OBJEXT): {$(VPATH)}defines.h +ruby_parser.$(OBJEXT): {$(VPATH)}encindex.h ruby_parser.$(OBJEXT): {$(VPATH)}encoding.h ruby_parser.$(OBJEXT): {$(VPATH)}intern.h ruby_parser.$(OBJEXT): {$(VPATH)}internal.h @@ -15306,6 +15332,7 @@ set.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h set.$(OBJEXT): {$(VPATH)}config.h set.$(OBJEXT): {$(VPATH)}constant.h set.$(OBJEXT): {$(VPATH)}defines.h +set.$(OBJEXT): {$(VPATH)}encindex.h set.$(OBJEXT): {$(VPATH)}encoding.h set.$(OBJEXT): {$(VPATH)}id.h set.$(OBJEXT): {$(VPATH)}id_table.h @@ -15678,6 +15705,7 @@ shape.$(OBJEXT): {$(VPATH)}config.h shape.$(OBJEXT): {$(VPATH)}constant.h shape.$(OBJEXT): {$(VPATH)}debug_counter.h shape.$(OBJEXT): {$(VPATH)}defines.h +shape.$(OBJEXT): {$(VPATH)}encindex.h shape.$(OBJEXT): {$(VPATH)}encoding.h shape.$(OBJEXT): {$(VPATH)}id.h shape.$(OBJEXT): {$(VPATH)}id_table.h @@ -15892,6 +15920,7 @@ signal.$(OBJEXT): {$(VPATH)}config.h signal.$(OBJEXT): {$(VPATH)}constant.h signal.$(OBJEXT): {$(VPATH)}debug_counter.h 
signal.$(OBJEXT): {$(VPATH)}defines.h +signal.$(OBJEXT): {$(VPATH)}encindex.h signal.$(OBJEXT): {$(VPATH)}encoding.h signal.$(OBJEXT): {$(VPATH)}eval_intern.h signal.$(OBJEXT): {$(VPATH)}id.h @@ -16101,6 +16130,7 @@ sprintf.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h sprintf.$(OBJEXT): {$(VPATH)}config.h sprintf.$(OBJEXT): {$(VPATH)}constant.h sprintf.$(OBJEXT): {$(VPATH)}defines.h +sprintf.$(OBJEXT): {$(VPATH)}encindex.h sprintf.$(OBJEXT): {$(VPATH)}encoding.h sprintf.$(OBJEXT): {$(VPATH)}id.h sprintf.$(OBJEXT): {$(VPATH)}id_table.h @@ -16457,6 +16487,7 @@ strftime.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h strftime.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h strftime.$(OBJEXT): {$(VPATH)}config.h strftime.$(OBJEXT): {$(VPATH)}defines.h +strftime.$(OBJEXT): {$(VPATH)}encindex.h strftime.$(OBJEXT): {$(VPATH)}encoding.h strftime.$(OBJEXT): {$(VPATH)}intern.h strftime.$(OBJEXT): {$(VPATH)}internal.h @@ -16925,6 +16956,7 @@ struct.$(OBJEXT): {$(VPATH)}config.h struct.$(OBJEXT): {$(VPATH)}constant.h struct.$(OBJEXT): {$(VPATH)}debug_counter.h struct.$(OBJEXT): {$(VPATH)}defines.h +struct.$(OBJEXT): {$(VPATH)}encindex.h struct.$(OBJEXT): {$(VPATH)}encoding.h struct.$(OBJEXT): {$(VPATH)}id.h struct.$(OBJEXT): {$(VPATH)}id_table.h @@ -17141,6 +17173,7 @@ symbol.$(OBJEXT): {$(VPATH)}constant.h symbol.$(OBJEXT): {$(VPATH)}darray.h symbol.$(OBJEXT): {$(VPATH)}debug_counter.h symbol.$(OBJEXT): {$(VPATH)}defines.h +symbol.$(OBJEXT): {$(VPATH)}encindex.h symbol.$(OBJEXT): {$(VPATH)}encoding.h symbol.$(OBJEXT): {$(VPATH)}id.c symbol.$(OBJEXT): {$(VPATH)}id.h @@ -17398,6 +17431,7 @@ thread.$(OBJEXT): {$(VPATH)}constant.h thread.$(OBJEXT): {$(VPATH)}debug.h thread.$(OBJEXT): {$(VPATH)}debug_counter.h thread.$(OBJEXT): {$(VPATH)}defines.h +thread.$(OBJEXT): {$(VPATH)}encindex.h thread.$(OBJEXT): {$(VPATH)}encoding.h thread.$(OBJEXT): {$(VPATH)}eval_intern.h thread.$(OBJEXT): {$(VPATH)}fiber/scheduler.h @@ -17628,6 +17662,7 @@ time.$(OBJEXT): {$(VPATH)}builtin.h time.$(OBJEXT): 
{$(VPATH)}config.h time.$(OBJEXT): {$(VPATH)}constant.h time.$(OBJEXT): {$(VPATH)}defines.h +time.$(OBJEXT): {$(VPATH)}encindex.h time.$(OBJEXT): {$(VPATH)}encoding.h time.$(OBJEXT): {$(VPATH)}id.h time.$(OBJEXT): {$(VPATH)}id_table.h @@ -17830,6 +17865,7 @@ transcode.$(OBJEXT): {$(VPATH)}config.h transcode.$(OBJEXT): {$(VPATH)}constant.h transcode.$(OBJEXT): {$(VPATH)}debug_counter.h transcode.$(OBJEXT): {$(VPATH)}defines.h +transcode.$(OBJEXT): {$(VPATH)}encindex.h transcode.$(OBJEXT): {$(VPATH)}encoding.h transcode.$(OBJEXT): {$(VPATH)}id.h transcode.$(OBJEXT): {$(VPATH)}id_table.h @@ -18211,6 +18247,7 @@ variable.$(OBJEXT): {$(VPATH)}config.h variable.$(OBJEXT): {$(VPATH)}constant.h variable.$(OBJEXT): {$(VPATH)}debug_counter.h variable.$(OBJEXT): {$(VPATH)}defines.h +variable.$(OBJEXT): {$(VPATH)}encindex.h variable.$(OBJEXT): {$(VPATH)}encoding.h variable.$(OBJEXT): {$(VPATH)}id.h variable.$(OBJEXT): {$(VPATH)}id_table.h @@ -18687,6 +18724,7 @@ vm.$(OBJEXT): {$(VPATH)}constant.h vm.$(OBJEXT): {$(VPATH)}debug_counter.h vm.$(OBJEXT): {$(VPATH)}defines.h vm.$(OBJEXT): {$(VPATH)}defs/opt_operand.def +vm.$(OBJEXT): {$(VPATH)}encindex.h vm.$(OBJEXT): {$(VPATH)}encoding.h vm.$(OBJEXT): {$(VPATH)}eval_intern.h vm.$(OBJEXT): {$(VPATH)}id.h @@ -18951,6 +18989,7 @@ vm_backtrace.$(OBJEXT): {$(VPATH)}constant.h vm_backtrace.$(OBJEXT): {$(VPATH)}debug.h vm_backtrace.$(OBJEXT): {$(VPATH)}debug_counter.h vm_backtrace.$(OBJEXT): {$(VPATH)}defines.h +vm_backtrace.$(OBJEXT): {$(VPATH)}encindex.h vm_backtrace.$(OBJEXT): {$(VPATH)}encoding.h vm_backtrace.$(OBJEXT): {$(VPATH)}eval_intern.h vm_backtrace.$(OBJEXT): {$(VPATH)}id.h @@ -20087,6 +20126,7 @@ yjit.$(OBJEXT): {$(VPATH)}constant.h yjit.$(OBJEXT): {$(VPATH)}debug.h yjit.$(OBJEXT): {$(VPATH)}debug_counter.h yjit.$(OBJEXT): {$(VPATH)}defines.h +yjit.$(OBJEXT): {$(VPATH)}encindex.h yjit.$(OBJEXT): {$(VPATH)}encoding.h yjit.$(OBJEXT): {$(VPATH)}id.h yjit.$(OBJEXT): {$(VPATH)}id_table.h @@ -20342,6 +20382,7 @@ zjit.$(OBJEXT): 
{$(VPATH)}constant.h zjit.$(OBJEXT): {$(VPATH)}debug.h zjit.$(OBJEXT): {$(VPATH)}debug_counter.h zjit.$(OBJEXT): {$(VPATH)}defines.h +zjit.$(OBJEXT): {$(VPATH)}encindex.h zjit.$(OBJEXT): {$(VPATH)}encoding.h zjit.$(OBJEXT): {$(VPATH)}id.h zjit.$(OBJEXT): {$(VPATH)}id_table.h diff --git a/ext/-test-/stack/depend b/ext/-test-/stack/depend index 31571c882e..77e93bb201 100644 --- a/ext/-test-/stack/depend +++ b/ext/-test-/stack/depend @@ -172,6 +172,7 @@ stack.o: $(hdrdir)/ruby/oniguruma.h stack.o: $(hdrdir)/ruby/ruby.h stack.o: $(hdrdir)/ruby/st.h stack.o: $(hdrdir)/ruby/subst.h +stack.o: $(top_srcdir)/encindex.h stack.o: $(top_srcdir)/internal/compilers.h stack.o: $(top_srcdir)/internal/string.h stack.o: stack.c diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend index de6e775acc..478ae3b82b 100644 --- a/ext/-test-/string/depend +++ b/ext/-test-/string/depend @@ -172,6 +172,7 @@ capacity.o: $(hdrdir)/ruby/oniguruma.h capacity.o: $(hdrdir)/ruby/ruby.h capacity.o: $(hdrdir)/ruby/st.h capacity.o: $(hdrdir)/ruby/subst.h +capacity.o: $(top_srcdir)/encindex.h capacity.o: $(top_srcdir)/internal/compilers.h capacity.o: $(top_srcdir)/internal/string.h capacity.o: capacity.c @@ -679,6 +680,7 @@ cstr.o: $(hdrdir)/ruby/oniguruma.h cstr.o: $(hdrdir)/ruby/ruby.h cstr.o: $(hdrdir)/ruby/st.h cstr.o: $(hdrdir)/ruby/subst.h +cstr.o: $(top_srcdir)/encindex.h cstr.o: $(top_srcdir)/internal.h cstr.o: $(top_srcdir)/internal/compilers.h cstr.o: $(top_srcdir)/internal/string.h @@ -1535,6 +1537,7 @@ fstring.o: $(hdrdir)/ruby/oniguruma.h fstring.o: $(hdrdir)/ruby/ruby.h fstring.o: $(hdrdir)/ruby/st.h fstring.o: $(hdrdir)/ruby/subst.h +fstring.o: $(top_srcdir)/encindex.h fstring.o: $(top_srcdir)/internal/compilers.h fstring.o: $(top_srcdir)/internal/string.h fstring.o: fstring.c diff --git a/ext/objspace/depend b/ext/objspace/depend index 04b26eb6c2..d9dfc0c42b 100644 --- a/ext/objspace/depend +++ b/ext/objspace/depend @@ -602,6 +602,7 @@ objspace_dump.o: 
$(top_srcdir)/ccan/list/list.h objspace_dump.o: $(top_srcdir)/ccan/str/str.h objspace_dump.o: $(top_srcdir)/constant.h objspace_dump.o: $(top_srcdir)/debug_counter.h +objspace_dump.o: $(top_srcdir)/encindex.h objspace_dump.o: $(top_srcdir)/id_table.h objspace_dump.o: $(top_srcdir)/internal.h objspace_dump.o: $(top_srcdir)/internal/array.h diff --git a/ext/ripper/depend b/ext/ripper/depend index bd2de75906..944da25ee9 100644 --- a/ext/ripper/depend +++ b/ext/ripper/depend @@ -578,6 +578,7 @@ ripper.o: $(top_srcdir)/ccan/container_of/container_of.h ripper.o: $(top_srcdir)/ccan/list/list.h ripper.o: $(top_srcdir)/ccan/str/str.h ripper.o: $(top_srcdir)/constant.h +ripper.o: $(top_srcdir)/encindex.h ripper.o: $(top_srcdir)/id_table.h ripper.o: $(top_srcdir)/internal.h ripper.o: $(top_srcdir)/internal/array.h diff --git a/ext/socket/depend b/ext/socket/depend index 3573dc45e2..77f6239a3d 100644 --- a/ext/socket/depend +++ b/ext/socket/depend @@ -193,6 +193,7 @@ ancdata.o: $(top_srcdir)/ccan/check_type/check_type.h ancdata.o: $(top_srcdir)/ccan/container_of/container_of.h ancdata.o: $(top_srcdir)/ccan/list/list.h ancdata.o: $(top_srcdir)/ccan/str/str.h +ancdata.o: $(top_srcdir)/encindex.h ancdata.o: $(top_srcdir)/id_table.h ancdata.o: $(top_srcdir)/internal.h ancdata.o: $(top_srcdir)/internal/array.h @@ -408,6 +409,7 @@ basicsocket.o: $(top_srcdir)/ccan/check_type/check_type.h basicsocket.o: $(top_srcdir)/ccan/container_of/container_of.h basicsocket.o: $(top_srcdir)/ccan/list/list.h basicsocket.o: $(top_srcdir)/ccan/str/str.h +basicsocket.o: $(top_srcdir)/encindex.h basicsocket.o: $(top_srcdir)/id_table.h basicsocket.o: $(top_srcdir)/internal.h basicsocket.o: $(top_srcdir)/internal/array.h @@ -623,6 +625,7 @@ constants.o: $(top_srcdir)/ccan/check_type/check_type.h constants.o: $(top_srcdir)/ccan/container_of/container_of.h constants.o: $(top_srcdir)/ccan/list/list.h constants.o: $(top_srcdir)/ccan/str/str.h +constants.o: $(top_srcdir)/encindex.h constants.o: 
$(top_srcdir)/id_table.h constants.o: $(top_srcdir)/internal.h constants.o: $(top_srcdir)/internal/array.h @@ -839,6 +842,7 @@ ifaddr.o: $(top_srcdir)/ccan/check_type/check_type.h ifaddr.o: $(top_srcdir)/ccan/container_of/container_of.h ifaddr.o: $(top_srcdir)/ccan/list/list.h ifaddr.o: $(top_srcdir)/ccan/str/str.h +ifaddr.o: $(top_srcdir)/encindex.h ifaddr.o: $(top_srcdir)/id_table.h ifaddr.o: $(top_srcdir)/internal.h ifaddr.o: $(top_srcdir)/internal/array.h @@ -1054,6 +1058,7 @@ init.o: $(top_srcdir)/ccan/check_type/check_type.h init.o: $(top_srcdir)/ccan/container_of/container_of.h init.o: $(top_srcdir)/ccan/list/list.h init.o: $(top_srcdir)/ccan/str/str.h +init.o: $(top_srcdir)/encindex.h init.o: $(top_srcdir)/id_table.h init.o: $(top_srcdir)/internal.h init.o: $(top_srcdir)/internal/array.h @@ -1269,6 +1274,7 @@ ipsocket.o: $(top_srcdir)/ccan/check_type/check_type.h ipsocket.o: $(top_srcdir)/ccan/container_of/container_of.h ipsocket.o: $(top_srcdir)/ccan/list/list.h ipsocket.o: $(top_srcdir)/ccan/str/str.h +ipsocket.o: $(top_srcdir)/encindex.h ipsocket.o: $(top_srcdir)/id_table.h ipsocket.o: $(top_srcdir)/internal.h ipsocket.o: $(top_srcdir)/internal/array.h @@ -1484,6 +1490,7 @@ option.o: $(top_srcdir)/ccan/check_type/check_type.h option.o: $(top_srcdir)/ccan/container_of/container_of.h option.o: $(top_srcdir)/ccan/list/list.h option.o: $(top_srcdir)/ccan/str/str.h +option.o: $(top_srcdir)/encindex.h option.o: $(top_srcdir)/id_table.h option.o: $(top_srcdir)/internal.h option.o: $(top_srcdir)/internal/array.h @@ -1699,6 +1706,7 @@ raddrinfo.o: $(top_srcdir)/ccan/check_type/check_type.h raddrinfo.o: $(top_srcdir)/ccan/container_of/container_of.h raddrinfo.o: $(top_srcdir)/ccan/list/list.h raddrinfo.o: $(top_srcdir)/ccan/str/str.h +raddrinfo.o: $(top_srcdir)/encindex.h raddrinfo.o: $(top_srcdir)/id_table.h raddrinfo.o: $(top_srcdir)/internal.h raddrinfo.o: $(top_srcdir)/internal/array.h @@ -1914,6 +1922,7 @@ socket.o: $(top_srcdir)/ccan/check_type/check_type.h 
socket.o: $(top_srcdir)/ccan/container_of/container_of.h socket.o: $(top_srcdir)/ccan/list/list.h socket.o: $(top_srcdir)/ccan/str/str.h +socket.o: $(top_srcdir)/encindex.h socket.o: $(top_srcdir)/id_table.h socket.o: $(top_srcdir)/internal.h socket.o: $(top_srcdir)/internal/array.h @@ -2129,6 +2138,7 @@ sockssocket.o: $(top_srcdir)/ccan/check_type/check_type.h sockssocket.o: $(top_srcdir)/ccan/container_of/container_of.h sockssocket.o: $(top_srcdir)/ccan/list/list.h sockssocket.o: $(top_srcdir)/ccan/str/str.h +sockssocket.o: $(top_srcdir)/encindex.h sockssocket.o: $(top_srcdir)/id_table.h sockssocket.o: $(top_srcdir)/internal.h sockssocket.o: $(top_srcdir)/internal/array.h @@ -2344,6 +2354,7 @@ tcpserver.o: $(top_srcdir)/ccan/check_type/check_type.h tcpserver.o: $(top_srcdir)/ccan/container_of/container_of.h tcpserver.o: $(top_srcdir)/ccan/list/list.h tcpserver.o: $(top_srcdir)/ccan/str/str.h +tcpserver.o: $(top_srcdir)/encindex.h tcpserver.o: $(top_srcdir)/id_table.h tcpserver.o: $(top_srcdir)/internal.h tcpserver.o: $(top_srcdir)/internal/array.h @@ -2559,6 +2570,7 @@ tcpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h tcpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h tcpsocket.o: $(top_srcdir)/ccan/list/list.h tcpsocket.o: $(top_srcdir)/ccan/str/str.h +tcpsocket.o: $(top_srcdir)/encindex.h tcpsocket.o: $(top_srcdir)/id_table.h tcpsocket.o: $(top_srcdir)/internal.h tcpsocket.o: $(top_srcdir)/internal/array.h @@ -2774,6 +2786,7 @@ udpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h udpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h udpsocket.o: $(top_srcdir)/ccan/list/list.h udpsocket.o: $(top_srcdir)/ccan/str/str.h +udpsocket.o: $(top_srcdir)/encindex.h udpsocket.o: $(top_srcdir)/id_table.h udpsocket.o: $(top_srcdir)/internal.h udpsocket.o: $(top_srcdir)/internal/array.h @@ -2989,6 +3002,7 @@ unixserver.o: $(top_srcdir)/ccan/check_type/check_type.h unixserver.o: $(top_srcdir)/ccan/container_of/container_of.h unixserver.o: 
$(top_srcdir)/ccan/list/list.h unixserver.o: $(top_srcdir)/ccan/str/str.h +unixserver.o: $(top_srcdir)/encindex.h unixserver.o: $(top_srcdir)/id_table.h unixserver.o: $(top_srcdir)/internal.h unixserver.o: $(top_srcdir)/internal/array.h @@ -3204,6 +3218,7 @@ unixsocket.o: $(top_srcdir)/ccan/check_type/check_type.h unixsocket.o: $(top_srcdir)/ccan/container_of/container_of.h unixsocket.o: $(top_srcdir)/ccan/list/list.h unixsocket.o: $(top_srcdir)/ccan/str/str.h +unixsocket.o: $(top_srcdir)/encindex.h unixsocket.o: $(top_srcdir)/id_table.h unixsocket.o: $(top_srcdir)/internal.h unixsocket.o: $(top_srcdir)/internal/array.h @@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp; #include "internal.h" #include "internal/compilers.h" #include "internal/dir.h" +#include "internal/encoding.h" #include "internal/error.h" #include "internal/file.h" #include "internal/io.h" @@ -3713,6 +3714,22 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc) return (char *)path; } +static char * +single_byte_chompdirsep(const char *path, const char *end) +{ + while (path < end) { + if (isdirsep(*path)) { + const char *last = path++; + while (path < end && isdirsep(*path)) path++; + if (path >= end) return (char *)last; + } + else { + path++; + } + } + return (char *)path; +} + char * rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) { @@ -3723,7 +3740,7 @@ rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) static rb_encoding * fs_enc_check(VALUE path1, VALUE path2) { - rb_encoding *enc = rb_enc_check(path1, path2); + rb_encoding *enc = rb_enc_check_str(path1, path2); int encidx = rb_enc_to_index(enc); if (encidx == ENCINDEX_US_ASCII) { encidx = rb_enc_get_index(path1); @@ -4651,7 +4668,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum return resolved; } -static VALUE rb_file_join(VALUE ary); +static VALUE rb_file_join(long argc, VALUE *args); #ifndef HAVE_REALPATH static VALUE @@ -4692,7 +4709,8 @@ 
rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum unresolved_path = rb_str_dup_frozen(path); if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) { - unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path)); + VALUE paths[2] = {basedir, unresolved_path}; + unresolved_path = rb_file_join(2, paths); } if (origenc) unresolved_path = TO_OSPATH(unresolved_path); @@ -5255,15 +5273,17 @@ rb_file_s_split(VALUE klass, VALUE path) return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef)); } +static VALUE rb_file_join_ary(VALUE ary); + static VALUE file_inspect_join(VALUE ary, VALUE arg, int recur) { if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array"); - return rb_file_join(arg); + return rb_file_join_ary(arg); } static VALUE -rb_file_join(VALUE ary) +rb_file_join_ary(VALUE ary) { long len, i; VALUE result, tmp; @@ -5328,6 +5348,69 @@ rb_file_join(VALUE ary) return result; } +static inline VALUE +rb_file_join_fastpath(long argc, VALUE *args) +{ + long size = argc; + + long i; + for (i = 0; i < argc; i++) { + VALUE tmp = args[i]; + if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) { + size += RSTRING_LEN(tmp); + } + else { + return 0; + } + } + + VALUE result = rb_str_buf_new(size); + + StringValueCStr(args[0]); + int encidx = ENCODING_GET_INLINED(args[0]); + ENCODING_SET_INLINED(result, encidx); + rb_str_buf_append(result, args[0]); + + const char *name = RSTRING_PTR(result); + for (i = 1; i < argc; i++) { + VALUE tmp = args[i]; + StringValueCStr(tmp); + long len = RSTRING_LEN(result); + + const char *tail = single_byte_chompdirsep(name, name + len); + if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) { + rb_str_set_len(result, tail - name); + } + else if (!*tail) { + rb_str_cat(result, "/", 1); + } + + if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) { + rb_encoding *new_enc = fs_enc_check(result, tmp); + rb_enc_associate(result, new_enc); + encidx = 
rb_enc_to_index(new_enc); + } + + rb_str_buf_append(result, tmp); + } + + return result; +} + +static inline VALUE +rb_file_join(long argc, VALUE *args) +{ + if (RB_UNLIKELY(argc == 0)) { + return rb_str_new(0, 0); + } + + VALUE result = rb_file_join_fastpath(argc, args); + if (RB_LIKELY(result)) { + return result; + } + + return rb_file_join_ary(rb_ary_new_from_values(argc, args)); +} /* * call-seq: * File.join(string, ...) -> string @@ -5340,9 +5423,9 @@ rb_file_join(VALUE ary) */ static VALUE -rb_file_s_join(VALUE klass, VALUE args) +rb_file_s_join(int argc, VALUE *argv, VALUE klass) { - return rb_file_join(args); + return rb_file_join(argc, argv); } #if defined(HAVE_TRUNCATE) @@ -7584,7 +7667,7 @@ Init_File(void) /* separates directory parts in path */ rb_define_const(rb_cFile, "SEPARATOR", separator); rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1); - rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2); + rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1); #ifdef DOSISH /* platform specific alternative separator */ diff --git a/internal/string.h b/internal/string.h index d6fea62061..ea81db7ed3 100644 --- a/internal/string.h +++ b/internal/string.h @@ -14,6 +14,7 @@ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/encoding.h" /* for rb_encoding */ #include "ruby/ruby.h" /* for VALUE */ +#include "encindex.h" #define STR_SHARED FL_USER0 /* = ELTS_SHARED */ #define STR_NOEMBED FL_USER1 @@ -29,6 +30,26 @@ enum ruby_rstring_private_flags { # undef rb_fstring_cstr #endif +static inline bool +rb_str_encindex_fastpath(int encindex) +{ + // The overwhelming majority of strings are in one of these 3 encodings. 
+ switch (encindex) { + case ENCINDEX_ASCII_8BIT: + case ENCINDEX_UTF_8: + case ENCINDEX_US_ASCII: + return true; + default: + return false; + } +} + +static inline bool +rb_str_enc_fastpath(VALUE str) +{ + return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); +} + /* string.c */ VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); @@ -146,27 +146,7 @@ VALUE rb_cSymbol; RSTRING(str)->len = (n); \ } while (0) -static inline bool -str_encindex_fastpath(int encindex) -{ - // The overwhelming majority of strings are in one of these 3 encodings. - switch (encindex) { - case ENCINDEX_ASCII_8BIT: - case ENCINDEX_UTF_8: - case ENCINDEX_US_ASCII: - return true; - default: - return false; - } -} - -static inline bool -str_enc_fastpath(VALUE str) -{ - return str_encindex_fastpath(ENCODING_GET_INLINED(str)); -} - -#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str)))) +#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str)))) #define TERM_FILL(ptr, termlen) do {\ char *const term_fill_ptr = (ptr);\ const int term_fill_len = (termlen);\ @@ -960,7 +940,7 @@ static inline bool rb_enc_str_asciicompat(VALUE str) { int encindex = ENCODING_GET_INLINED(str); - return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex)); + return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex)); } int @@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str) rb_raise(rb_eTypeError, "not encoding capable object"); } - if (RB_LIKELY(str_encindex_fastpath(encindex))) { + if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) { return; } @@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w) { char *s = RSTRING_PTR(str); long len = RSTRING_LEN(str); - rb_encoding *enc = rb_enc_get(str); - const int minlen = rb_enc_mbminlen(enc); + int minlen = 1; + + if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { + rb_encoding *enc = 
rb_enc_get(str); + minlen = rb_enc_mbminlen(enc); - if (minlen > 1) { - *w = 1; - if (str_null_char(s, len, minlen, enc)) { - return NULL; + if (minlen > 1) { + *w = 1; + if (str_null_char(s, len, minlen, enc)) { + return NULL; + } + return str_fill_term(str, s, len, minlen); } - return str_fill_term(str, s, len, minlen); } + *w = 0; if (!s || memchr(s, 0, len)) { return NULL; @@ -3765,7 +3750,7 @@ rb_str_buf_append(VALUE str, VALUE str2) { int str2_cr = rb_enc_str_coderange(str2); - if (str_enc_fastpath(str)) { + if (rb_str_enc_fastpath(str)) { switch (str2_cr) { case ENC_CODERANGE_7BIT: // If RHS is 7bit we can do simple concatenation |
