diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2026-01-19 08:43:57 +0100 |
|---|---|---|
| committer | Jean Boussier <jean.boussier@gmail.com> | 2026-01-20 08:33:42 +0100 |
| commit | 11d29d32d270d2a98642858fdca25a5272563995 (patch) | |
| tree | ae21a602858d45e74b7a0d167693e4f8fb4e4865 | |
| parent | 27bb1623cd048f3cbfc527cc315894803deabba2 (diff) | |
file.c: strrdirsep search from the back of the string for common encodings
`strrdirsep` quite inefficiently searches for the last separator from the front
of the string.
This is surprising but necessary because in Shift_JIS, `0x5c` can
be the second byte of some multi-byte characters; as such, it's
not possible to do a pure ASCII search. And it's even more costly
because for each character we need to do expensive checks to
handle this possibility.
However, in the overwhelming majority of cases, paths are encoded
in UTF-8 or ASCII, so for these common encodings we can use the
more logical and efficient algorithm.
```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-19T07:43:57Z file-dirname-lower.. a8d3535e5b) +PRISM [arm64-darwin25]
```
| |compare-ruby|built-ruby|
|:------|-----------:|---------:|
|long | 3.974M| 23.674M|
| | -| 5.96x|
|short | 15.281M| 29.034M|
| | -| 1.90x|
| -rw-r--r-- | file.c | 50 |
1 files changed, 44 insertions, 6 deletions
@@ -3693,7 +3693,6 @@ skipprefixroot(const char *path, const char *end, rb_encoding *enc) #endif } -#define strrdirsep rb_enc_path_last_separator char * rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) { @@ -3712,6 +3711,30 @@ rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) return last; } +static inline char * +strrdirsep(const char *path, const char *end, rb_encoding *enc) +{ + if (RB_LIKELY(enc == NULL)) { + const char *cursor = end - 1; + + while (isdirsep(cursor[0])) { + cursor--; + } + + while (cursor >= path) { + if (isdirsep(cursor[0])) { + while (cursor > path && isdirsep(cursor[-1])) { + cursor--; + } + return (char *)cursor; + } + cursor--; + } + return NULL; + } + return rb_enc_path_last_separator(path, end, enc); +} + static char * chompdirsep(const char *path, const char *end, rb_encoding *enc) { @@ -5036,6 +5059,15 @@ rb_file_dirname(VALUE fname) return rb_file_dirname_n(fname, 1); } +static inline rb_encoding * +path_enc_get(VALUE str) +{ + if (RB_LIKELY(rb_str_enc_fastpath(str))) { + return NULL; + } + return rb_str_enc_get(str); +} + static VALUE rb_file_dirname_n(VALUE fname, int n) { @@ -5048,7 +5080,7 @@ rb_file_dirname_n(VALUE fname, int n) if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n); CheckPath(fname, name); end = name + RSTRING_LEN(fname); - enc = rb_str_enc_get(fname); + enc = path_enc_get(fname); root = skiproot(name, end, enc); #ifdef DOSISH_UNC if (root > name + 1 && isdirsep(*name)) @@ -5082,7 +5114,12 @@ rb_file_dirname_n(VALUE fname, int n) if (i == n) i = 0; } else { - Inc(p, end, enc); + if (RB_UNLIKELY(enc)) { + Inc(p, end, enc); + } + else { + p++; + } } } p = seps[i]; @@ -5090,18 +5127,19 @@ rb_file_dirname_n(VALUE fname, int n) break; } } + if (p == name) { - return rb_enc_str_new(".", 1, enc); + return rb_enc_str_new(".", 1, rb_str_enc_get(fname)); } #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && isdirsep(*(name + 2))) { const char *top = 
skiproot(name + 2, end, enc); - dirname = rb_enc_str_new(name, 3, enc); + dirname = rb_enc_str_new(name, 3, rb_str_enc_get(fname)); rb_str_cat(dirname, top, p - top); } else #endif - dirname = rb_enc_str_new(name, p - name, enc); + dirname = rb_enc_str_new(name, p - name, rb_str_enc_get(fname)); #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && root == name + 2 && p - name == 2) rb_str_cat(dirname, ".", 1); |
