summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2026-01-19 08:43:57 +0100
committerJean Boussier <jean.boussier@gmail.com>2026-01-20 08:33:42 +0100
commit11d29d32d270d2a98642858fdca25a5272563995 (patch)
treeae21a602858d45e74b7a0d167693e4f8fb4e4865
parent27bb1623cd048f3cbfc527cc315894803deabba2 (diff)
file.c: strrdirsep search from the back of the string for common encodings
`strrdirsep` quite inefficiently searches for the last separator from the front of the string. This is surprising but necessary because in Shift_JIS, `0x5c` can be the second byte of some multi-byte characters, so it's not possible to do a pure ASCII search. And it's even more costly because for each character we need to do expensive checks to handle this possibility. However in the overwhelming majority of cases, paths are encoded in UTF-8 or ASCII, so for these common encodings we can use the more logical and efficient algorithm. ``` compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25] built-ruby: ruby 4.1.0dev (2026-01-19T07:43:57Z file-dirname-lower.. a8d3535e5b) +PRISM [arm64-darwin25] ``` | |compare-ruby|built-ruby| |:------|-----------:|---------:| |long | 3.974M| 23.674M| | | -| 5.96x| |short | 15.281M| 29.034M| | | -| 1.90x|
-rw-r--r--file.c50
1 files changed, 44 insertions, 6 deletions
diff --git a/file.c b/file.c
index 9f4f45e5c6..7d47fc60a7 100644
--- a/file.c
+++ b/file.c
@@ -3693,7 +3693,6 @@ skipprefixroot(const char *path, const char *end, rb_encoding *enc)
#endif
}
-#define strrdirsep rb_enc_path_last_separator
char *
rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
{
@@ -3712,6 +3711,30 @@ rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
return last;
}
+/*
+ * Locate the last directory separator in [path, end) that is followed by
+ * a path component.  Trailing separators are ignored, and for a run of
+ * consecutive separators the first byte of the run is returned.
+ * Returns NULL when no such separator exists, including when the range
+ * is empty or consists only of separators.
+ *
+ * A NULL enc selects the backward byte-wise scan below; any other enc is
+ * delegated to rb_enc_path_last_separator().
+ */
+static inline char *
+strrdirsep(const char *path, const char *end, rb_encoding *enc)
+{
+    if (RB_LIKELY(enc == NULL)) {
+        const char *cursor = end;
+
+        /* Drop trailing separators, never stepping before `path`: an
+         * empty or all-separator range must not read path[-1]. */
+        while (cursor > path && isdirsep(cursor[-1])) {
+            cursor--;
+        }
+
+        /* cursor is one past the last non-separator byte (or == path).
+         * Decrement before the test so `path - 1` is never formed. */
+        while (cursor > path) {
+            cursor--;
+            if (isdirsep(cursor[0])) {
+                /* Rewind to the first separator of a consecutive run. */
+                while (cursor > path && isdirsep(cursor[-1])) {
+                    cursor--;
+                }
+                return (char *)cursor;
+            }
+        }
+        return NULL;
+    }
+    return rb_enc_path_last_separator(path, end, enc);
+}
+
static char *
chompdirsep(const char *path, const char *end, rb_encoding *enc)
{
@@ -5036,6 +5059,15 @@ rb_file_dirname(VALUE fname)
return rb_file_dirname_n(fname, 1);
}
+/*
+ * Pick the encoding argument used when scanning `str` as a path:
+ * NULL when rb_str_enc_fastpath() holds for the string, otherwise
+ * whatever rb_str_enc_get() reports for it.
+ */
+static inline rb_encoding *
+path_enc_get(VALUE str)
+{
+    /* Uncommon case first: the string needs its real encoding. */
+    if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+        return rb_str_enc_get(str);
+    }
+
+    return NULL;
+}
+
static VALUE
rb_file_dirname_n(VALUE fname, int n)
{
@@ -5048,7 +5080,7 @@ rb_file_dirname_n(VALUE fname, int n)
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
CheckPath(fname, name);
end = name + RSTRING_LEN(fname);
- enc = rb_str_enc_get(fname);
+ enc = path_enc_get(fname);
root = skiproot(name, end, enc);
#ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name))
@@ -5082,7 +5114,12 @@ rb_file_dirname_n(VALUE fname, int n)
if (i == n) i = 0;
}
else {
- Inc(p, end, enc);
+ if (RB_UNLIKELY(enc)) {
+ Inc(p, end, enc);
+ }
+ else {
+ p++;
+ }
}
}
p = seps[i];
@@ -5090,18 +5127,19 @@ rb_file_dirname_n(VALUE fname, int n)
break;
}
}
+
if (p == name) {
- return rb_enc_str_new(".", 1, enc);
+ return rb_enc_str_new(".", 1, rb_str_enc_get(fname));
}
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
const char *top = skiproot(name + 2, end, enc);
- dirname = rb_enc_str_new(name, 3, enc);
+ dirname = rb_enc_str_new(name, 3, rb_str_enc_get(fname));
rb_str_cat(dirname, top, p - top);
}
else
#endif
- dirname = rb_enc_str_new(name, p - name, enc);
+ dirname = rb_enc_str_new(name, p - name, rb_str_enc_get(fname));
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && root == name + 2 && p - name == 2)
rb_str_cat(dirname, ".", 1);