summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jean Boussier <jean.boussier@gmail.com> 2026-01-19 07:32:09 +0100
committer: Jean Boussier <jean.boussier@gmail.com> 2026-01-20 08:33:42 +0100
commit: 27bb1623cd048f3cbfc527cc315894803deabba2 (patch)
tree: a7bcabe824382632378a07ad099ee1fcae1aa0cc
parent: 826dbcfb2be33ffbfbc99554244881e014513d20 (diff)
file.c: Optimize `rb_file_dirname_n` fixed costs
- `str_null_check` was performed twice: once by `FilePathStringValue` and a second time by `StringValueCStr`.
- `StringValueCStr` was checking for the presence of the terminator, but we don't care about that.
- `FilePathStringValue` calls `rb_str_new_frozen` to ensure `fname` isn't mutated, but that's costly for such a check. Instead, we can do it in debug mode only.
- `rb_enc_get` is slow because it accepts arbitrary objects, even immediates, so it has to do numerous type checks. Add a much faster `rb_str_enc_get` for when we know we're dealing with a string.
- `rb_enc_copy` is slow for the same reasons. Since we already have the encoding, we can use `rb_enc_str_new` instead.
-rw-r--r-- benchmark/file_dirname.yml 5
-rw-r--r-- file.c 45
-rw-r--r-- internal/string.h 7
-rw-r--r-- string.c 2
4 files changed, 41 insertions, 18 deletions
diff --git a/benchmark/file_dirname.yml b/benchmark/file_dirname.yml
new file mode 100644
index 0000000000..d5c134ad4b
--- /dev/null
+++ b/benchmark/file_dirname.yml
@@ -0,0 +1,5 @@
+prelude: |
+ # frozen_string_literal: true
+benchmark:
+ long: File.dirname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
+ short: File.dirname("foo/bar")
diff --git a/file.c b/file.c
index 809253fab0..9f4f45e5c6 100644
--- a/file.c
+++ b/file.c
@@ -214,15 +214,16 @@ file_path_convert(VALUE name)
return name;
}
-static rb_encoding *
+static void
check_path_encoding(VALUE str)
{
- rb_encoding *enc = rb_enc_get(str);
- if (!rb_enc_asciicompat(enc)) {
- rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
- rb_enc_name(enc), rb_str_inspect(str));
+ if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+ rb_encoding *enc = rb_str_enc_get(str);
+ if (!rb_enc_asciicompat(enc)) {
+ rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
+ rb_enc_name(enc), rb_str_inspect(str));
+ }
}
- return enc;
}
VALUE
@@ -250,7 +251,7 @@ rb_get_path_check_convert(VALUE obj)
rb_raise(rb_eArgError, "path name contains null byte");
}
- return rb_str_new4(obj);
+ return rb_str_new_frozen(obj);
}
VALUE
@@ -265,6 +266,19 @@ rb_get_path(VALUE obj)
return rb_get_path_check_convert(rb_get_path_check_to_string(obj));
}
+static inline VALUE
+check_path(VALUE obj, const char **cstr)
+{
+ VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj));
+#if RUBY_DEBUG
+ str = rb_str_new_frozen(str);
+#endif
+ *cstr = RSTRING_PTR(str);
+ return str;
+}
+
+#define CheckPath(str, cstr) RB_GC_GUARD(str) = check_path(str, &cstr);
+
VALUE
rb_str_encode_ospath(VALUE path)
{
@@ -4952,7 +4966,8 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
if (rb_check_arity(argc, 1, 2) == 2) {
fext = argv[1];
StringValue(fext);
- enc = check_path_encoding(fext);
+ check_path_encoding(fext);
+ enc = rb_str_enc_get(fext);
}
fname = argv[0];
FilePathStringValue(fname);
@@ -5031,10 +5046,9 @@ rb_file_dirname_n(VALUE fname, int n)
const char **seps;
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
- FilePathStringValue(fname);
- name = StringValueCStr(fname);
+ CheckPath(fname, name);
end = name + RSTRING_LEN(fname);
- enc = rb_enc_get(fname);
+ enc = rb_str_enc_get(fname);
root = skiproot(name, end, enc);
#ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name))
@@ -5077,24 +5091,21 @@ rb_file_dirname_n(VALUE fname, int n)
}
}
if (p == name) {
- dirname = rb_str_new(".", 1);
- rb_enc_copy(dirname, fname);
- return dirname;
+ return rb_enc_str_new(".", 1, enc);
}
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
const char *top = skiproot(name + 2, end, enc);
- dirname = rb_str_new(name, 3);
+ dirname = rb_enc_str_new(name, 3, enc);
rb_str_cat(dirname, top, p - top);
}
else
#endif
- dirname = rb_str_new(name, p - name);
+ dirname = rb_enc_str_new(name, p - name, enc);
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && root == name + 2 && p - name == 2)
rb_str_cat(dirname, ".", 1);
#endif
- rb_enc_copy(dirname, fname);
return dirname;
}
diff --git a/internal/string.h b/internal/string.h
index cd1e8d7929..dd5e20c0c6 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -50,6 +50,13 @@ rb_str_enc_fastpath(VALUE str)
return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
}
+static inline rb_encoding *
+rb_str_enc_get(VALUE str)
+{
+ RUBY_ASSERT(RB_TYPE_P(str, T_STRING));
+ return rb_enc_from_index(ENCODING_GET(str));
+}
+
/* string.c */
VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE);
diff --git a/string.c b/string.c
index 1e0b9929ef..464eab2146 100644
--- a/string.c
+++ b/string.c
@@ -2880,7 +2880,7 @@ str_null_check(VALUE str, int *w)
int minlen = 1;
if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
- rb_encoding *enc = rb_enc_get(str);
+ rb_encoding *enc = rb_str_enc_get(str);
minlen = rb_enc_mbminlen(enc);
if (minlen > 1) {