diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2026-01-19 07:32:09 +0100 |
|---|---|---|
| committer | Jean Boussier <jean.boussier@gmail.com> | 2026-01-20 08:33:42 +0100 |
| commit | 27bb1623cd048f3cbfc527cc315894803deabba2 (patch) | |
| tree | a7bcabe824382632378a07ad099ee1fcae1aa0cc | |
| parent | 826dbcfb2be33ffbfbc99554244881e014513d20 (diff) | |
file.c: Optimize `rb_file_dirname_n` fixed costs
- `str_null_check` was performed twice, once by `FilePathStringValue`
and a second time by `StringValueCStr`.
- `StringValueCStr` was checking for the presence of the terminator, but we
don't care about that.
- `FilePathStringValue` calls `rb_str_new_frozen` to ensure `fname`
isn't mutated, but that's costly for such a check. Instead we
can do it in debug mode only.
- `rb_enc_get` is slow because it accepts arbitrary objects, even immediates,
so it has to do numerous type checks. Add a much faster `rb_str_enc_get`
for when we know we're dealing with a string.
- `rb_enc_copy` is slow for the same reasons. Since we already have the
encoding, we can use `rb_enc_str_new` instead.
| -rw-r--r-- | benchmark/file_dirname.yml | 5 | ||||
| -rw-r--r-- | file.c | 45 | ||||
| -rw-r--r-- | internal/string.h | 7 | ||||
| -rw-r--r-- | string.c | 2 |
4 files changed, 41 insertions, 18 deletions
diff --git a/benchmark/file_dirname.yml b/benchmark/file_dirname.yml new file mode 100644 index 0000000000..d5c134ad4b --- /dev/null +++ b/benchmark/file_dirname.yml @@ -0,0 +1,5 @@ +prelude: | + # frozen_string_literal: true +benchmark: + long: File.dirname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml") + short: File.dirname("foo/bar") @@ -214,15 +214,16 @@ file_path_convert(VALUE name) return name; } -static rb_encoding * +static void check_path_encoding(VALUE str) { - rb_encoding *enc = rb_enc_get(str); - if (!rb_enc_asciicompat(enc)) { - rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, - rb_enc_name(enc), rb_str_inspect(str)); + if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { + rb_encoding *enc = rb_str_enc_get(str); + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, + rb_enc_name(enc), rb_str_inspect(str)); + } } - return enc; } VALUE @@ -250,7 +251,7 @@ rb_get_path_check_convert(VALUE obj) rb_raise(rb_eArgError, "path name contains null byte"); } - return rb_str_new4(obj); + return rb_str_new_frozen(obj); } VALUE @@ -265,6 +266,19 @@ rb_get_path(VALUE obj) return rb_get_path_check_convert(rb_get_path_check_to_string(obj)); } +static inline VALUE +check_path(VALUE obj, const char **cstr) +{ + VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj)); +#if RUBY_DEBUG + str = rb_str_new_frozen(str); +#endif + *cstr = RSTRING_PTR(str); + return str; +} + +#define CheckPath(str, cstr) RB_GC_GUARD(str) = check_path(str, &cstr); + VALUE rb_str_encode_ospath(VALUE path) { @@ -4952,7 +4966,8 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _) if (rb_check_arity(argc, 1, 2) == 2) { fext = argv[1]; StringValue(fext); - enc = check_path_encoding(fext); + check_path_encoding(fext); + enc = rb_str_enc_get(fext); } fname = argv[0]; FilePathStringValue(fname); @@ -5031,10 +5046,9 @@ rb_file_dirname_n(VALUE fname, int n) const char 
**seps; if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n); - FilePathStringValue(fname); - name = StringValueCStr(fname); + CheckPath(fname, name); end = name + RSTRING_LEN(fname); - enc = rb_enc_get(fname); + enc = rb_str_enc_get(fname); root = skiproot(name, end, enc); #ifdef DOSISH_UNC if (root > name + 1 && isdirsep(*name)) @@ -5077,24 +5091,21 @@ rb_file_dirname_n(VALUE fname, int n) } } if (p == name) { - dirname = rb_str_new(".", 1); - rb_enc_copy(dirname, fname); - return dirname; + return rb_enc_str_new(".", 1, enc); } #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && isdirsep(*(name + 2))) { const char *top = skiproot(name + 2, end, enc); - dirname = rb_str_new(name, 3); + dirname = rb_enc_str_new(name, 3, enc); rb_str_cat(dirname, top, p - top); } else #endif - dirname = rb_str_new(name, p - name); + dirname = rb_enc_str_new(name, p - name, enc); #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && root == name + 2 && p - name == 2) rb_str_cat(dirname, ".", 1); #endif - rb_enc_copy(dirname, fname); return dirname; } diff --git a/internal/string.h b/internal/string.h index cd1e8d7929..dd5e20c0c6 100644 --- a/internal/string.h +++ b/internal/string.h @@ -50,6 +50,13 @@ rb_str_enc_fastpath(VALUE str) return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); } +static inline rb_encoding * +rb_str_enc_get(VALUE str) +{ + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + return rb_enc_from_index(ENCODING_GET(str)); +} + /* string.c */ VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); @@ -2880,7 +2880,7 @@ str_null_check(VALUE str, int *w) int minlen = 1; if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { - rb_encoding *enc = rb_enc_get(str); + rb_encoding *enc = rb_str_enc_get(str); minlen = rb_enc_mbminlen(enc); if (minlen > 1) { |
