summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean Boussier <jean.boussier@gmail.com> 2026-04-09 18:50:53 +0200
committer Jean Boussier <jean.boussier@gmail.com> 2026-04-10 09:56:20 +0200
commit b154cfa98c01b17b8f4df82ebb7a5bc9ab7bf7bb (patch)
tree 2eba398d8933ed07f93c9de4a93815385825e996
parent 36b0ae025a86b50837e814f7baa7a797184b89d5 (diff)
Add a fastpath for `rb_str_normalize_ospath`
This extra check is a hotspot for path operations on macOS. It was added in 9962aad7b0184e385b40c26c5a109bff7abbe43c because of a limitation of HFS+. But all the invalid characters are outside of the ASCII range, and most paths are ASCII, so we can optimistically check the coderange instead. Most callers of `rb_str_normalize_ospath` were already checking for the ASCII range first, but a few, like `rb_dir_getwd_ospath` in `dir.c` or `ospath_new` in `file.c`, didn't.
-rw-r--r-- file.c 10
1 files changed, 8 insertions, 2 deletions
diff --git a/file.c b/file.c
index 79d46b2de9..832e4b1cbb 100644
--- a/file.c
+++ b/file.c
@@ -380,9 +380,15 @@ rb_str_normalize_ospath(const char *ptr, long len)
const char *p = ptr;
const char *e = ptr + len;
const char *p1 = p;
- VALUE str = rb_str_buf_new(len);
rb_encoding *enc = rb_utf8_encoding();
- rb_enc_associate(str, enc);
+ VALUE str = rb_utf8_str_new(ptr, len);
+ if (RB_LIKELY(rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)) {
+ return str;
+ }
+ else {
+ str = rb_str_buf_new(len);
+ rb_enc_associate(str, enc);
+ }
while (p < e) {
int l, c;