diff options
| author | Zack Deveau <zack.ref@gmail.com> | 2024-08-23 13:18:44 -0400 |
|---|---|---|
| committer | Nobuyoshi Nakada <nobu.nakada@gmail.com> | 2024-09-03 14:25:25 +0900 |
| commit | e7cb70be4eb7411204f73ee748e317fefaa0410a (patch) | |
| tree | 5df03ae6882f3ec5e82d23886a46835491539717 /string.c | |
| parent | 5fd3942466004daa0b07ed61acae1dee84ebdd75 (diff) | |
Improve String#rindex performance on OSX
On OSX, String#rindex is slow due to the lack of `memrchr`.
Without it, the fallback implementation walks backwards from the
starting position and calls `memcmp` at every character of the
string being searched until the substring matches.
For OSX hosts, this changeset introduces a simple `memrchr`
implementation that can be used instead (referred to here as
`rb_memrchr`; the diff below defines it as a static `memrchr`
guarded by `#ifndef HAVE_MEMRCHR`). The example benchmark below
searches an 8000-character string for a 10-character substring
located near the end.
```
ruby-master | substring near the end | osx
UTF-8
             user     system      total        real
index    0.000111   0.000000   0.000111 (  0.000110)
rindex   0.000446   0.000005   0.000451 (  0.000454)
```
```
ruby-patched | substring near the end | osx
UTF-8
             user     system      total        real
index    0.000112   0.000000   0.000112 (  0.000111)
rindex   0.000057   0.000001   0.000058 (  0.000057)
```
Notes:
Merged: https://github.com/ruby/ruby/pull/11519
Diffstat (limited to 'string.c')
| -rw-r--r-- | string.c | 41 |
1 files changed, 17 insertions, 24 deletions
@@ -4345,7 +4345,19 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
     return Qnil;
 }
 
-#ifdef HAVE_MEMRCHR
+#ifndef HAVE_MEMRCHR /* fallback: last byte equal to chr in the first search_len bytes, else NULL */
+static void*
+memrchr(const char *search_str, int chr, long search_len)
+{
+    const char *ptr = search_str + search_len;
+    while (ptr > search_str) { /* test before decrementing: never read before search_str, even if search_len is 0 */
+        if ((unsigned char)*(--ptr) == chr) return (void *)ptr;
+    }
+    /* chr does not occur in [search_str, search_str + search_len) */
+    return ((void *)0);
+}
+#endif
+
 static long
 str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
 {
@@ -4362,6 +4374,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
     c = *t & 0xff;
     searchlen = s - sbeg + 1;
 
+    if (memcmp(s, t, slen) == 0) {
+        return s - sbeg;
+    }
+
     do {
         hit = memrchr(sbeg, c, searchlen);
         if (!hit) break;
@@ -4377,29 +4393,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
 
     return -1;
 }
-#else
-static long
-str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
-{
-    long slen;
-    char *sbeg, *e, *t;
-
-    sbeg = RSTRING_PTR(str);
-    e = RSTRING_END(str);
-    t = RSTRING_PTR(sub);
-    slen = RSTRING_LEN(sub);
-
-    while (s) {
-        if (memcmp(s, t, slen) == 0) {
-            return s - sbeg;
-        }
-        if (s <= sbeg) break;
-        s = rb_enc_prev_char(sbeg, s, e, enc);
-    }
-
-    return -1;
-}
-#endif
 
 /* found index in byte */
 static long
