summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorZack Deveau <zack.ref@gmail.com>2024-08-23 13:18:44 -0400
committerNobuyoshi Nakada <nobu.nakada@gmail.com>2024-09-03 14:25:25 +0900
commite7cb70be4eb7411204f73ee748e317fefaa0410a (patch)
tree5df03ae6882f3ec5e82d23886a46835491539717 /string.c
parent5fd3942466004daa0b07ed61acae1dee84ebdd75 (diff)
Improve String#rindex performance on OSX
On OSX, String#rindex is slow due to the lack of `memrchr`. The fallback implementation finds a match by instead doing a `memcmp` at every character position in the search string, looking for a substring match.

For OSX hosts, this changeset introduces a simple `memrchr` implementation, `rb_memrchr`, that can be used instead.

An example benchmark below demonstrates an 8000 char long search string with a 10 char substring near the end.

```
ruby-master | substring near the end | osx
UTF-8
           user     system      total        real
index    0.000111   0.000000   0.000111 (  0.000110)
rindex   0.000446   0.000005   0.000451 (  0.000454)
```

```
ruby-patched | substring near the end | osx
UTF-8
           user     system      total        real
index    0.000112   0.000000   0.000112 (  0.000111)
rindex   0.000057   0.000001   0.000058 (  0.000057)
```
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/11519
Diffstat (limited to 'string.c')
-rw-r--r--string.c41
1 files changed, 17 insertions, 24 deletions
diff --git a/string.c b/string.c
index 6a6b4c8b29..025382c0f4 100644
--- a/string.c
+++ b/string.c
@@ -4345,7 +4345,19 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
return Qnil;
}
-#ifdef HAVE_MEMRCHR
+#ifndef HAVE_MEMRCHR
/*
 * Fallback memrchr for platforms whose libc does not provide one
 * (this definition sits under `#ifndef HAVE_MEMRCHR`).
 *
 * Scans the search_len bytes starting at search_str from the last byte
 * toward the first, and returns a pointer to the last byte equal to chr
 * (converted to unsigned char), or a null pointer when no byte matches.
 *
 * A search_len of zero (or less) examines no bytes and returns null.
 */
static void*
memrchr(const char *search_str, int chr, long search_len)
{
    const unsigned char target = (unsigned char)chr;
    const char *ptr = search_str + search_len;

    /*
     * Test the bound BEFORE stepping back: a do/while with `ptr >= search_str`
     * would run one extra iteration and read search_str[-1], which is out of
     * bounds (and would also misbehave for an empty range).
     */
    while (ptr > search_str) {
        if ((unsigned char)*(--ptr) == target) return (void *)ptr;
    }

    return ((void *)0);
}
+#endif
+
static long
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
{
@@ -4362,6 +4374,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
c = *t & 0xff;
searchlen = s - sbeg + 1;
+ if (memcmp(s, t, slen) == 0) {
+ return s - sbeg;
+ }
+
do {
hit = memrchr(sbeg, c, searchlen);
if (!hit) break;
@@ -4377,29 +4393,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
return -1;
}
-#else
-static long
-str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
-{
- long slen;
- char *sbeg, *e, *t;
-
- sbeg = RSTRING_PTR(str);
- e = RSTRING_END(str);
- t = RSTRING_PTR(sub);
- slen = RSTRING_LEN(sub);
-
- while (s) {
- if (memcmp(s, t, slen) == 0) {
- return s - sbeg;
- }
- if (s <= sbeg) break;
- s = rb_enc_prev_char(sbeg, s, e, enc);
- }
-
- return -1;
-}
-#endif
/* found index in byte */
static long