summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog22
-rw-r--r--configure.in2
-rw-r--r--internal.h46
-rw-r--r--string.c60
4 files changed, 107 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index ea56000f14..40be5eea9f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+Sun May 1 00:03:30 2016 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * configure.in (__builtin_ctz): check.
+
+ * configure.in (__builtin_ctzll): check.
+
+ * internal.h (rb_popcount32): defined for ntz_int32.
+ It could use __builtin_popcount, but this function is not used
+ in GCC environments because ntz_int32 uses __builtin_ctz there.
+ If another function starts using it, using __builtin_popcount
+ should be reconsidered.
+
+ * internal.h (rb_popcount64): ditto.
+
+ * internal.h (ntz_int32): defined for ntz_intptr.
+
+ * internal.h (ntz_int64): defined for ntz_intptr.
+
+ * internal.h (ntz_intptr): defined as ntz for uintptr_t.
+
+ * string.c (search_nonascii): unroll and use ntz.
+
Sat Apr 30 21:54:13 2016 Tanaka Akira <akr@fsij.org>
* numeric.c (Init_Numeric): Gather Fixnum method definitions.
diff --git a/configure.in b/configure.in
index 1042b0329c..14711a6837 100644
--- a/configure.in
+++ b/configure.in
@@ -2432,6 +2432,8 @@ RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_ctz, [__builtin_ctz(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_ctzll, [__builtin_ctzll(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_choose_expr, [
[int x[__extension__(__builtin_choose_expr(1, 1, -1))]];
[int y[__extension__(__builtin_choose_expr(0, -1, 1))]];
diff --git a/internal.h b/internal.h
index 7793c1ab32..a8f4240059 100644
--- a/internal.h
+++ b/internal.h
@@ -260,6 +260,52 @@ nlz_int128(uint128_t x)
}
#endif
+/*
+ * Count the set bits in a 32-bit value.
+ *
+ * Adjacent bit fields are summed in parallel; each step doubles the
+ * field width until the two 16-bit halves are added for the result.
+ */
+static inline int
+rb_popcount32(uint32_t x) {
+    uint32_t sum = x;
+
+    /* single bits -> 2-bit sums (0..2) */
+    sum = ((sum >> 1) & 0x55555555) + (sum & 0x55555555);
+    /* 2-bit sums -> 4-bit sums (0..4) */
+    sum = ((sum >> 2) & 0x33333333) + (sum & 0x33333333);
+    /* 4-bit sums -> 8-bit sums (0..8) */
+    sum = ((sum >> 4) & 0x0f0f0f0f) + (sum & 0x0f0f0f0f);
+    /* 8-bit sums -> 16-bit sums (0..16) */
+    sum = ((sum >> 8) & 0x001f001f) + (sum & 0x001f001f);
+    /* add the two 16-bit halves (0..32) */
+    return (int)(((sum >> 16) & 0x0000003f) + (sum & 0x0000003f));
+}
+
+/*
+ * Count the set bits in a 64-bit value.
+ *
+ * Same parallel field-summing scheme as the 32-bit variant, with one
+ * extra step to add the two 32-bit halves.
+ */
+static inline int
+rb_popcount64(uint64_t x) {
+    uint64_t sum = x;
+
+    /* single bits -> 2-bit sums (0..2) */
+    sum = ((sum >> 1) & 0x5555555555555555) + (sum & 0x5555555555555555);
+    /* 2-bit sums -> 4-bit sums (0..4) */
+    sum = ((sum >> 2) & 0x3333333333333333) + (sum & 0x3333333333333333);
+    /* 4-bit sums -> 8-bit sums (0..8); a nibble never exceeds 4, so a
+     * three-bit mask (0x07) per byte is enough to pick it up */
+    sum = ((sum >> 4) & 0x0707070707070707) + (sum & 0x0707070707070707);
+    /* 8-bit sums -> 16-bit sums (0..16) */
+    sum = ((sum >> 8) & 0x001f001f001f001f) + (sum & 0x001f001f001f001f);
+    /* 16-bit sums -> 32-bit sums (0..32) */
+    sum = ((sum >> 16) & 0x0000003f0000003f) + (sum & 0x0000003f0000003f);
+    /* add the two 32-bit halves (0..64) */
+    return (int)(((sum >> 32) & 0x7f) + (sum & 0x7f));
+}
+
+/*
+ * Number of trailing zero bits in a 32-bit value, i.e. the bit index
+ * of its lowest set bit.  Returns 32 when x is 0, regardless of
+ * whether the compiler builtin or the popcount fallback is in use.
+ */
+static inline int
+ntz_int32(uint32_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_CTZ
+    /* __builtin_ctz(0) is undefined; give the same answer as the
+     * fallback below so both configurations agree. */
+    if (x == 0) return 32;
+    return __builtin_ctz(x);
+#else
+    /* (~x) & (x-1) keeps exactly the bits below the lowest set bit,
+     * so counting them yields the number of trailing zeros. */
+    return rb_popcount32((~x) & (x-1));
+#endif
+}
+
+/*
+ * Number of trailing zero bits in a 64-bit value, i.e. the bit index
+ * of its lowest set bit.  Returns 64 when x is 0, regardless of
+ * whether the compiler builtin or the popcount fallback is in use.
+ */
+static inline int
+ntz_int64(uint64_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_CTZLL
+    /* __builtin_ctzll(0) is undefined; give the same answer as the
+     * fallback below so both configurations agree. */
+    if (x == 0) return 64;
+    return __builtin_ctzll(x);
+#else
+    /* (~x) & (x-1) keeps exactly the bits below the lowest set bit,
+     * so counting them yields the number of trailing zeros. */
+    return rb_popcount64((~x) & (x-1));
+#endif
+}
+
+/*
+ * Number of trailing zero bits in a pointer-sized unsigned value.
+ * Dispatches to the 64-bit or 32-bit helper according to
+ * SIZEOF_VOIDP.
+ */
+static inline int
+ntz_intptr(uintptr_t x) {
+#if SIZEOF_VOIDP == 8
+    return ntz_int64(x);
+#elif SIZEOF_VOIDP == 4
+    return ntz_int32((uint32_t)x);
+#else
+    /* Unknown pointer size: choose by the actual width of uintptr_t
+     * so the function never falls off the end without a value. */
+    return sizeof(x) > 4 ? ntz_int64(x) : ntz_int32((uint32_t)x);
+#endif
+}
+
#if HAVE_LONG_LONG && SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG
# define DLONG LONG_LONG
# define DL2NUM(x) LL2NUM(x)
diff --git a/string.c b/string.c
index 4ee59b2bd9..e2cb68aac2 100644
--- a/string.c
+++ b/string.c
@@ -427,32 +427,46 @@ search_nonascii(const char *p, const char *e)
#elif SIZEOF_VOIDP == 4
# define NONASCII_MASK 0x80808080UL
#endif
-#ifdef NONASCII_MASK
- if ((int)SIZEOF_VOIDP * 2 < e - p) {
- const uintptr_t *s, *t;
- const uintptr_t lowbits = SIZEOF_VOIDP - 1;
- s = (const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
- while (p < (const char *)s) {
- if (!ISASCII(*p))
- return p;
- p++;
- }
- t = (const uintptr_t*)(~lowbits & (uintptr_t)e);
- while (s < t) {
- if (*s & NONASCII_MASK) {
- t = s;
- break;
- }
- s++;
- }
- p = (const char *)t;
+
+#if !UNALIGNED_WORD_ACCESS
+ if (e - p > SIZEOF_VOIDP) {
+ switch (8 - (uintptr_t)p % 8) {
+#if SIZEOF_VOIDP > 4
+ case 7: if (*p&0x80) return p; p++;
+ case 6: if (*p&0x80) return p; p++;
+ case 5: if (*p&0x80) return p; p++;
+ case 4: if (*p&0x80) return p; p++;
+#endif
+ case 3: if (*p&0x80) return p; p++;
+ case 2: if (*p&0x80) return p; p++;
+ case 1: if (*p&0x80) return p; p++;
+ }
}
#endif
- while (p < e) {
- if (!ISASCII(*p))
- return p;
- p++;
+
+ {
+ const uintptr_t *s = (const uintptr_t *)p;
+ const uintptr_t *t = (const uintptr_t *)(e - (SIZEOF_VOIDP-1));
+ for (;s < t; s++) {
+ if (*s & NONASCII_MASK) {
+ return (const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
+ }
+ }
+ p = (const char *)s;
}
+
+ switch ((e - p) % SIZEOF_VOIDP) {
+#if SIZEOF_VOIDP > 4
+ case 7: if (*p&0x80) return p; p++;
+ case 6: if (*p&0x80) return p; p++;
+ case 5: if (*p&0x80) return p; p++;
+ case 4: if (*p&0x80) return p; p++;
+#endif
+ case 3: if (*p&0x80) return p; p++;
+ case 2: if (*p&0x80) return p; p++;
+ case 1: if (*p&0x80) return p;
+ }
+
return NULL;
}