summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 8
-rw-r--r-- configure.in 2
-rw-r--r-- internal.h 17
-rw-r--r-- string.c 12
4 files changed, 35 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 267b343..9312462 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Tue May 3 22:10:09 2016 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount
+ only if it can use SSE 4.2 POPCNT, whose latency is 3 cycles.
+
+ * internal.h (rb_popcount64): use __builtin_popcountll because it is
+ now in the fast path.
+
Tue May 3 14:19:18 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (new_if_gen): set newline flag to NODE_IF to trace all
diff --git a/configure.in b/configure.in
index 14711a6..ad8cc98 100644
--- a/configure.in
+++ b/configure.in
@@ -2429,6 +2429,8 @@ fi])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap16, [__builtin_bswap16(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap32, [__builtin_bswap32(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_popcount, [__builtin_popcount(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_popcountll, [__builtin_popcountll(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)])
diff --git a/internal.h b/internal.h
index 0bac79e..765758a 100644
--- a/internal.h
+++ b/internal.h
@@ -271,21 +271,38 @@ nlz_intptr(uintptr_t x) {
static inline int
rb_popcount32(uint32_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT
+ return __builtin_popcount(x);
+#else
x = (x & 0x55555555) + (x >> 1 & 0x55555555);
x = (x & 0x33333333) + (x >> 2 & 0x33333333);
x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f);
x = (x & 0x001f001f) + (x >> 8 & 0x001f001f);
return (x & 0x0000003f) + (x >>16 & 0x0000003f);
+#endif
}
static inline int
rb_popcount64(uint64_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT
+ return __builtin_popcountll(x);
+#else
x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
return (x & 0x7f) + (x >>32 & 0x7f);
+#endif
+}
+
+static inline int
+rb_popcount_intptr(uintptr_t x) {
+#if SIZEOF_VOIDP == 8
+ return rb_popcount64(x);
+#elif SIZEOF_VOIDP == 4
+ return rb_popcount32(x);
+#endif
}
static inline int
diff --git a/string.c b/string.c
index 35e53f7..9ccf093 100644
--- a/string.c
+++ b/string.c
@@ -1476,17 +1476,21 @@ count_utf8_lead_bytes_with_word(const uintptr_t *s)
uintptr_t d = *s;
/* Transform so that bit0 indicates whether we have a UTF-8 leading byte or not. */
- d |= ~(d>>1);
- d >>= 6;
+ d = (d>>6) | (~d>>7);
d &= NONASCII_MASK >> 7;
/* Gather all bytes. */
+#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
+ /* use only if it can use POPCNT */
+ return rb_popcount_intptr(d);
+#else
d += (d>>8);
d += (d>>16);
-#if SIZEOF_VOIDP == 8
+# if SIZEOF_VOIDP == 8
d += (d>>32);
-#endif
+# endif
return (d&0xF);
+#endif
}
#endif