[ruby/bigdecimal] Implement special conversions for 64-bit integers

This change improves the conversion speed from small integers.

```
Comparison:
                             big_n9
              master:   4003688.9 i/s
    bigdecimal 3.0.0:   1270551.0 i/s - 3.15x  slower

                            big_n19
              master:   5410096.4 i/s
    bigdecimal 3.0.0:   1000250.3 i/s - 5.41x  slower
```

https://github.com/ruby/bigdecimal/commit/3429bd7e6f
author: Kenta Murata <mrkn@mrkn.jp> 2021-01-01 04:13:12 +0900
committer: Kenta Murata <mrkn@mrkn.jp> 2021-01-02 00:54:09 +0900
commit: 448a67cd812d0be0a7f1cc871daa598c3b846143 (patch)
tree: 95c627713f6b2129787d60731d57f0f26c0e8d04 /ext/bigdecimal/bits.h
parent: 4730efdd80f40119f8a397fe1b4b7ba88a0ce3d3 (diff)
1 files changed, 137 insertions, 0 deletions
diff --git a/ext/bigdecimal/bits.h b/ext/bigdecimal/bits.h
new file mode 100644
index 0000000000..5dfda49741
--- /dev/null
+++ b/ext/bigdecimal/bits.h
@@ -0,0 +1,137 @@
+#ifndef BIGDECIMAL_BITS_H
+#define BIGDECIMAL_BITS_H
+
+#include "feature.h"
+#include "static_assert.h"
+
+#if defined(HAVE_X86INTRIN_H)
+# include <x86intrin.h>         /* for _lzcnt_u64 */
+#elif defined(_MSC_VER) && _MSC_VER >= 1310
+# include <intrin.h>            /* for the following intrinsics */
+#endif
+
+#if defined(_MSC_VER) && defined(__AVX2__)
+# pragma intrinsic(__lzcnt)
+# pragma intrinsic(__lzcnt64)
+#endif
+
+#define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( /* nonzero when (a) * (b) would not fit in [min, max]; arguments appear several times in the expansion, so they should be free of side effects */ \
+    (a) == 0 ? 0 : /* a zero factor never overflows */ \
+    (a) == -1 ? (b) < -(max) : /* settled before any division by (a); only the upper bound is tested here */ \
+    (a) > 0 ? /* the quotients below stand in for the product, which is never computed */ \
+      ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
+      ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b))) /* this last arm is reached only when (a) < -1 */
+
+#ifdef HAVE_UINT128_T /* bit_length(x): operand width minus the leading-zero count of x, with the nlz helper chosen by sizeof(x) */
+# define bit_length(x) /* variant that also handles operands wider than 64 bits, via nlz_int128 */ \
+    (unsigned int) /* NOTE(review): the cast expression as a whole is not wrapped in parentheses */ \
+    (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+     sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \
+                                   128 - nlz_int128((uint128_t)(x)))
+#else
+# define bit_length(x) /* without uint128_t, every operand wider than 32 bits goes through nlz_int64 */ \
+    (unsigned int) \
+    (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+                                    64 - nlz_int64((uint64_t)(x)))
+#endif
+
+static inline unsigned nlz_int32(uint32_t x); /* prototypes for the leading-zero counters defined further down */
+static inline unsigned nlz_int64(uint64_t x);
+#ifdef HAVE_UINT128_T
+static inline unsigned nlz_int128(uint128_t x); /* declared only when HAVE_UINT128_T is defined */
+#endif
+
+static inline unsigned int /* number of leading zero bits in the 32-bit value x; the non-intrinsic branches below yield 32 for x == 0 */
+nlz_int32(uint32_t x)
+{
+#if defined(_MSC_VER) && defined(__AVX2__)
+    /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC.
+     * AMD  CPUs have  had this  instruction for  decades (since  K10) but  for
+     * Intel, Haswell is  the oldest one.  We need to  use __AVX2__ for maximum
+     * safety. */
+    return (unsigned int)__lzcnt(x); /* MSVC intrinsic, enabled by the #pragma intrinsic near the top of this header */
+
+#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */
+    return (unsigned int)_lzcnt_u32(x); /* presumably supplied by <x86intrin.h>, which this header includes when HAVE_X86INTRIN_H is set */
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */
+    unsigned long r;
+    return _BitScanReverse(&r, x) ? (31 - (int)r) : 32; /* r is used as the index of the highest set bit; a zero result is treated as "no bit set" and mapped to 32 */
+
+#elif __has_builtin(__builtin_clz)
+    STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32); /* build fails unless int is exactly 32 bits wide, matching the uint32_t argument */
+    return x ? (unsigned int)__builtin_clz(x) : 32; /* x == 0 is answered here without calling the builtin */
+
+#else /* portable fallback: repeatedly test the upper half of the bits that are still candidates */
+    uint32_t y;
+    unsigned n = 32; /* lowered by the shift width each time the upper half turns out to be non-zero */
+    y = x >> 16; if (y) {n -= 16; x = y;}
+    y = x >>  8; if (y) {n -=  8; x = y;}
+    y = x >>  4; if (y) {n -=  4; x = y;}
+    y = x >>  2; if (y) {n -=  2; x = y;}
+    y = x >>  1; if (y) {return n - 2;} /* x fits in two bits by now; its upper bit is set */
+    return (unsigned int)(n - x); /* x is 0 or 1 at this point */
+#endif
+}
+
+static inline unsigned int /* number of leading zero bits in the 64-bit value x; the non-intrinsic branches below yield 64 for x == 0 */
+nlz_int64(uint64_t x)
+{
+#if defined(_MSC_VER) && defined(__AVX2__)
+    return (unsigned int)__lzcnt64(x); /* MSVC intrinsic, enabled by the #pragma intrinsic near the top of this header */
+
+#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */
+    return (unsigned int)_lzcnt_u64(x); /* the include of <x86intrin.h> in this header is annotated as being for this intrinsic */
+
+#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */
+    unsigned long r;
+    return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64; /* r is used as the index of the highest set bit; a zero result is treated as "no bit set" and mapped to 64 */
+
+#elif __has_builtin(__builtin_clzl) /* NOTE(review): only __builtin_clzl is probed, yet __builtin_clzll is also called below */
+    if (x == 0) { /* answered before either builtin is called */
+        return 64;
+    }
+    else if (sizeof(long) * CHAR_BIT == 64) { /* long is 64 bits wide: use the long variant */
+        return (unsigned int)__builtin_clzl((unsigned long)x);
+    }
+    else if (sizeof(long long) * CHAR_BIT == 64) { /* otherwise use long long if that one is 64 bits wide */
+        return (unsigned int)__builtin_clzll((unsigned long long)x);
+    }
+    else {
+        /* :FIXME: Is there a way to make this branch a compile-time error? */
+        __builtin_unreachable();
+    }
+
+#else /* portable fallback: repeatedly test the upper half of the bits that are still candidates */
+    uint64_t y;
+    unsigned int n = 64; /* lowered by the shift width each time the upper half turns out to be non-zero */
+    y = x >> 32; if (y) {n -= 32; x = y;}
+    y = x >> 16; if (y) {n -= 16; x = y;}
+    y = x >>  8; if (y) {n -=  8; x = y;}
+    y = x >>  4; if (y) {n -=  4; x = y;}
+    y = x >>  2; if (y) {n -=  2; x = y;}
+    y = x >>  1; if (y) {return n - 2;} /* x fits in two bits by now; its upper bit is set */
+    return (unsigned int)(n - x); /* x is 0 or 1 at this point */
+
+#endif
+}
+
+#ifdef HAVE_UINT128_T /* the 128-bit counter exists only when HAVE_UINT128_T is defined */
+static inline unsigned int /* number of leading zero bits in the 128-bit value x, 128 for x == 0 */
+nlz_int128(uint128_t x)
+{
+    uint64_t y = (uint64_t)(x >> 64); /* upper 64 bits of x */
+
+    if (x == 0) {
+        return 128;
+    }
+    else if (y == 0) {
+        return (unsigned int)nlz_int64(x) + 64; /* upper half is all zero: 64 plus the count for the lower half (x is narrowed to uint64_t by the call) */
+    }
+    else {
+        return (unsigned int)nlz_int64(y); /* the highest set bit lies in the upper half */
+    }
+}
+#endif
+
+#endif /* BIGDECIMAL_BITS_H */
author	Kenta Murata <mrkn@mrkn.jp>	2021-01-01 04:13:12 +0900
committer	Kenta Murata <mrkn@mrkn.jp>	2021-01-02 00:54:09 +0900
commit	448a67cd812d0be0a7f1cc871daa598c3b846143 (patch)
tree	95c627713f6b2129787d60731d57f0f26c0e8d04 /ext/bigdecimal/bits.h
parent	4730efdd80f40119f8a397fe1b4b7ba88a0ce3d3 (diff)