diff options
author | Kenta Murata <mrkn@mrkn.jp> | 2021-01-01 04:13:12 +0900 |
---|---|---|
committer | Kenta Murata <mrkn@mrkn.jp> | 2021-01-02 00:54:09 +0900 |
commit | 448a67cd812d0be0a7f1cc871daa598c3b846143 (patch) | |
tree | 95c627713f6b2129787d60731d57f0f26c0e8d04 /ext/bigdecimal/bits.h | |
parent | 4730efdd80f40119f8a397fe1b4b7ba88a0ce3d3 (diff) |
[ruby/bigdecimal] Implement special conversions for 64-bit integers
This change improves the conversion speed from small integers.
```
Comparison:
big_n9
master: 4003688.9 i/s
bigdecimal 3.0.0: 1270551.0 i/s - 3.15x slower
big_n19
master: 5410096.4 i/s
bigdecimal 3.0.0: 1000250.3 i/s - 5.41x slower
```
https://github.com/ruby/bigdecimal/commit/3429bd7e6f
Diffstat (limited to 'ext/bigdecimal/bits.h')
-rw-r--r-- | ext/bigdecimal/bits.h | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/ext/bigdecimal/bits.h b/ext/bigdecimal/bits.h new file mode 100644 index 0000000000..5dfda49741 --- /dev/null +++ b/ext/bigdecimal/bits.h @@ -0,0 +1,137 @@ +#ifndef BIGDECIMAL_BITS_H +#define BIGDECIMAL_BITS_H + +#include "feature.h" +#include "static_assert.h" + +#if defined(HAVE_X86INTRIN_H) +# include <x86intrin.h> /* for _lzcnt_u64 */ +#elif defined(_MSC_VER) && _MSC_VER >= 1310 +# include <intrin.h> /* for the following intrinsics */ +#endif + +#if defined(_MSC_VER) && defined(__AVX2__) +# pragma intrinsic(__lzcnt) +# pragma intrinsic(__lzcnt64) +#endif + +#define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ + (a) == 0 ? 0 : \ + (a) == -1 ? (b) < -(max) : \ + (a) > 0 ? \ + ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \ + ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b))) + +#ifdef HAVE_UINT128_T +# define bit_length(x) \ + (unsigned int) \ + (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \ + sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \ + 128 - nlz_int128((uint128_t)(x))) +#else +# define bit_length(x) \ + (unsigned int) \ + (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \ + 64 - nlz_int64((uint64_t)(x))) +#endif + +static inline unsigned nlz_int32(uint32_t x); +static inline unsigned nlz_int64(uint64_t x); +#ifdef HAVE_UINT128_T +static inline unsigned nlz_int128(uint128_t x); +#endif + +static inline unsigned int +nlz_int32(uint32_t x) +{ +#if defined(_MSC_VER) && defined(__AVX2__) + /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC. + * AMD CPUs have had this instruction for decades (since K10) but for + * Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum + * safety. */ + return (unsigned int)__lzcnt(x); + +#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */ + return (unsigned int)_lzcnt_u32(x); + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */ + unsigned long r; + return _BitScanReverse(&r, x) ? (31 - (int)r) : 32; + +#elif __has_builtin(__builtin_clz) + STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32); + return x ? (unsigned int)__builtin_clz(x) : 32; + +#else + uint32_t y; + unsigned n = 32; + y = x >> 16; if (y) {n -= 16; x = y;} + y = x >> 8; if (y) {n -= 8; x = y;} + y = x >> 4; if (y) {n -= 4; x = y;} + y = x >> 2; if (y) {n -= 2; x = y;} + y = x >> 1; if (y) {return n - 2;} + return (unsigned int)(n - x); +#endif +} + +static inline unsigned int +nlz_int64(uint64_t x) +{ +#if defined(_MSC_VER) && defined(__AVX2__) + return (unsigned int)__lzcnt64(x); + +#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */ + return (unsigned int)_lzcnt_u64(x); + +#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */ + unsigned long r; + return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64; + +#elif __has_builtin(__builtin_clzl) + if (x == 0) { + return 64; + } + else if (sizeof(long) * CHAR_BIT == 64) { + return (unsigned int)__builtin_clzl((unsigned long)x); + } + else if (sizeof(long long) * CHAR_BIT == 64) { + return (unsigned int)__builtin_clzll((unsigned long long)x); + } + else { + /* :FIXME: Is there a way to make this branch a compile-time error? */ + __builtin_unreachable(); + } + +#else + uint64_t y; + unsigned int n = 64; + y = x >> 32; if (y) {n -= 32; x = y;} + y = x >> 16; if (y) {n -= 16; x = y;} + y = x >> 8; if (y) {n -= 8; x = y;} + y = x >> 4; if (y) {n -= 4; x = y;} + y = x >> 2; if (y) {n -= 2; x = y;} + y = x >> 1; if (y) {return n - 2;} + return (unsigned int)(n - x); + +#endif +} + +#ifdef HAVE_UINT128_T +static inline unsigned int +nlz_int128(uint128_t x) +{ + uint64_t y = (uint64_t)(x >> 64); + + if (x == 0) { + return 128; + } + else if (y == 0) { + return (unsigned int)nlz_int64(x) + 64; + } + else { + return (unsigned int)nlz_int64(y); + } +} +#endif + +#endif /* BIGDECIMAL_BITS_H */ |