summaryrefslogtreecommitdiff
path: root/internal/bits.h
diff options
context:
space:
mode:
Diffstat (limited to 'internal/bits.h')
-rw-r--r--internal/bits.h241
1 files changed, 189 insertions, 52 deletions
diff --git a/internal/bits.h b/internal/bits.h
index 2530bd89bc..698ab3e219 100644
--- a/internal/bits.h
+++ b/internal/bits.h
@@ -1,13 +1,12 @@
-#ifndef INTERNAL_BITS_H /* -*- C -*- */
+#ifndef INTERNAL_BITS_H /*-*-C-*-vi:se ft=c:*/
#define INTERNAL_BITS_H
/**
- * @file
- * @brief Internal header for bitwise integer algorithms.
- * @author \@shyouhei
+ * @author Ruby developers <ruby-core@ruby-lang.org>
* @copyright This file is a part of the programming language Ruby.
* Permission is hereby granted, to either redistribute and/or
* modify this file, provided that the conditions mentioned in the
* file COPYING are met. Consult the file for details.
+ * @brief Internal header for bitwise integer algorithms.
* @see Henry S. Warren Jr., "Hacker's Delight" (2nd ed.), 2013.
* @see SEI CERT C Coding Standard INT32-C. "Ensure that operations on
* signed integers do not result in overflow"
@@ -15,36 +14,58 @@
* @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateleft
* @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateright
* @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort
+ * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64
* @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanforward-bitscanforward64
* @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanreverse-bitscanreverse64
* @see https://docs.microsoft.com/en-us/cpp/intrinsics/lzcnt16-lzcnt-lzcnt64
* @see https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotl64
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotr64
+ * @see https://stackoverflow.com/a/776523
*/
-#include "ruby/config.h"
+#include "ruby/internal/config.h"
#include <limits.h> /* for CHAR_BITS */
#include <stdint.h> /* for uintptr_t */
+#include "internal/compilers.h" /* for MSC_VERSION_SINCE */
#ifdef _MSC_VER
# include <stdlib.h> /* for _byteswap_uint64 */
#endif
-#if defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER)
-# /* Rule out MJIT_HEADER, which does not interface well with <immintrin.h> */
-# include <immintrin.h> /* for _lzcnt_u64 */
+#if defined(HAVE_X86INTRIN_H)
+# include <x86intrin.h> /* for _lzcnt_u64 */
+#elif defined(_MSC_VER)
+# include <intrin.h> /* for the following intrinsics */
#endif
-#if defined(_MSC_VER) && defined(_WIN64)
-# include <intrin.h> /* for the following intrinsics */
+#if defined(_MSC_VER) && defined(__AVX__)
+# pragma intrinsic(__popcnt)
+# pragma intrinsic(__popcnt64)
+#endif
+
+#if defined(_MSC_VER) && defined(__AVX2__)
+# pragma intrinsic(__lzcnt)
+# pragma intrinsic(__lzcnt64)
+#endif
+
+#if defined(_MSC_VER)
+# pragma intrinsic(_rotl)
+# pragma intrinsic(_rotr)
+# ifdef _WIN64
+# pragma intrinsic(_rotl64)
+# pragma intrinsic(_rotr64)
+# endif
# pragma intrinsic(_BitScanForward)
-# pragma intrinsic(_BitScanForward64)
# pragma intrinsic(_BitScanReverse)
-# pragma intrinsic(_BitScanReverse64)
+# ifdef _WIN64
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse64)
+# endif
#endif
#include "ruby/ruby.h" /* for VALUE */
-#include "internal/compilers.h" /* for __has_builtin */
#include "internal/static_assert.h" /* for STATIC_ASSERT */
/* The most significant bit of the lower part of half-long integer.
@@ -66,6 +87,7 @@
#define UNSIGNED_INTEGER_MAX(T) ((T)~(T)0)
+#ifndef MUL_OVERFLOW_SIGNED_INTEGER_P
#if __has_builtin(__builtin_mul_overflow_p)
# define MUL_OVERFLOW_P(a, b) \
__builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0)
@@ -86,7 +108,7 @@
/* and GCC permits bitfields for integers other than int */
# define MUL_OVERFLOW_FIXNUM_P(a, b) \
__extension__ ({ \
- struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c; \
+ struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
__builtin_mul_overflow_p((a), (b), c.fixnum); \
})
#else
@@ -94,15 +116,100 @@
MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
#endif
-#ifdef MUL_OVERFLOW_P
+#if defined(MUL_OVERFLOW_P) && defined(USE___BUILTIN_MUL_OVERFLOW_LONG_LONG)
# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
+#else
+# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+#endif
+
+#ifdef MUL_OVERFLOW_P
# define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
# define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b)
#else
-# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
# define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
# define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
#endif
+#endif
+
+#ifndef ADD_OVERFLOW_SIGNED_INTEGER_P
+#if __has_builtin(__builtin_add_overflow_p)
+# define ADD_OVERFLOW_P(a, b) \
+ __builtin_add_overflow_p((a), (b), (__typeof__(a * b))0)
+#elif __has_builtin(__builtin_add_overflow)
+# define ADD_OVERFLOW_P(a, b) \
+ __extension__ ({ __typeof__(a) c; __builtin_add_overflow((a), (b), &c); })
+#endif
+
+#define ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+ (a) > 0 ? (b) > (max) - (a) : (b) < (min) - (a))
+
+#if __has_builtin(__builtin_add_overflow_p)
+/* __builtin_add_overflow_p can take bitfield */
+/* and GCC permits bitfields for integers other than int */
+# define ADD_OVERFLOW_FIXNUM_P(a, b) \
+ __extension__ ({ \
+ struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
+ __builtin_add_overflow_p((a), (b), c.fixnum); \
+ })
+#else
+# define ADD_OVERFLOW_FIXNUM_P(a, b) \
+ ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
+#endif
+
+#if defined(ADD_OVERFLOW_P) && defined(USE___BUILTIN_ADD_OVERFLOW_LONG_LONG)
+# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_P(a, b)
+#else
+# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+#endif
+
+#ifdef ADD_OVERFLOW_P
+# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_P(a, b)
+# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_P(a, b)
+#else
+# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
+# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
+#endif
+#endif
+
+#ifndef SUB_OVERFLOW_SIGNED_INTEGER_P
+#if __has_builtin(__builtin_sub_overflow_p)
+# define SUB_OVERFLOW_P(a, b) \
+ __builtin_sub_overflow_p((a), (b), (__typeof__(a * b))0)
+#elif __has_builtin(__builtin_sub_overflow)
+# define SUB_OVERFLOW_P(a, b) \
+ __extension__ ({ __typeof__(a) c; __builtin_sub_overflow((a), (b), &c); })
+#endif
+
+#define SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+ (b) > 0 ? (a) < (min) + (b) : (a) > (max) + (b))
+
+#if __has_builtin(__builtin_sub_overflow_p)
+/* __builtin_sub_overflow_p can take bitfield */
+/* and GCC permits bitfields for integers other than int */
+# define SUB_OVERFLOW_FIXNUM_P(a, b) \
+ __extension__ ({ \
+ struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
+ __builtin_sub_overflow_p((a), (b), c.fixnum); \
+ })
+#else
+# define SUB_OVERFLOW_FIXNUM_P(a, b) \
+ SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
+#endif
+
+#if defined(SUB_OVERFLOW_P) && defined(USE___BUILTIN_SUB_OVERFLOW_LONG_LONG)
+# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_P(a, b)
+#else
+# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+#endif
+
+#ifdef SUB_OVERFLOW_P
+# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_P(a, b)
+# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_P(a, b)
+#else
+# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
+# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
+#endif
+#endif
#ifdef HAVE_UINT128_T
# define bit_length(x) \
@@ -117,9 +224,21 @@
64 - nlz_int64((uint64_t)(x)))
#endif
-static inline uint16_t swap16(uint16_t);
-static inline uint32_t swap32(uint32_t);
-static inline uint64_t swap64(uint64_t);
+#ifndef swap16
+# define swap16 ruby_swap16
+#endif
+
+#ifndef swap32
+# define swap32 ruby_swap32
+#endif
+
+#ifndef swap64
+# define swap64 ruby_swap64
+#endif
+
+static inline uint16_t ruby_swap16(uint16_t);
+static inline uint32_t ruby_swap32(uint32_t);
+static inline uint64_t ruby_swap64(uint64_t);
static inline unsigned nlz_int(unsigned x);
static inline unsigned nlz_long(unsigned long x);
static inline unsigned nlz_long_long(unsigned long long x);
@@ -139,7 +258,7 @@ static inline VALUE RUBY_BIT_ROTL(VALUE, int);
static inline VALUE RUBY_BIT_ROTR(VALUE, int);
static inline uint16_t
-swap16(uint16_t x)
+ruby_swap16(uint16_t x)
{
#if __has_builtin(__builtin_bswap16)
return __builtin_bswap16(x);
@@ -154,7 +273,7 @@ swap16(uint16_t x)
}
static inline uint32_t
-swap32(uint32_t x)
+ruby_swap32(uint32_t x)
{
#if __has_builtin(__builtin_bswap32)
return __builtin_bswap32(x);
@@ -171,7 +290,7 @@ swap32(uint32_t x)
}
static inline uint64_t
-swap64(uint64_t x)
+ruby_swap64(uint64_t x)
{
#if __has_builtin(__builtin_bswap64)
return __builtin_bswap64(x);
@@ -191,19 +310,19 @@ swap64(uint64_t x)
static inline unsigned int
nlz_int32(uint32_t x)
{
-#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX2__)
- /* Note: It seems there is no such tihng like __LZCNT__ predefined in MSVC.
+#if defined(_MSC_VER) && defined(__AVX2__)
+ /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC.
* AMD CPUs have had this instruction for decades (since K10) but for
* Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum
* safety. */
return (unsigned int)__lzcnt(x);
-#elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER)
+#elif defined(__x86_64__) && defined(__LZCNT__)
return (unsigned int)_lzcnt_u32(x);
-#elif defined(_MSC_VER) && defined(_Win64) /* &&! defined(__AVX2__) */
+#elif defined(_MSC_VER) /* &&! defined(__AVX2__) */
unsigned long r;
- return _BitScanReverse(&r, x) ? (int)r : 32;
+ return _BitScanReverse(&r, x) ? (31 - (int)r) : 32;
#elif __has_builtin(__builtin_clz)
STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32);
@@ -224,15 +343,15 @@ nlz_int32(uint32_t x)
static inline unsigned int
nlz_int64(uint64_t x)
{
-#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX2__)
+#if defined(_MSC_VER) && defined(__AVX2__)
return (unsigned int)__lzcnt64(x);
-#elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER)
+#elif defined(__x86_64__) && defined(__LZCNT__)
return (unsigned int)_lzcnt_u64(x);
-#elif defined(_MSC_VER) && defined(_Win64) /* &&! defined(__AVX2__) */
+#elif defined(_WIN64) && defined(_MSC_VER) /* &&! defined(__AVX2__) */
unsigned long r;
- return _BitScanReverse64(&r, x) ? (unsigned int)r : 64;
+ return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64;
#elif __has_builtin(__builtin_clzl)
if (x == 0) {
@@ -246,7 +365,7 @@ nlz_int64(uint64_t x)
}
else {
/* :FIXME: Is there a way to make this branch a compile-time error? */
- __builtin_unreachable();
+ UNREACHABLE_RETURN(~0);
}
#else
@@ -273,7 +392,7 @@ nlz_int128(uint128_t x)
return 128;
}
else if (y == 0) {
- return (unsigned int)nlz_int64(y) + 64;
+ return (unsigned int)nlz_int64(x) + 64;
}
else {
return (unsigned int)nlz_int64(y);
@@ -345,7 +464,7 @@ nlz_intptr(uintptr_t x)
static inline unsigned int
rb_popcount32(uint32_t x)
{
-#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX__)
+#if defined(_MSC_VER) && defined(__AVX__)
/* Note: CPUs since Nehalem and Barcelona have had this instruction so SSE
* 4.2 should suffice, but it seems there is no such thing like __SSE_4_2__
* predefined macro in MSVC. They do have __AVX__ so use it instead. */
@@ -358,9 +477,9 @@ rb_popcount32(uint32_t x)
#else
x = (x & 0x55555555) + (x >> 1 & 0x55555555);
x = (x & 0x33333333) + (x >> 2 & 0x33333333);
- x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f);
- x = (x & 0x001f001f) + (x >> 8 & 0x001f001f);
- x = (x & 0x0000003f) + (x >>16 & 0x0000003f);
+ x = (x & 0x07070707) + (x >> 4 & 0x07070707);
+ x = (x & 0x000f000f) + (x >> 8 & 0x000f000f);
+ x = (x & 0x0000001f) + (x >>16 & 0x0000001f);
return (unsigned int)x;
#endif
@@ -369,7 +488,7 @@ rb_popcount32(uint32_t x)
static inline unsigned int
rb_popcount64(uint64_t x)
{
-#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX__)
+#if defined(_MSC_VER) && defined(__AVX__)
return (unsigned int)__popcnt64(x);
#elif __has_builtin(__builtin_popcount)
@@ -381,16 +500,16 @@ rb_popcount64(uint64_t x)
}
else {
/* :FIXME: Is there a way to make this branch a compile-time error? */
- __builtin_unreachable();
+ UNREACHABLE_RETURN(~0);
}
#else
x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
- x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
- x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
- x = (x & 0x000000000000007f) + (x >>32 & 0x000000000000007f);
+ x = (x & 0x000f000f000f000f) + (x >> 8 & 0x000f000f000f000f);
+ x = (x & 0x0000001f0000001f) + (x >>16 & 0x0000001f0000001f);
+ x = (x & 0x000000000000003f) + (x >>32 & 0x000000000000003f);
return (unsigned int)x;
#endif
@@ -413,12 +532,12 @@ rb_popcount_intptr(uintptr_t x)
static inline int
ntz_int32(uint32_t x)
{
-#if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER)
+#if defined(__x86_64__) && defined(__BMI__)
return (unsigned)_tzcnt_u32(x);
-#elif defined(_MSC_VER) && defined(_WIN64)
+#elif defined(_MSC_VER)
/* :FIXME: Is there any way to issue TZCNT instead of BSF, apart from using
- * assembly? Because issueing LZCNT seems possible (see nlz.h). */
+ * assembly? Because issuing LZCNT seems possible (see nlz.h). */
unsigned long r;
return _BitScanForward(&r, x) ? (int)r : 32;
@@ -435,10 +554,10 @@ ntz_int32(uint32_t x)
static inline int
ntz_int64(uint64_t x)
{
-#if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER)
+#if defined(__x86_64__) && defined(__BMI__)
return (unsigned)_tzcnt_u64(x);
-#elif defined(_MSC_VER) && defined(_WIN64)
+#elif defined(_WIN64) && defined(_MSC_VER)
unsigned long r;
return _BitScanForward64(&r, x) ? (int)r : 64;
@@ -454,7 +573,7 @@ ntz_int64(uint64_t x)
}
else {
/* :FIXME: Is there a way to make this branch a compile-time error? */
- __builtin_unreachable();
+ UNREACHABLE_RETURN(~0);
}
#else
@@ -486,9 +605,18 @@ RUBY_BIT_ROTL(VALUE v, int n)
#elif __has_builtin(__builtin_rotateleft64) && (SIZEOF_VALUE * CHAR_BIT == 64)
return __builtin_rotateleft64(v, n);
+#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 32)
+ return _rotl(v, n);
+
+#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 64)
+ return _rotl64(v, n);
+
+#elif defined(_lrotl) && (SIZEOF_VALUE == SIZEOF_LONG)
+ return _lrotl(v, n);
+
#else
- const int m = sizeof(VALUE) * CHAR_BIT;
- return (v << n) | (v >> (m - n));
+ const int m = (sizeof(VALUE) * CHAR_BIT) - 1;
+ return (v << (n & m)) | (v >> (-n & m));
#endif
}
@@ -501,9 +629,18 @@ RUBY_BIT_ROTR(VALUE v, int n)
#elif __has_builtin(__builtin_rotateright64) && (SIZEOF_VALUE * CHAR_BIT == 64)
return __builtin_rotateright64(v, n);
+#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 32)
+ return _rotr(v, n);
+
+#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 64)
+ return _rotr64(v, n);
+
+#elif defined(_lrotr) && (SIZEOF_VALUE == SIZEOF_LONG)
+ return _lrotr(v, n);
+
#else
- const int m = sizeof(VALUE) * CHAR_BIT;
- return (v << (m - n)) | (v >> n);
+ const int m = (sizeof(VALUE) * CHAR_BIT) - 1;
+ return (v << (-n & m)) | (v >> (n & m));
#endif
}