diff options
Diffstat (limited to 'internal/bits.h')
| -rw-r--r-- | internal/bits.h | 241 |
1 files changed, 189 insertions, 52 deletions
diff --git a/internal/bits.h b/internal/bits.h index 2530bd89bc..698ab3e219 100644 --- a/internal/bits.h +++ b/internal/bits.h @@ -1,13 +1,12 @@ -#ifndef INTERNAL_BITS_H /* -*- C -*- */ +#ifndef INTERNAL_BITS_H /*-*-C-*-vi:se ft=c:*/ #define INTERNAL_BITS_H /** - * @file - * @brief Internal header for bitwise integer algorithms. - * @author \@shyouhei + * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * @brief Internal header for bitwise integer algorithms. * @see Henry S. Warren Jr., "Hacker's Delight" (2nd ed.), 2013. * @see SEI CERT C Coding Standard INT32-C. "Ensure that operations on * signed integers do not result in overflow" @@ -15,36 +14,58 @@ * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateleft * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateright * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort + * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanforward-bitscanforward64 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanreverse-bitscanreverse64 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/lzcnt16-lzcnt-lzcnt64 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32 + * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotl64 + * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotr64 + * @see https://stackoverflow.com/a/776523 */ -#include "ruby/config.h" +#include "ruby/internal/config.h" #include <limits.h> /* for CHAR_BITS */ #include <stdint.h> /* for uintptr_t */ +#include "internal/compilers.h" /* for MSC_VERSION_SINCE */ #ifdef _MSC_VER # include <stdlib.h> /* for _byteswap_uint64 */ #endif -#if defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER) -# /* Rule out MJIT_HEADER, which does not interface well with <immintrin.h> */ -# include <immintrin.h> /* for _lzcnt_u64 */ +#if defined(HAVE_X86INTRIN_H) +# include <x86intrin.h> /* for _lzcnt_u64 */ +#elif defined(_MSC_VER) +# include <intrin.h> /* for the following intrinsics */ #endif -#if defined(_MSC_VER) && defined(_WIN64) -# include <intrin.h> /* for the following intrinsics */ +#if defined(_MSC_VER) && defined(__AVX__) +# pragma intrinsic(__popcnt) +# pragma intrinsic(__popcnt64) +#endif + +#if defined(_MSC_VER) && defined(__AVX2__) +# pragma intrinsic(__lzcnt) +# pragma intrinsic(__lzcnt64) +#endif + +#if defined(_MSC_VER) +# pragma intrinsic(_rotl) +# pragma intrinsic(_rotr) +# ifdef _WIN64 +# pragma intrinsic(_rotl64) +# pragma intrinsic(_rotr64) +# endif # pragma intrinsic(_BitScanForward) -# pragma intrinsic(_BitScanForward64) # pragma intrinsic(_BitScanReverse) -# pragma intrinsic(_BitScanReverse64) +# ifdef _WIN64 +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif #endif #include "ruby/ruby.h" /* for VALUE */ -#include "internal/compilers.h" /* for __has_builtin */ #include "internal/static_assert.h" /* for STATIC_ASSERT */ /* The most significant bit of the lower part of half-long integer. @@ -66,6 +87,7 @@ #define UNSIGNED_INTEGER_MAX(T) ((T)~(T)0) +#ifndef MUL_OVERFLOW_SIGNED_INTEGER_P #if __has_builtin(__builtin_mul_overflow_p) # define MUL_OVERFLOW_P(a, b) \ __builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0) @@ -86,7 +108,7 @@ /* and GCC permits bitfields for integers other than int */ # define MUL_OVERFLOW_FIXNUM_P(a, b) \ __extension__ ({ \ - struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c; \ + struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \ __builtin_mul_overflow_p((a), (b), c.fixnum); \ }) #else @@ -94,15 +116,100 @@ MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) #endif -#ifdef MUL_OVERFLOW_P +#if defined(MUL_OVERFLOW_P) && defined(USE___BUILTIN_MUL_OVERFLOW_LONG_LONG) # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b) +#else +# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef MUL_OVERFLOW_P # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b) # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b) #else -# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) #endif +#endif + +#ifndef ADD_OVERFLOW_SIGNED_INTEGER_P +#if __has_builtin(__builtin_add_overflow_p) +# define ADD_OVERFLOW_P(a, b) \ + __builtin_add_overflow_p((a), (b), (__typeof__(a * b))0) +#elif __has_builtin(__builtin_add_overflow) +# define ADD_OVERFLOW_P(a, b) \ + __extension__ ({ __typeof__(a) c; __builtin_add_overflow((a), (b), &c); }) +#endif + +#define ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ + (a) > 0 ? (b) > (max) - (a) : (b) < (min) - (a)) + +#if __has_builtin(__builtin_add_overflow_p) +/* __builtin_add_overflow_p can take bitfield */ +/* and GCC permits bitfields for integers other than int */ +# define ADD_OVERFLOW_FIXNUM_P(a, b) \ + __extension__ ({ \ + struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \ + __builtin_add_overflow_p((a), (b), c.fixnum); \ + }) +#else +# define ADD_OVERFLOW_FIXNUM_P(a, b) \ + ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) +#endif + +#if defined(ADD_OVERFLOW_P) && defined(USE___BUILTIN_ADD_OVERFLOW_LONG_LONG) +# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_P(a, b) +#else +# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef ADD_OVERFLOW_P +# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_P(a, b) +# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_P(a, b) +#else +# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) +# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) +#endif +#endif + +#ifndef SUB_OVERFLOW_SIGNED_INTEGER_P +#if __has_builtin(__builtin_sub_overflow_p) +# define SUB_OVERFLOW_P(a, b) \ + __builtin_sub_overflow_p((a), (b), (__typeof__(a * b))0) +#elif __has_builtin(__builtin_sub_overflow) +# define SUB_OVERFLOW_P(a, b) \ + __extension__ ({ __typeof__(a) c; __builtin_sub_overflow((a), (b), &c); }) +#endif + +#define SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ + (b) > 0 ? (a) < (min) + (b) : (a) > (max) + (b)) + +#if __has_builtin(__builtin_sub_overflow_p) +/* __builtin_sub_overflow_p can take bitfield */ +/* and GCC permits bitfields for integers other than int */ +# define SUB_OVERFLOW_FIXNUM_P(a, b) \ + __extension__ ({ \ + struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \ + __builtin_sub_overflow_p((a), (b), c.fixnum); \ + }) +#else +# define SUB_OVERFLOW_FIXNUM_P(a, b) \ + SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) +#endif + +#if defined(SUB_OVERFLOW_P) && defined(USE___BUILTIN_SUB_OVERFLOW_LONG_LONG) +# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_P(a, b) +#else +# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef SUB_OVERFLOW_P +# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_P(a, b) +# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_P(a, b) +#else +# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) +# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) +#endif +#endif #ifdef HAVE_UINT128_T # define bit_length(x) \ @@ -117,9 +224,21 @@ 64 - nlz_int64((uint64_t)(x))) #endif -static inline uint16_t swap16(uint16_t); -static inline uint32_t swap32(uint32_t); -static inline uint64_t swap64(uint64_t); +#ifndef swap16 +# define swap16 ruby_swap16 +#endif + +#ifndef swap32 +# define swap32 ruby_swap32 +#endif + +#ifndef swap64 +# define swap64 ruby_swap64 +#endif + +static inline uint16_t ruby_swap16(uint16_t); +static inline uint32_t ruby_swap32(uint32_t); +static inline uint64_t ruby_swap64(uint64_t); static inline unsigned nlz_int(unsigned x); static inline unsigned nlz_long(unsigned long x); static inline unsigned nlz_long_long(unsigned long long x); @@ -139,7 +258,7 @@ static inline VALUE RUBY_BIT_ROTL(VALUE, int); static inline VALUE RUBY_BIT_ROTR(VALUE, int); static inline uint16_t -swap16(uint16_t x) +ruby_swap16(uint16_t x) { #if __has_builtin(__builtin_bswap16) return __builtin_bswap16(x); @@ -154,7 +273,7 @@ swap16(uint16_t x) } static inline uint32_t -swap32(uint32_t x) +ruby_swap32(uint32_t x) { #if __has_builtin(__builtin_bswap32) return __builtin_bswap32(x); @@ -171,7 +290,7 @@ swap32(uint32_t x) } static inline uint64_t -swap64(uint64_t x) +ruby_swap64(uint64_t x) { #if __has_builtin(__builtin_bswap64) return __builtin_bswap64(x); @@ -191,19 +310,19 @@ swap64(uint64_t x) static inline unsigned int nlz_int32(uint32_t x) { -#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX2__) - /* Note: It seems there is no such tihng like __LZCNT__ predefined in MSVC. +#if defined(_MSC_VER) && defined(__AVX2__) + /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC. * AMD CPUs have had this instruction for decades (since K10) but for * Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum * safety. */ return (unsigned int)__lzcnt(x); -#elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER) +#elif defined(__x86_64__) && defined(__LZCNT__) return (unsigned int)_lzcnt_u32(x); -#elif defined(_MSC_VER) && defined(_Win64) /* &&! defined(__AVX2__) */ +#elif defined(_MSC_VER) /* &&! defined(__AVX2__) */ unsigned long r; - return _BitScanReverse(&r, x) ? (int)r : 32; + return _BitScanReverse(&r, x) ? (31 - (int)r) : 32; #elif __has_builtin(__builtin_clz) STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32); @@ -224,15 +343,15 @@ nlz_int32(uint32_t x) static inline unsigned int nlz_int64(uint64_t x) { -#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX2__) +#if defined(_MSC_VER) && defined(__AVX2__) return (unsigned int)__lzcnt64(x); -#elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER) +#elif defined(__x86_64__) && defined(__LZCNT__) return (unsigned int)_lzcnt_u64(x); -#elif defined(_MSC_VER) && defined(_Win64) /* &&! defined(__AVX2__) */ +#elif defined(_WIN64) && defined(_MSC_VER) /* &&! defined(__AVX2__) */ unsigned long r; - return _BitScanReverse64(&r, x) ? (unsigned int)r : 64; + return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64; #elif __has_builtin(__builtin_clzl) if (x == 0) { @@ -246,7 +365,7 @@ nlz_int64(uint64_t x) } else { /* :FIXME: Is there a way to make this branch a compile-time error? */ - __builtin_unreachable(); + UNREACHABLE_RETURN(~0); } #else @@ -273,7 +392,7 @@ nlz_int128(uint128_t x) return 128; } else if (y == 0) { - return (unsigned int)nlz_int64(y) + 64; + return (unsigned int)nlz_int64(x) + 64; } else { return (unsigned int)nlz_int64(y); @@ -345,7 +464,7 @@ nlz_intptr(uintptr_t x) static inline unsigned int rb_popcount32(uint32_t x) { -#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX__) +#if defined(_MSC_VER) && defined(__AVX__) /* Note: CPUs since Nehalem and Barcelona have had this instruction so SSE * 4.2 should suffice, but it seems there is no such thing like __SSE_4_2__ * predefined macro in MSVC. They do have __AVX__ so use it instead. */ @@ -358,9 +477,9 @@ rb_popcount32(uint32_t x) #else x = (x & 0x55555555) + (x >> 1 & 0x55555555); x = (x & 0x33333333) + (x >> 2 & 0x33333333); - x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); - x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); - x = (x & 0x0000003f) + (x >>16 & 0x0000003f); + x = (x & 0x07070707) + (x >> 4 & 0x07070707); + x = (x & 0x000f000f) + (x >> 8 & 0x000f000f); + x = (x & 0x0000001f) + (x >>16 & 0x0000001f); return (unsigned int)x; #endif @@ -369,7 +488,7 @@ rb_popcount32(uint32_t x) static inline unsigned int rb_popcount64(uint64_t x) { -#if defined(_MSC_VER) && defined(_WIN64) && defined(__AVX__) +#if defined(_MSC_VER) && defined(__AVX__) return (unsigned int)__popcnt64(x); #elif __has_builtin(__builtin_popcount) @@ -381,16 +500,16 @@ rb_popcount64(uint64_t x) } else { /* :FIXME: Is there a way to make this branch a compile-time error? */ - __builtin_unreachable(); + UNREACHABLE_RETURN(~0); } #else x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); - x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); - x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); - x = (x & 0x000000000000007f) + (x >>32 & 0x000000000000007f); + x = (x & 0x000f000f000f000f) + (x >> 8 & 0x000f000f000f000f); + x = (x & 0x0000001f0000001f) + (x >>16 & 0x0000001f0000001f); + x = (x & 0x000000000000003f) + (x >>32 & 0x000000000000003f); return (unsigned int)x; #endif @@ -413,12 +532,12 @@ rb_popcount_intptr(uintptr_t x) static inline int ntz_int32(uint32_t x) { -#if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER) +#if defined(__x86_64__) && defined(__BMI__) return (unsigned)_tzcnt_u32(x); -#elif defined(_MSC_VER) && defined(_WIN64) +#elif defined(_MSC_VER) /* :FIXME: Is there any way to issue TZCNT instead of BSF, apart from using - * assembly? Because issueing LZCNT seems possible (see nlz.h). */ + * assembly? Because issuing LZCNT seems possible (see nlz.h). */ unsigned long r; return _BitScanForward(&r, x) ? (int)r : 32; @@ -435,10 +554,10 @@ ntz_int32(uint32_t x) static inline int ntz_int64(uint64_t x) { -#if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER) +#if defined(__x86_64__) && defined(__BMI__) return (unsigned)_tzcnt_u64(x); -#elif defined(_MSC_VER) && defined(_WIN64) +#elif defined(_WIN64) && defined(_MSC_VER) unsigned long r; return _BitScanForward64(&r, x) ? (int)r : 64; @@ -454,7 +573,7 @@ ntz_int64(uint64_t x) } else { /* :FIXME: Is there a way to make this branch a compile-time error? */ - __builtin_unreachable(); + UNREACHABLE_RETURN(~0); } #else @@ -486,9 +605,18 @@ RUBY_BIT_ROTL(VALUE v, int n) #elif __has_builtin(__builtin_rotateleft64) && (SIZEOF_VALUE * CHAR_BIT == 64) return __builtin_rotateleft64(v, n); +#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 32) + return _rotl(v, n); + +#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 64) + return _rotl64(v, n); + +#elif defined(_lrotl) && (SIZEOF_VALUE == SIZEOF_LONG) + return _lrotl(v, n); + #else - const int m = sizeof(VALUE) * CHAR_BIT; - return (v << n) | (v >> (m - n)); + const int m = (sizeof(VALUE) * CHAR_BIT) - 1; + return (v << (n & m)) | (v >> (-n & m)); #endif } @@ -501,9 +629,18 @@ RUBY_BIT_ROTR(VALUE v, int n) #elif __has_builtin(__builtin_rotateright64) && (SIZEOF_VALUE * CHAR_BIT == 64) return __builtin_rotateright64(v, n); +#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 32) + return _rotr(v, n); + +#elif defined(_MSC_VER) && (SIZEOF_VALUE * CHAR_BIT == 64) + return _rotr64(v, n); + +#elif defined(_lrotr) && (SIZEOF_VALUE == SIZEOF_LONG) + return _lrotr(v, n); + #else - const int m = sizeof(VALUE) * CHAR_BIT; - return (v << (m - n)) | (v >> n); + const int m = (sizeof(VALUE) * CHAR_BIT) - 1; + return (v << (-n & m)) | (v >> (n & m)); #endif } |
