From 42d6020059f91c06b96bf7729342a71751d4e801 Mon Sep 17 00:00:00 2001
From: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Sun, 7 Jul 2013 11:02:47 +0000
Subject: * bignum.c: Reorder functions to decrease forward reference.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41822 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog |    4 +
 bignum.c  | 6473 ++++++++++++++++++++++++++++++-------------------------------
 2 files changed, 3239 insertions(+), 3238 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6904fc9228..ad97dd8020 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Sun Jul  7 19:21:30 2013  Tanaka Akira  <akr@fsij.org>
+
+	* bignum.c: Reorder functions to decrease forward reference.
+
 Sun Jul  7 14:41:57 2013  Tanaka Akira  <akr@fsij.org>
 
 	* bignum.c: (bigsub_core): Use bary_sub.
diff --git a/bignum.c b/bignum.c
index de4c8537da..bc122fe8ce 100644
--- a/bignum.c
+++ b/bignum.c
@@ -26,6 +26,7 @@
 #include <assert.h>
 
 VALUE rb_cBignum;
+const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
 
 static VALUE big_three = Qnil;
 
@@ -93,51 +94,23 @@ static VALUE big_three = Qnil;
 #define RBIGNUM_SET_NEGATIVE_SIGN(b) RBIGNUM_SET_SIGN(b, 0)
 #define RBIGNUM_SET_POSITIVE_SIGN(b) RBIGNUM_SET_SIGN(b, 1)
 
+#define bignew(len,sign) bignew_1(rb_cBignum,(len),(sign))
+
 #define KARATSUBA_MUL_DIGITS 70
 #define TOOM3_MUL_DIGITS 150
 
 static BDIGIT bary_small_lshift(BDIGIT *zds, BDIGIT *xds, long n, int shift);
 static void bary_small_rshift(BDIGIT *zds, BDIGIT *xds, long n, int shift, int sign_bit);
-static void bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
-static void bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
 static void bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
-static int bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
-static int bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow);
 static void bary_divmod(BDIGIT *qds, size_t nq, BDIGIT *rds, size_t nr, BDIGIT *xds, size_t nx, BDIGIT *yds, size_t ny);
-static int bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
-static int bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry);
-static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
-static int bary_2comp(BDIGIT *ds, size_t n);
 
-static void bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn);
-static inline int bary_sparse_p(BDIGIT *ds, size_t n);
 static VALUE bigmul0(VALUE x, VALUE y);
 static VALUE bigmul1_toom3(VALUE x, VALUE y);
+static VALUE bignew_1(VALUE klass, long len, int sign);
+static inline VALUE bigtrunc(VALUE x);
 
-#define BIGNUM_DEBUG 0
-#if BIGNUM_DEBUG
-#define ON_DEBUG(x) do { x; } while (0)
-static void
-dump_bignum(VALUE x)
-{
-    long i;
-    printf("%c0x0", RBIGNUM_SIGN(x) ? '+' : '-');
-    for (i = RBIGNUM_LEN(x); i--; ) {
-        printf("_%0*"PRIxBDIGIT, SIZEOF_BDIGITS*2, BDIGITS(x)[i]);
-    }
-    printf(", len=%lu", RBIGNUM_LEN(x));
-    puts("");
-}
-
-static VALUE
-rb_big_dump(VALUE x)
-{
-    dump_bignum(x);
-    return x;
-}
-#else
-#define ON_DEBUG(x)
-#endif
+static VALUE bigsqr(VALUE x);
+static void bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp);
 
 static int
 nlz16(uint16_t x)
@@ -479,121 +452,172 @@ bary_zero_p(BDIGIT *xds, size_t nx)
     return 1;
 }
 
+static void
+bary_neg(BDIGIT *ds, size_t n)
+{
+    while (n--)
+        ds[n] = BIGLO(~ds[n]);
+}
+
 static int
-bigzero_p(VALUE x)
+bary_plus_one(BDIGIT *ds, size_t n)
 {
-    return bary_zero_p(BDIGITS(x), RBIGNUM_LEN(x));
+    size_t i;
+    for (i = 0; i < n; i++) {
+	ds[i] = BIGLO(ds[i]+1);
+        if (ds[i] != 0)
+            return 0;
+    }
+    return 1;
 }
 
-int
-rb_bigzero_p(VALUE x)
+static int
+bary_2comp(BDIGIT *ds, size_t n)
 {
-    return BIGZEROP(x);
+    if (!n) return 1;
+    bary_neg(ds, n);
+    return bary_plus_one(ds, n);
 }
 
-int
-rb_cmpint(VALUE val, VALUE a, VALUE b)
+static void
+bary_swap(BDIGIT *ds, size_t num_bdigits)
 {
-    if (NIL_P(val)) {
-	rb_cmperr(a, b);
-    }
-    if (FIXNUM_P(val)) {
-        long l = FIX2LONG(val);
-        if (l > 0) return 1;
-        if (l < 0) return -1;
-        return 0;
-    }
-    if (RB_TYPE_P(val, T_BIGNUM)) {
-	if (BIGZEROP(val)) return 0;
-	if (RBIGNUM_SIGN(val)) return 1;
-	return -1;
+    BDIGIT *p1 = ds;
+    BDIGIT *p2 = ds + num_bdigits - 1;
+    for (; p1 < p2; p1++, p2--) {
+        BDIGIT tmp = *p1;
+        *p1 = *p2;
+        *p2 = tmp;
     }
-    if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1;
-    if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1;
-    return 0;
 }
 
-#define RBIGNUM_SET_LEN(b,l) \
-    ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \
-     (void)(RBASIC(b)->flags = \
-	    (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \
-	    ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \
-     (void)(RBIGNUM(b)->as.heap.len = (l)))
+#define INTEGER_PACK_WORDORDER_MASK \
+    (INTEGER_PACK_MSWORD_FIRST | \
+     INTEGER_PACK_LSWORD_FIRST)
+#define INTEGER_PACK_BYTEORDER_MASK \
+    (INTEGER_PACK_MSBYTE_FIRST | \
+     INTEGER_PACK_LSBYTE_FIRST | \
+     INTEGER_PACK_NATIVE_BYTE_ORDER)
 
 static void
-rb_big_realloc(VALUE big, long len)
+validate_integer_pack_format(size_t numwords, size_t wordsize, size_t nails, int flags, int supported_flags)
 {
-    BDIGIT *ds;
-    if (RBASIC(big)->flags & RBIGNUM_EMBED_FLAG) {
-	if (RBIGNUM_EMBED_LEN_MAX < len) {
-	    ds = ALLOC_N(BDIGIT, len);
-	    MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, RBIGNUM_EMBED_LEN_MAX);
-	    RBIGNUM(big)->as.heap.len = RBIGNUM_LEN(big);
-	    RBIGNUM(big)->as.heap.digits = ds;
-	    RBASIC(big)->flags &= ~RBIGNUM_EMBED_FLAG;
-	}
+    int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
+    int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
+
+    if (flags & ~supported_flags) {
+        rb_raise(rb_eArgError, "unsupported flags specified");
     }
-    else {
-	if (len <= RBIGNUM_EMBED_LEN_MAX) {
-	    ds = RBIGNUM(big)->as.heap.digits;
-	    RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG;
-	    RBIGNUM_SET_LEN(big, len);
-	    if (ds) {
-		MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len);
-		xfree(ds);
-	    }
-	}
-	else {
-	    if (RBIGNUM_LEN(big) == 0) {
-		RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
-	    }
-	    else {
-		REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len);
-	    }
-	}
+    if (wordorder_bits == 0) {
+        if (1 < numwords)
+            rb_raise(rb_eArgError, "word order not specified");
+    }
+    else if (wordorder_bits != INTEGER_PACK_MSWORD_FIRST &&
+        wordorder_bits != INTEGER_PACK_LSWORD_FIRST)
+        rb_raise(rb_eArgError, "unexpected word order");
+    if (byteorder_bits == 0) {
+        rb_raise(rb_eArgError, "byte order not specified");
     }
+    else if (byteorder_bits != INTEGER_PACK_MSBYTE_FIRST &&
+        byteorder_bits != INTEGER_PACK_LSBYTE_FIRST &&
+        byteorder_bits != INTEGER_PACK_NATIVE_BYTE_ORDER)
+        rb_raise(rb_eArgError, "unexpected byte order");
+    if (wordsize == 0)
+        rb_raise(rb_eArgError, "invalid wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
+    if (SSIZE_MAX < wordsize)
+        rb_raise(rb_eArgError, "too big wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
+    if (wordsize <= nails / CHAR_BIT)
+        rb_raise(rb_eArgError, "too big nails: %"PRI_SIZE_PREFIX"u", nails);
+    if (SIZE_MAX / wordsize < numwords)
+        rb_raise(rb_eArgError, "too big numwords * wordsize: %"PRI_SIZE_PREFIX"u * %"PRI_SIZE_PREFIX"u", numwords, wordsize);
 }
 
-void
-rb_big_resize(VALUE big, long len)
+static void
+integer_pack_loop_setup(
+    size_t numwords, size_t wordsize, size_t nails, int flags,
+    size_t *word_num_fullbytes_ret,
+    int *word_num_partialbits_ret,
+    size_t *word_start_ret,
+    ssize_t *word_step_ret,
+    size_t *word_last_ret,
+    size_t *byte_start_ret,
+    int *byte_step_ret)
 {
-    rb_big_realloc(big, len);
-    RBIGNUM_SET_LEN(big, len);
-}
+    int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
+    int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
+    size_t word_num_fullbytes;
+    int word_num_partialbits;
+    size_t word_start;
+    ssize_t word_step;
+    size_t word_last;
+    size_t byte_start;
+    int byte_step;
 
-static VALUE
-bignew_1(VALUE klass, long len, int sign)
-{
-    NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0));
-    RBIGNUM_SET_SIGN(big, sign?1:0);
-    if (len <= RBIGNUM_EMBED_LEN_MAX) {
-	RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG;
-	RBIGNUM_SET_LEN(big, len);
+    word_num_partialbits = CHAR_BIT - (int)(nails % CHAR_BIT);
+    if (word_num_partialbits == CHAR_BIT)
+        word_num_partialbits = 0;
+    word_num_fullbytes = wordsize - (nails / CHAR_BIT);
+    if (word_num_partialbits != 0) {
+        word_num_fullbytes--;
+    }
+
+    if (wordorder_bits == INTEGER_PACK_MSWORD_FIRST) {
+        word_start = wordsize*(numwords-1);
+        word_step = -(ssize_t)wordsize;
+        word_last = 0;
     }
     else {
-	RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
-	RBIGNUM(big)->as.heap.len = len;
+        word_start = 0;
+        word_step = wordsize;
+        word_last = wordsize*(numwords-1);
     }
-    OBJ_FREEZE(big);
-    return (VALUE)big;
-}
 
-#define bignew(len,sign) bignew_1(rb_cBignum,(len),(sign))
+    if (byteorder_bits == INTEGER_PACK_NATIVE_BYTE_ORDER) {
+#ifdef WORDS_BIGENDIAN
+        byteorder_bits = INTEGER_PACK_MSBYTE_FIRST;
+#else
+        byteorder_bits = INTEGER_PACK_LSBYTE_FIRST;
+#endif
+    }
+    if (byteorder_bits == INTEGER_PACK_MSBYTE_FIRST) {
+        byte_start = wordsize-1;
+        byte_step = -1;
+    }
+    else {
+        byte_start = 0;
+        byte_step = 1;
+    }
 
-VALUE
-rb_big_new(long len, int sign)
-{
-    return bignew(len, sign != 0);
+    *word_num_partialbits_ret = word_num_partialbits;
+    *word_num_fullbytes_ret = word_num_fullbytes;
+    *word_start_ret = word_start;
+    *word_step_ret = word_step;
+    *word_last_ret = word_last;
+    *byte_start_ret = byte_start;
+    *byte_step_ret = byte_step;
 }
 
-VALUE
-rb_big_clone(VALUE x)
+static inline void
+integer_pack_fill_dd(BDIGIT **dpp, BDIGIT **dep, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
 {
-    long len = RBIGNUM_LEN(x);
-    VALUE z = bignew_1(CLASS_OF(x), len, RBIGNUM_SIGN(x));
+    if (*dpp < *dep && BITSPERDIG <= (int)sizeof(*ddp) * CHAR_BIT - *numbits_in_dd_p) {
+        *ddp |= (BDIGIT_DBL)(*(*dpp)++) << *numbits_in_dd_p;
+        *numbits_in_dd_p += BITSPERDIG;
+    }
+    else if (*dpp == *dep) {
+        /* no digits remain: all higher bits are implicitly zero */
+        *numbits_in_dd_p = (int)sizeof(*ddp) * CHAR_BIT;
+    }
+}
 
-    MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len);
-    return z;
+static inline BDIGIT_DBL
+integer_pack_take_lowbits(int n, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
+{
+    BDIGIT_DBL ret;
+    ret = (*ddp) & (((BDIGIT_DBL)1 << n) - 1);
+    *ddp >>= n;
+    *numbits_in_dd_p -= n;
+    return ret;
 }
 
 static int
@@ -610,3663 +634,3686 @@ bytes_2comp(unsigned char *buf, size_t len)
     return 1;
 }
 
-static void
-bary_neg(BDIGIT *ds, size_t n)
+static int
+bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 {
-    while (n--)
-        ds[n] = BIGLO(~ds[n]);
-}
+    BDIGIT *dp, *de;
+    unsigned char *buf, *bufend;
 
-static int
-bary_plus_one(BDIGIT *ds, size_t n)
-{
-    size_t i;
-    for (i = 0; i < n; i++) {
-	ds[i] = BIGLO(ds[i]+1);
-        if (ds[i] != 0)
+    dp = ds;
+    de = ds + num_bdigits;
+
+    validate_integer_pack_format(numwords, wordsize, nails, flags,
+            INTEGER_PACK_MSWORD_FIRST|
+            INTEGER_PACK_LSWORD_FIRST|
+            INTEGER_PACK_MSBYTE_FIRST|
+            INTEGER_PACK_LSBYTE_FIRST|
+            INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP|
+            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
+
+    while (dp < de && de[-1] == 0)
+        de--;
+    if (dp == de) {
+        sign = 0;
+    }
+
+    if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
+        if (sign == 0) {
+            MEMZERO(words, unsigned char, numwords * wordsize);
             return 0;
+        }
+        if (nails == 0 && numwords == 1) {
+            int need_swap = wordsize != 1 &&
+                (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
+                ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
+            if (0 < sign || !(flags & INTEGER_PACK_2COMP)) {
+                BDIGIT d;
+                if (wordsize == 1) {
+                    *((unsigned char *)words) = (unsigned char)(d = dp[0]);
+                    return ((1 < de - dp || CLEAR_LOWBITS(d, 8) != 0) ? 2 : 1) * sign;
+                }
+#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
+                if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
+                    uint16_t u = (uint16_t)(d = dp[0]);
+                    if (need_swap) u = swap16(u);
+                    *((uint16_t *)words) = u;
+                    return ((1 < de - dp || CLEAR_LOWBITS(d, 16) != 0) ? 2 : 1) * sign;
+                }
+#endif
+#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
+                if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
+                    uint32_t u = (uint32_t)(d = dp[0]);
+                    if (need_swap) u = swap32(u);
+                    *((uint32_t *)words) = u;
+                    return ((1 < de - dp || CLEAR_LOWBITS(d, 32) != 0) ? 2 : 1) * sign;
+                }
+#endif
+#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
+                if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
+                    uint64_t u = (uint64_t)(d = dp[0]);
+                    if (need_swap) u = swap64(u);
+                    *((uint64_t *)words) = u;
+                    return ((1 < de - dp || CLEAR_LOWBITS(d, 64) != 0) ? 2 : 1) * sign;
+                }
+#endif
+            }
+            else { /* sign < 0 && (flags & INTEGER_PACK_2COMP) */
+                BDIGIT_DBL_SIGNED d;
+                if (wordsize == 1) {
+                    *((unsigned char *)words) = (unsigned char)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
+                    return (1 < de - dp || FILL_LOWBITS(d, 8) != -1) ? -2 : -1;
+                }
+#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
+                if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
+                    uint16_t u = (uint16_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
+                    if (need_swap) u = swap16(u);
+                    *((uint16_t *)words) = u;
+                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
+                        (1 < de - dp || FILL_LOWBITS(d, 16) != -1) ? -2 : -1;
+                }
+#endif
+#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
+                if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
+                    uint32_t u = (uint32_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
+                    if (need_swap) u = swap32(u);
+                    *((uint32_t *)words) = u;
+                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
+                        (1 < de - dp || FILL_LOWBITS(d, 32) != -1) ? -2 : -1;
+                }
+#endif
+#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
+                if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
+                    uint64_t u = (uint64_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
+                    if (need_swap) u = swap64(u);
+                    *((uint64_t *)words) = u;
+                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
+                        (1 < de - dp || FILL_LOWBITS(d, 64) != -1) ? -2 : -1;
+                }
+#endif
+            }
+        }
+#if !defined(WORDS_BIGENDIAN)
+        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
+            (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
+            (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
+            size_t src_size = (de - dp) * SIZEOF_BDIGITS;
+            size_t dst_size = numwords * wordsize;
+            int overflow = 0;
+            while (0 < src_size && ((unsigned char *)ds)[src_size-1] == 0)
+                src_size--;
+            if (src_size <= dst_size) {
+                MEMCPY(words, dp, char, src_size);
+                MEMZERO((char*)words + src_size, char, dst_size - src_size);
+            }
+            else {
+                MEMCPY(words, dp, char, dst_size);
+                overflow = 1;
+            }
+            if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
+                int zero_p = bytes_2comp(words, dst_size);
+                if (zero_p && overflow) {
+                    unsigned char *p = (unsigned char *)dp;
+                    if (dst_size == src_size-1 &&
+                        p[dst_size] == 1) {
+                        overflow = 0;
+                    }
+                }
+            }
+            if (overflow)
+                sign *= 2;
+            return sign;
+        }
+#endif
+        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
+            wordsize % SIZEOF_BDIGITS == 0 && (uintptr_t)words % ALIGNOF(BDIGIT) == 0) {
+            size_t bdigits_per_word = wordsize / SIZEOF_BDIGITS;
+            size_t src_num_bdigits = de - dp;
+            size_t dst_num_bdigits = numwords * bdigits_per_word;
+            int overflow = 0;
+            int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
+            int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
+                (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
+            if (src_num_bdigits <= dst_num_bdigits) {
+                MEMCPY(words, dp, BDIGIT, src_num_bdigits);
+                MEMZERO((BDIGIT*)words + src_num_bdigits, BDIGIT, dst_num_bdigits - src_num_bdigits);
+            }
+            else {
+                MEMCPY(words, dp, BDIGIT, dst_num_bdigits);
+                overflow = 1;
+            }
+            if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
+                int zero_p = bary_2comp(words, dst_num_bdigits);
+                if (zero_p && overflow &&
+                    dst_num_bdigits == src_num_bdigits-1 &&
+                    dp[dst_num_bdigits] == 1)
+                    overflow = 0;
+            }
+            if (msbytefirst_p != HOST_BIGENDIAN_P) {
+                size_t i;
+                for (i = 0; i < dst_num_bdigits; i++) {
+                    BDIGIT d = ((BDIGIT*)words)[i];
+                    ((BDIGIT*)words)[i] = swap_bdigit(d);
+                }
+            }
+            if (mswordfirst_p ?  !msbytefirst_p : msbytefirst_p) {
+                size_t i;
+                BDIGIT *p = words;
+                for (i = 0; i < numwords; i++) {
+                    bary_swap(p, bdigits_per_word);
+                    p += bdigits_per_word;
+                }
+            }
+            if (mswordfirst_p) {
+                bary_swap(words, dst_num_bdigits);
+            }
+            if (overflow)
+                sign *= 2;
+            return sign;
+        }
     }
-    return 1;
-}
 
-static int
-bary_2comp(BDIGIT *ds, size_t n)
-{
-    if (!n) return 1;
-    bary_neg(ds, n);
-    return bary_plus_one(ds, n);
-}
+    buf = words;
+    bufend = buf + numwords * wordsize;
 
-static void
-big_extend_carry(VALUE x)
-{
-    rb_big_resize(x, RBIGNUM_LEN(x)+1);
-    BDIGITS(x)[RBIGNUM_LEN(x)-1] = 1;
-}
+    if (buf == bufend) {
+        /* overflow if non-zero */
+        if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
+            sign *= 2;
+        else {
+            if (de - dp == 1 && dp[0] == 1)
+                sign = -1; /* val == -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
+            else
+                sign = -2; /* val < -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
+        }
+    }
+    else if (dp == de) {
+        memset(buf, '\0', bufend - buf);
+    }
+    else if (dp < de && buf < bufend) {
+        int word_num_partialbits;
+        size_t word_num_fullbytes;
 
-/* modify a bignum by 2's complement */
-static void
-get2comp(VALUE x)
-{
-    long i = RBIGNUM_LEN(x);
-    BDIGIT *ds = BDIGITS(x);
+        ssize_t word_step;
+        size_t byte_start;
+        int byte_step;
 
-    if (bary_2comp(ds, i)) {
-        big_extend_carry(x);
-    }
-}
+        size_t word_start, word_last;
+        unsigned char *wordp, *last_wordp;
+        BDIGIT_DBL dd;
+        int numbits_in_dd;
 
-void
-rb_big_2comp(VALUE x)			/* get 2's complement */
-{
-    get2comp(x);
-}
+        integer_pack_loop_setup(numwords, wordsize, nails, flags,
+            &word_num_fullbytes, &word_num_partialbits,
+            &word_start, &word_step, &word_last, &byte_start, &byte_step);
 
-static BDIGIT
-abs2twocomp(VALUE *xp, long *n_ret)
-{
-    VALUE x = *xp;
-    long n = RBIGNUM_LEN(x);
-    BDIGIT *ds = BDIGITS(x);
-    BDIGIT hibits = 0;
+        wordp = buf + word_start;
+        last_wordp = buf + word_last;
 
-    while (0 < n && ds[n-1] == 0)
-        n--;
+        dd = 0;
+        numbits_in_dd = 0;
 
-    if (n != 0 && RBIGNUM_NEGATIVE_P(x)) {
-        VALUE z = bignew_1(CLASS_OF(x), n, 0);
-        MEMCPY(BDIGITS(z), ds, BDIGIT, n);
-        bary_2comp(BDIGITS(z), n);
-        hibits = BDIGMAX;
-	*xp = z;
-    }
-    *n_ret = n;
-    return hibits;
-}
+#define FILL_DD \
+    integer_pack_fill_dd(&dp, &de, &dd, &numbits_in_dd)
+#define TAKE_LOWBITS(n) \
+    integer_pack_take_lowbits(n, &dd, &numbits_in_dd)
 
-static void
-twocomp2abs_bang(VALUE x, int hibits)
-{
-    RBIGNUM_SET_SIGN(x, !hibits);
-    if (hibits) {
-        get2comp(x);
+        while (1) {
+            size_t index_in_word = 0;
+            unsigned char *bytep = wordp + byte_start;
+            while (index_in_word < word_num_fullbytes) {
+                FILL_DD;
+                *bytep = TAKE_LOWBITS(CHAR_BIT);
+                bytep += byte_step;
+                index_in_word++;
+            }
+            if (word_num_partialbits) {
+                FILL_DD;
+                *bytep = TAKE_LOWBITS(word_num_partialbits);
+                bytep += byte_step;
+                index_in_word++;
+            }
+            while (index_in_word < wordsize) {
+                *bytep = 0;
+                bytep += byte_step;
+                index_in_word++;
+            }
+
+            if (wordp == last_wordp)
+                break;
+
+            wordp += word_step;
+        }
+        FILL_DD;
+        /* overflow tests */
+        if (dp != de || 1 < dd) {
+            /* 2**(numwords*(wordsize*CHAR_BIT-nails)+1) <= abs(val) */
+            sign *= 2;
+        }
+        else if (dd == 1) {
+            /* 2**(numwords*(wordsize*CHAR_BIT-nails)) <= abs(val) < 2**(numwords*(wordsize*CHAR_BIT-nails)+1) */
+            if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
+                sign *= 2;
+            else { /* sign < 0 && (flags & INTEGER_PACK_2COMP) */
+                /* test lower bits are all zero. */
+                dp = ds;
+                while (dp < de && *dp == 0)
+                    dp++;
+                if (de - dp == 1 && /* only one non-zero word. */
+                    POW2_P(*dp)) /* *dp contains only one bit set. */
+                    sign = -1; /* val == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
+                else
+                    sign = -2; /* val < -2**(numwords*(wordsize*CHAR_BIT-nails)) */
+            }
+        }
     }
-}
 
-static inline VALUE
-bigtrunc(VALUE x)
-{
-    long len = RBIGNUM_LEN(x);
-    BDIGIT *ds = BDIGITS(x);
+    if ((flags & INTEGER_PACK_2COMP) && (sign < 0 && numwords != 0)) {
+        unsigned char *buf;
 
-    if (len == 0) return x;
-    while (--len && !ds[len]);
-    if (RBIGNUM_LEN(x) > len+1) {
-	rb_big_resize(x, len+1);
-    }
-    return x;
-}
+        int word_num_partialbits;
+        size_t word_num_fullbytes;
 
-static inline VALUE
-bigfixize(VALUE x)
-{
-    long len = RBIGNUM_LEN(x);
-    BDIGIT *ds = BDIGITS(x);
+        ssize_t word_step;
+        size_t byte_start;
+        int byte_step;
 
-    if (len == 0) return INT2FIX(0);
-    if (BIGSIZE(x) <= sizeof(long)) {
-	long num = 0;
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-	num = (long)ds[0];
-#else
-	while (len--) {
-	    num = (long)(BIGUP(num) + ds[len]);
-	}
-#endif
-	if (num >= 0) {
-	    if (RBIGNUM_SIGN(x)) {
-		if (POSFIXABLE(num)) return LONG2FIX(num);
-	    }
-	    else {
-		if (NEGFIXABLE(-num)) return LONG2FIX(-num);
-	    }
-	}
-    }
-    return x;
-}
+        size_t word_start, word_last;
+        unsigned char *wordp, *last_wordp;
 
-static VALUE
-bignorm(VALUE x)
-{
-    if (RB_TYPE_P(x, T_BIGNUM)) {
-	x = bigfixize(x);
-        if (!FIXNUM_P(x))
-            bigtrunc(x);
-    }
-    return x;
-}
+        unsigned int partialbits_mask;
+        int carry;
 
-VALUE
-rb_big_norm(VALUE x)
-{
-    return bignorm(x);
-}
+        integer_pack_loop_setup(numwords, wordsize, nails, flags,
+            &word_num_fullbytes, &word_num_partialbits,
+            &word_start, &word_step, &word_last, &byte_start, &byte_step);
 
-VALUE
-rb_uint2big(VALUE n)
-{
-    long i;
-    VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1);
-    BDIGIT *digits = BDIGITS(big);
+        partialbits_mask = (1 << word_num_partialbits) - 1;
 
-#if SIZEOF_BDIGITS >= SIZEOF_VALUE
-    digits[0] = n;
-#else
-    for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) {
-	digits[i] = BIGLO(n);
-	n = BIGDN(n);
-    }
-#endif
+        buf = words;
+        wordp = buf + word_start;
+        last_wordp = buf + word_last;
 
-    i = bdigit_roomof(SIZEOF_VALUE);
-    while (--i && !digits[i]) ;
-    RBIGNUM_SET_LEN(big, i+1);
-    return big;
-}
+        carry = 1;
+        while (1) {
+            size_t index_in_word = 0;
+            unsigned char *bytep = wordp + byte_start;
+            while (index_in_word < word_num_fullbytes) {
+                carry += (unsigned char)~*bytep;
+                *bytep = (unsigned char)carry;
+                carry >>= CHAR_BIT;
+                bytep += byte_step;
+                index_in_word++;
+            }
+            if (word_num_partialbits) {
+                carry += (*bytep & partialbits_mask) ^ partialbits_mask;
+                *bytep = carry & partialbits_mask;
+                carry >>= word_num_partialbits;
+                bytep += byte_step;
+                index_in_word++;
+            }
 
-VALUE
-rb_int2big(SIGNED_VALUE n)
-{
-    long neg = 0;
-    VALUE u;
-    VALUE big;
+            if (wordp == last_wordp)
+                break;
 
-    if (n < 0) {
-        u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */
-	neg = 1;
-    }
-    else {
-        u = n;
-    }
-    big = rb_uint2big(u);
-    if (neg) {
-	RBIGNUM_SET_SIGN(big, 0);
+            wordp += word_step;
+        }
     }
-    return big;
-}
 
-VALUE
-rb_uint2inum(VALUE n)
-{
-    if (POSFIXABLE(n)) return LONG2FIX(n);
-    return rb_uint2big(n);
+    return sign;
+#undef FILL_DD
+#undef TAKE_LOWBITS
 }
 
-VALUE
-rb_int2inum(SIGNED_VALUE n)
+static size_t
+integer_unpack_num_bdigits_small(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
 {
-    if (FIXABLE(n)) return LONG2FIX(n);
-    return rb_int2big(n);
+    /* nlp_bits stands for number of leading padding bits */
+    size_t num_bits = (wordsize * CHAR_BIT - nails) * numwords;
+    size_t num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG;
+    *nlp_bits_ret = (int)(num_bdigits * BITSPERDIG - num_bits);
+    return num_bdigits;
 }
 
-void
-rb_big_pack(VALUE val, unsigned long *buf, long num_longs)
+static size_t
+integer_unpack_num_bdigits_generic(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
 {
-    rb_integer_pack(val, buf, num_longs, sizeof(long), 0,
-            INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP);
-}
+    /* BITSPERDIG = SIZEOF_BDIGITS * CHAR_BIT */
+    /* num_bits = (wordsize * CHAR_BIT - nails) * numwords */
+    /* num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG */
 
-VALUE
-rb_big_unpack(unsigned long *buf, long num_longs)
-{
-    return rb_integer_unpack(buf, num_longs, sizeof(long), 0,
-            INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP);
-}
+    /* num_bits = CHAR_BIT * (wordsize * numwords) - nails * numwords = CHAR_BIT * num_bytes1 - nails * numwords */
+    size_t num_bytes1 = wordsize * numwords;
 
-/*
- * Calculate the number of bytes to be required to represent
- * the absolute value of the integer given as _val_.
- *
- * [val] an integer.
- * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL.
- *
- * This function returns ((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT)
- * where val_numbits is the number of bits of abs(val).
- * This function should not overflow.
- *
- * If nlz_bits_ret is not NULL,
- * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret.
- * In this case, 0 <= *nlz_bits_ret < CHAR_BIT.
- *
- */
-size_t
-rb_absint_size(VALUE val, int *nlz_bits_ret)
-{
-    BDIGIT *dp;
-    BDIGIT *de;
-    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
+    /* q1 * CHAR_BIT + r1 = numwords */
+    size_t q1 = numwords / CHAR_BIT;
+    size_t r1 = numwords % CHAR_BIT;
 
-    int num_leading_zeros;
+    /* num_bits = CHAR_BIT * num_bytes1 - nails * (q1 * CHAR_BIT + r1) = CHAR_BIT * num_bytes2 - nails * r1 */
+    size_t num_bytes2 = num_bytes1 - nails * q1;
 
-    val = rb_to_int(val);
+    /* q2 * CHAR_BIT + r2 = nails */
+    size_t q2 = nails / CHAR_BIT;
+    size_t r2 = nails % CHAR_BIT;
 
-    if (FIXNUM_P(val)) {
-        long v = FIX2LONG(val);
-        if (v < 0) {
-            v = -v;
-        }
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-        fixbuf[0] = v;
-#else
+    /* num_bits = CHAR_BIT * num_bytes2 - (q2 * CHAR_BIT + r2) * r1 = CHAR_BIT * num_bytes3 - r1 * r2 */
+    size_t num_bytes3 = num_bytes2 - q2 * r1;
+
+    /* q3 * BITSPERDIG + r3 = num_bytes3 */
+    size_t q3 = num_bytes3 / BITSPERDIG;
+    size_t r3 = num_bytes3 % BITSPERDIG;
+
+    /* num_bits = CHAR_BIT * (q3 * BITSPERDIG + r3) - r1 * r2 = BITSPERDIG * num_digits1 + CHAR_BIT * r3 - r1 * r2 */
+    size_t num_digits1 = CHAR_BIT * q3;
+
+    /*
+     * if CHAR_BIT * r3 >= r1 * r2
+     *   CHAR_BIT * r3 - r1 * r2 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2))
+     *   q4 * BITSPERDIG + r4 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2)
+     *   num_bits = BITSPERDIG * num_digits1 + CHAR_BIT * BITSPERDIG - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
+     * else
+     *   q4 * BITSPERDIG + r4 = -(CHAR_BIT * r3 - r1 * r2)
+     *   num_bits = BITSPERDIG * num_digits1 - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
+     * end
+     */
+
+    if (CHAR_BIT * r3 >= r1 * r2) {
+        size_t tmp1 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2);
+        size_t q4 = tmp1 / BITSPERDIG;
+        int r4 = (int)(tmp1 % BITSPERDIG);
+        size_t num_digits2 = num_digits1 + CHAR_BIT - q4;
+        *nlp_bits_ret = r4;
+        return num_digits2;
+    }
+    else {
+        size_t tmp1 = r1 * r2 - CHAR_BIT * r3;
+        size_t q4 = tmp1 / BITSPERDIG;
+        int r4 = (int)(tmp1 % BITSPERDIG);
+        size_t num_digits2 = num_digits1 - q4;
+        *nlp_bits_ret = r4;
+        return num_digits2;
+    }
+}
+
+static size_t
+integer_unpack_num_bdigits(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
+{
+    size_t num_bdigits;
+
+    if (numwords <= (SIZE_MAX - (BITSPERDIG-1)) / CHAR_BIT / wordsize) {
+        num_bdigits = integer_unpack_num_bdigits_small(numwords, wordsize, nails, nlp_bits_ret);
+#ifdef DEBUG_INTEGER_PACK
         {
-            int i;
-            for (i = 0; i < numberof(fixbuf); i++) {
-                fixbuf[i] = BIGLO(v);
-                v = BIGDN(v);
-            }
+            int nlp_bits1;
+            size_t num_bdigits1 = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, &nlp_bits1);
+            assert(num_bdigits == num_bdigits1);
+            assert(*nlp_bits_ret == nlp_bits1);
         }
 #endif
-        dp = fixbuf;
-        de = fixbuf + numberof(fixbuf);
     }
     else {
-        dp = BDIGITS(val);
-        de = dp + RBIGNUM_LEN(val);
-    }
-    while (dp < de && de[-1] == 0)
-        de--;
-    if (dp == de) {
-        if (nlz_bits_ret)
-            *nlz_bits_ret = 0;
-        return 0;
+        num_bdigits = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, nlp_bits_ret);
     }
-    num_leading_zeros = nlz(de[-1]);
-    if (nlz_bits_ret)
-        *nlz_bits_ret = num_leading_zeros % CHAR_BIT;
-    return (de - dp) * SIZEOF_BDIGITS - num_leading_zeros / CHAR_BIT;
+    return num_bdigits;
 }
 
-static size_t
-absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
+static inline void
+integer_unpack_push_bits(int data, int numbits, BDIGIT_DBL *ddp, int *numbits_in_dd_p, BDIGIT **dpp)
 {
-    size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte;
-    size_t div = val_numbits / word_numbits;
-    size_t mod = val_numbits % word_numbits;
-    size_t numwords;
-    size_t nlz_bits;
-    numwords = mod == 0 ? div : div + 1;
-    nlz_bits = mod == 0 ? 0 : word_numbits - mod;
-    *nlz_bits_ret = nlz_bits;
-    return numwords;
+    (*ddp) |= ((BDIGIT_DBL)data) << (*numbits_in_dd_p);
+    *numbits_in_dd_p += numbits;
+    while (BITSPERDIG <= *numbits_in_dd_p) {
+        *(*dpp)++ = BIGLO(*ddp);
+        *ddp = BIGDN(*ddp);
+        *numbits_in_dd_p -= BITSPERDIG;
+    }
 }
 
-static size_t
-absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
+static int
+integer_unpack_single_bdigit(BDIGIT u, size_t size, int flags, BDIGIT *dp)
 {
-    BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))];
-    BDIGIT char_bit[1] = { CHAR_BIT };
-    BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)];
-    BDIGIT nlz_bits_in_msbyte_bary[1] = { nlz_bits_in_msbyte };
-    BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))];
-    BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS];
-    BDIGIT mod_bary[numberof(word_numbits_bary)];
-    BDIGIT one[1] = { 1 };
-    size_t nlz_bits;
-    size_t mod;
     int sign;
-    size_t numwords;
-
-    /*
-     * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte
-     * div, mod = val_numbits.divmod(word_numbits)
-     * numwords = mod == 0 ? div : div + 1
-     * nlz_bits = mod == 0 ? 0 : word_numbits - mod
-     */
-
-    bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
-        INTEGER_PACK_NATIVE_BYTE_ORDER);
-    BARY_MUL1(val_numbits_bary, numbytes_bary, char_bit);
-    if (nlz_bits_in_msbyte)
-        BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary);
-    bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0,
-        INTEGER_PACK_NATIVE_BYTE_ORDER);
-    BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary);
-    if (BARY_ZERO_P(mod_bary)) {
-        nlz_bits = 0;
-    }
-    else {
-        BARY_ADD(div_bary, div_bary, one);
-        bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0,
-            INTEGER_PACK_NATIVE_BYTE_ORDER);
-        nlz_bits = word_numbits - mod;
+    if (flags & INTEGER_PACK_2COMP) {
+        sign = (flags & INTEGER_PACK_NEGATIVE) ?
+            ((size == SIZEOF_BDIGITS && u == 0) ? -2 : -1) :
+            ((u >> (size * CHAR_BIT - 1)) ? -1 : 1);
+        if (sign < 0) {
+            u |= LSHIFTX(BDIGMAX, size * CHAR_BIT);
+            u = BIGLO(1 + ~u);
+        }
     }
-    sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0,
-        INTEGER_PACK_NATIVE_BYTE_ORDER);
-
-    if (sign == 2)
-        return (size_t)-1;
-    *nlz_bits_ret = nlz_bits;
-    return numwords;
+    else
+        sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
+    *dp = u;
+    return sign;
 }
 
-/*
- * Calculate the number of words to be required to represent
- * the absolute value of the integer given as _val_.
- *
- * [val] an integer.
- * [word_numbits] number of bits in a word.
- * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL.
- *
- * This function returns ((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)
- * where val_numbits is the number of bits of abs(val).
- *
- * This function can overflow.
- * When overflow occur, (size_t)-1 is returned.
- *
- * If nlz_bits_ret is not NULL and overflow is not occur,
- * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret.
- * In this case, 0 <= *nlz_bits_ret < word_numbits.
- *
- */
-size_t
-rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
+static int
+bary_unpack_internal(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags, int nlp_bits)
 {
-    size_t numbytes;
-    int nlz_bits_in_msbyte;
-    size_t numwords;
-    size_t nlz_bits;
-
-    if (word_numbits == 0)
-        return (size_t)-1;
+    int sign;
+    const unsigned char *buf = words;
+    BDIGIT *dp;
+    BDIGIT *de;
 
-    numbytes = rb_absint_size(val, &nlz_bits_in_msbyte);
+    dp = bdigits;
+    de = dp + num_bdigits;
 
-    if (numbytes <= SIZE_MAX / CHAR_BIT) {
-        numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
-#ifdef DEBUG_INTEGER_PACK
-        {
-            size_t numwords0, nlz_bits0;
-            numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0);
-            assert(numwords0 == numwords);
-            assert(nlz_bits0 == nlz_bits);
+    if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
+        if (nails == 0 && numwords == 1) {
+            int need_swap = wordsize != 1 &&
+                (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
+                ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
+            if (wordsize == 1) {
+                return integer_unpack_single_bdigit(*(uint8_t *)buf, sizeof(uint8_t), flags, dp);
+            }
+#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
+            if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
+                BDIGIT u = *(uint16_t *)buf;
+                return integer_unpack_single_bdigit(need_swap ? swap16(u) : u, sizeof(uint16_t), flags, dp);
+            }
+#endif
+#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
+            if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
+                BDIGIT u = *(uint32_t *)buf;
+                return integer_unpack_single_bdigit(need_swap ? swap32(u) : u, sizeof(uint32_t), flags, dp);
+            }
+#endif
+#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
+            if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
+                BDIGIT u = *(uint64_t *)buf;
+                return integer_unpack_single_bdigit(need_swap ? swap64(u) : u, sizeof(uint64_t), flags, dp);
+            }
+#endif
+        }
+#if !defined(WORDS_BIGENDIAN)
+        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
+            (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
+            (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
+            size_t src_size = numwords * wordsize;
+            size_t dst_size = num_bdigits * SIZEOF_BDIGITS;
+            MEMCPY(dp, words, char, src_size);
+            if (flags & INTEGER_PACK_2COMP) {
+                if (flags & INTEGER_PACK_NEGATIVE) {
+                    int zero_p;
+                    memset((char*)dp + src_size, 0xff, dst_size - src_size);
+                    zero_p = bary_2comp(dp, num_bdigits);
+                    sign = zero_p ? -2 : -1;
+                }
+                else if (buf[src_size-1] >> (CHAR_BIT-1)) {
+                    memset((char*)dp + src_size, 0xff, dst_size - src_size);
+                    bary_2comp(dp, num_bdigits);
+                    sign = -1;
+                }
+                else {
+                    MEMZERO((char*)dp + src_size, char, dst_size - src_size);
+                    sign = 1;
+                }
+            }
+            else {
+                MEMZERO((char*)dp + src_size, char, dst_size - src_size);
+                sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
+            }
+            return sign;
+        }
+#endif
+        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
+            wordsize % SIZEOF_BDIGITS == 0) {
+            size_t bdigits_per_word = wordsize / SIZEOF_BDIGITS;
+            int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
+            int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
+                (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
+            MEMCPY(dp, words, BDIGIT, numwords*bdigits_per_word);
+            if (mswordfirst_p) {
+                bary_swap(dp, num_bdigits);
+            }
+            if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
+                size_t i;
+                BDIGIT *p = dp;
+                for (i = 0; i < numwords; i++) {
+                    bary_swap(p, bdigits_per_word);
+                    p += bdigits_per_word;
+                }
+            }
+            if (msbytefirst_p != HOST_BIGENDIAN_P) {
+                BDIGIT *p;
+                for (p = dp; p < de; p++) {
+                    BDIGIT d = *p;
+                    *p = swap_bdigit(d);
+                }
+            }
+            if (flags & INTEGER_PACK_2COMP) {
+                if (flags & INTEGER_PACK_NEGATIVE) {
+                    int zero_p = bary_2comp(dp, num_bdigits);
+                    sign = zero_p ? -2 : -1;
+                }
+                else if (BDIGIT_MSB(de[-1])) {
+                    bary_2comp(dp, num_bdigits);
+                    sign = -1;
+                }
+                else {
+                    sign = 1;
+                }
+            }
+            else {
+                sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
+            }
+            return sign;
         }
-#endif
-    }
-    else {
-        numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
     }
-    if (numwords == (size_t)-1)
-        return numwords;
 
-    if (nlz_bits_ret)
-        *nlz_bits_ret = nlz_bits;
+    if (num_bdigits != 0) {
+        int word_num_partialbits;
+        size_t word_num_fullbytes;
 
-    return numwords;
-}
+        ssize_t word_step;
+        size_t byte_start;
+        int byte_step;
 
-int
-rb_absint_singlebit_p(VALUE val)
-{
-    BDIGIT *dp;
-    BDIGIT *de;
-    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
-    BDIGIT d;
+        size_t word_start, word_last;
+        const unsigned char *wordp, *last_wordp;
+        BDIGIT_DBL dd;
+        int numbits_in_dd;
 
-    val = rb_to_int(val);
+        integer_pack_loop_setup(numwords, wordsize, nails, flags,
+            &word_num_fullbytes, &word_num_partialbits,
+            &word_start, &word_step, &word_last, &byte_start, &byte_step);
 
-    if (FIXNUM_P(val)) {
-        long v = FIX2LONG(val);
-        if (v < 0) {
-            v = -v;
-        }
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-        fixbuf[0] = v;
-#else
-        {
-            int i;
-            for (i = 0; i < numberof(fixbuf); i++) {
-                fixbuf[i] = BIGLO(v);
-                v = BIGDN(v);
+        wordp = buf + word_start;
+        last_wordp = buf + word_last;
+
+        dd = 0;
+        numbits_in_dd = 0;
+
+#define PUSH_BITS(data, numbits) \
+        integer_unpack_push_bits(data, numbits, &dd, &numbits_in_dd, &dp)
+
+        while (1) {
+            size_t index_in_word = 0;
+            const unsigned char *bytep = wordp + byte_start;
+            while (index_in_word < word_num_fullbytes) {
+                PUSH_BITS(*bytep, CHAR_BIT);
+                bytep += byte_step;
+                index_in_word++;
+            }
+            if (word_num_partialbits) {
+                PUSH_BITS(*bytep & ((1 << word_num_partialbits) - 1), word_num_partialbits);
+                bytep += byte_step;
+                index_in_word++;
             }
+
+            if (wordp == last_wordp)
+                break;
+
+            wordp += word_step;
         }
-#endif
-        dp = fixbuf;
-        de = fixbuf + numberof(fixbuf);
+        if (dd)
+            *dp++ = (BDIGIT)dd;
+        assert(dp <= de);
+        while (dp < de)
+            *dp++ = 0;
+#undef PUSH_BITS
+    }
+
+    if (!(flags & INTEGER_PACK_2COMP)) {
+        sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
     }
     else {
-        dp = BDIGITS(val);
-        de = dp + RBIGNUM_LEN(val);
+        if (nlp_bits) {
+            if ((flags & INTEGER_PACK_NEGATIVE) ||
+                (bdigits[num_bdigits-1] >> (BITSPERDIG - nlp_bits - 1))) {
+                bdigits[num_bdigits-1] |= BIGLO(BDIGMAX << (BITSPERDIG - nlp_bits));
+                sign = -1;
+            }
+            else {
+                sign = 1;
+            }
+        }
+        else {
+            if (flags & INTEGER_PACK_NEGATIVE) {
+                sign = bary_zero_p(bdigits, num_bdigits) ? -2 : -1;
+            }
+            else {
+                if (num_bdigits != 0 && BDIGIT_MSB(bdigits[num_bdigits-1]))
+                    sign = -1;
+                else
+                    sign = 1;
+            }
+        }
+        if (sign == -1 && num_bdigits != 0) {
+            bary_2comp(bdigits, num_bdigits);
+        }
     }
-    while (dp < de && de[-1] == 0)
-        de--;
-    while (dp < de && dp[0] == 0)
-        dp++;
-    if (dp == de) /* no bit set. */
-        return 0;
-    if (dp != de-1) /* two non-zero words. two bits set, at least. */
-        return 0;
-    d = *dp;
-    return POW2_P(d);
+
+    return sign;
 }
 
 static void
-bary_swap(BDIGIT *ds, size_t num_bdigits)
+bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 {
-    BDIGIT *p1 = ds;
-    BDIGIT *p2 = ds + num_bdigits - 1;
-    for (; p1 < p2; p1++, p2--) {
-        BDIGIT tmp = *p1;
-        *p1 = *p2;
-        *p2 = tmp;
-    }
-}
+    size_t num_bdigits0;
+    int nlp_bits;
+    int sign;
 
-#define INTEGER_PACK_WORDORDER_MASK \
-    (INTEGER_PACK_MSWORD_FIRST | \
-     INTEGER_PACK_LSWORD_FIRST)
-#define INTEGER_PACK_BYTEORDER_MASK \
-    (INTEGER_PACK_MSBYTE_FIRST | \
-     INTEGER_PACK_LSBYTE_FIRST | \
-     INTEGER_PACK_NATIVE_BYTE_ORDER)
+    validate_integer_pack_format(numwords, wordsize, nails, flags,
+            INTEGER_PACK_MSWORD_FIRST|
+            INTEGER_PACK_LSWORD_FIRST|
+            INTEGER_PACK_MSBYTE_FIRST|
+            INTEGER_PACK_LSBYTE_FIRST|
+            INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP|
+            INTEGER_PACK_FORCE_BIGNUM|
+            INTEGER_PACK_NEGATIVE|
+            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
 
-static void
-validate_integer_pack_format(size_t numwords, size_t wordsize, size_t nails, int flags, int supported_flags)
-{
-    int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
-    int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
+    num_bdigits0 = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
 
-    if (flags & ~supported_flags) {
-        rb_raise(rb_eArgError, "unsupported flags specified");
-    }
-    if (wordorder_bits == 0) {
-        if (1 < numwords)
-            rb_raise(rb_eArgError, "word order not specified");
-    }
-    else if (wordorder_bits != INTEGER_PACK_MSWORD_FIRST &&
-        wordorder_bits != INTEGER_PACK_LSWORD_FIRST)
-        rb_raise(rb_eArgError, "unexpected word order");
-    if (byteorder_bits == 0) {
-        rb_raise(rb_eArgError, "byte order not specified");
+    assert(num_bdigits0 <= num_bdigits);
+
+    sign = bary_unpack_internal(bdigits, num_bdigits0, words, numwords, wordsize, nails, flags, nlp_bits);
+
+    if (num_bdigits0 < num_bdigits) {
+        MEMZERO(bdigits + num_bdigits0, BDIGIT, num_bdigits - num_bdigits0);
+        if (sign == -2) {
+            bdigits[num_bdigits0] = 1;
+        }
     }
-    else if (byteorder_bits != INTEGER_PACK_MSBYTE_FIRST &&
-        byteorder_bits != INTEGER_PACK_LSBYTE_FIRST &&
-        byteorder_bits != INTEGER_PACK_NATIVE_BYTE_ORDER)
-        rb_raise(rb_eArgError, "unexpected byte order");
-    if (wordsize == 0)
-        rb_raise(rb_eArgError, "invalid wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
-    if (SSIZE_MAX < wordsize)
-        rb_raise(rb_eArgError, "too big wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
-    if (wordsize <= nails / CHAR_BIT)
-        rb_raise(rb_eArgError, "too big nails: %"PRI_SIZE_PREFIX"u", nails);
-    if (SIZE_MAX / wordsize < numwords)
-        rb_raise(rb_eArgError, "too big numwords * wordsize: %"PRI_SIZE_PREFIX"u * %"PRI_SIZE_PREFIX"u", numwords, wordsize);
 }
 
-static void
-integer_pack_loop_setup(
-    size_t numwords, size_t wordsize, size_t nails, int flags,
-    size_t *word_num_fullbytes_ret,
-    int *word_num_partialbits_ret,
-    size_t *word_start_ret,
-    ssize_t *word_step_ret,
-    size_t *word_last_ret,
-    size_t *byte_start_ret,
-    int *byte_step_ret)
+static int
+bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow)
 {
-    int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
-    int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
-    size_t word_num_fullbytes;
-    int word_num_partialbits;
-    size_t word_start;
-    ssize_t word_step;
-    size_t word_last;
-    size_t byte_start;
-    int byte_step;
-
-    word_num_partialbits = CHAR_BIT - (int)(nails % CHAR_BIT);
-    if (word_num_partialbits == CHAR_BIT)
-        word_num_partialbits = 0;
-    word_num_fullbytes = wordsize - (nails / CHAR_BIT);
-    if (word_num_partialbits != 0) {
-        word_num_fullbytes--;
-    }
+    BDIGIT_DBL_SIGNED num;
+    size_t i;
 
-    if (wordorder_bits == INTEGER_PACK_MSWORD_FIRST) {
-        word_start = wordsize*(numwords-1);
-        word_step = -(ssize_t)wordsize;
-        word_last = 0;
-    }
-    else {
-        word_start = 0;
-        word_step = wordsize;
-        word_last = wordsize*(numwords-1);
-    }
+    assert(yn <= xn);
+    assert(xn <= zn);
 
-    if (byteorder_bits == INTEGER_PACK_NATIVE_BYTE_ORDER) {
-#ifdef WORDS_BIGENDIAN
-        byteorder_bits = INTEGER_PACK_MSBYTE_FIRST;
-#else
-        byteorder_bits = INTEGER_PACK_LSBYTE_FIRST;
-#endif
-    }
-    if (byteorder_bits == INTEGER_PACK_MSBYTE_FIRST) {
-        byte_start = wordsize-1;
-        byte_step = -1;
+    num = borrow ? -1 : 0;
+    for (i = 0; i < yn; i++) {
+	num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
     }
-    else {
-        byte_start = 0;
-        byte_step = 1;
+    for (; i < xn; i++) {
+        if (num == 0) goto num_is_zero;
+	num += xds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+    }
+    if (num == 0) goto num_is_zero;
+    for (; i < zn; i++) {
+	zds[i] = BDIGMAX;
     }
+    return 1;
 
-    *word_num_partialbits_ret = word_num_partialbits;
-    *word_num_fullbytes_ret = word_num_fullbytes;
-    *word_start_ret = word_start;
-    *word_step_ret = word_step;
-    *word_last_ret = word_last;
-    *byte_start_ret = byte_start;
-    *byte_step_ret = byte_step;
+  num_is_zero:
+    if (xds == zds && xn == zn)
+        return 0;
+    for (; i < xn; i++) {
+	zds[i] = xds[i];
+    }
+    for (; i < zn; i++) {
+	zds[i] = 0;
+    }
+    return 0;
 }
 
-static inline void
-integer_pack_fill_dd(BDIGIT **dpp, BDIGIT **dep, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
+static int
+bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
 {
-    if (*dpp < *dep && BITSPERDIG <= (int)sizeof(*ddp) * CHAR_BIT - *numbits_in_dd_p) {
-        *ddp |= (BDIGIT_DBL)(*(*dpp)++) << *numbits_in_dd_p;
-        *numbits_in_dd_p += BITSPERDIG;
-    }
-    else if (*dpp == *dep) {
-        /* higher bits are infinity zeros */
-        *numbits_in_dd_p = (int)sizeof(*ddp) * CHAR_BIT;
-    }
+    return bary_subb(zds, zn, xds, xn, yds, yn, 0);
 }
 
-static inline BDIGIT_DBL
-integer_pack_take_lowbits(int n, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
+static int
+bary_sub_one(BDIGIT *zds, size_t zn)
 {
-    BDIGIT_DBL ret;
-    ret = (*ddp) & (((BDIGIT_DBL)1 << n) - 1);
-    *ddp >>= n;
-    *numbits_in_dd_p -= n;
-    return ret;
+    return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
 }
 
 static int
-bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
+bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry)
 {
-    BDIGIT *dp, *de;
-    unsigned char *buf, *bufend;
+    BDIGIT_DBL num;
+    size_t i;
 
-    dp = ds;
-    de = ds + num_bdigits;
+    assert(xn <= zn);
+    assert(yn <= zn);
 
-    validate_integer_pack_format(numwords, wordsize, nails, flags,
-            INTEGER_PACK_MSWORD_FIRST|
-            INTEGER_PACK_LSWORD_FIRST|
-            INTEGER_PACK_MSBYTE_FIRST|
-            INTEGER_PACK_LSBYTE_FIRST|
-            INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP|
-            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
+    if (xn > yn) {
+	BDIGIT *tds;
+	tds = xds; xds = yds; yds = tds;
+	i = xn; xn = yn; yn = i;
+    }
 
-    while (dp < de && de[-1] == 0)
-        de--;
-    if (dp == de) {
-        sign = 0;
+    num = carry ? 1 : 0;
+    for (i = 0; i < xn; i++) {
+	num += (BDIGIT_DBL)xds[i] + yds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+    }
+    for (; i < yn; i++) {
+        if (num == 0) goto num_is_zero;
+	num += yds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+    }
+    for (; i < zn; i++) {
+        if (num == 0) goto num_is_zero;
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
     }
+    return num != 0;
 
-    if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
-        if (sign == 0) {
-            MEMZERO(words, unsigned char, numwords * wordsize);
-            return 0;
-        }
-        if (nails == 0 && numwords == 1) {
-            int need_swap = wordsize != 1 &&
-                (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
-                ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
-            if (0 < sign || !(flags & INTEGER_PACK_2COMP)) {
-                BDIGIT d;
-                if (wordsize == 1) {
-                    *((unsigned char *)words) = (unsigned char)(d = dp[0]);
-                    return ((1 < de - dp || CLEAR_LOWBITS(d, 8) != 0) ? 2 : 1) * sign;
-                }
-#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
-                if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
-                    uint16_t u = (uint16_t)(d = dp[0]);
-                    if (need_swap) u = swap16(u);
-                    *((uint16_t *)words) = u;
-                    return ((1 < de - dp || CLEAR_LOWBITS(d, 16) != 0) ? 2 : 1) * sign;
-                }
-#endif
-#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
-                if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
-                    uint32_t u = (uint32_t)(d = dp[0]);
-                    if (need_swap) u = swap32(u);
-                    *((uint32_t *)words) = u;
-                    return ((1 < de - dp || CLEAR_LOWBITS(d, 32) != 0) ? 2 : 1) * sign;
-                }
-#endif
-#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
-                if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
-                    uint64_t u = (uint64_t)(d = dp[0]);
-                    if (need_swap) u = swap64(u);
-                    *((uint64_t *)words) = u;
-                    return ((1 < de - dp || CLEAR_LOWBITS(d, 64) != 0) ? 2 : 1) * sign;
-                }
-#endif
-            }
-            else { /* sign < 0 && (flags & INTEGER_PACK_2COMP) */
-                BDIGIT_DBL_SIGNED d;
-                if (wordsize == 1) {
-                    *((unsigned char *)words) = (unsigned char)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
-                    return (1 < de - dp || FILL_LOWBITS(d, 8) != -1) ? -2 : -1;
-                }
-#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
-                if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
-                    uint16_t u = (uint16_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
-                    if (need_swap) u = swap16(u);
-                    *((uint16_t *)words) = u;
-                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
-                        (1 < de - dp || FILL_LOWBITS(d, 16) != -1) ? -2 : -1;
-                }
-#endif
-#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
-                if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
-                    uint32_t u = (uint32_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
-                    if (need_swap) u = swap32(u);
-                    *((uint32_t *)words) = u;
-                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
-                        (1 < de - dp || FILL_LOWBITS(d, 32) != -1) ? -2 : -1;
-                }
-#endif
-#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
-                if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
-                    uint64_t u = (uint64_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
-                    if (need_swap) u = swap64(u);
-                    *((uint64_t *)words) = u;
-                    return (wordsize == SIZEOF_BDIGITS && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
-                        (1 < de - dp || FILL_LOWBITS(d, 64) != -1) ? -2 : -1;
-                }
-#endif
-            }
-        }
-#if !defined(WORDS_BIGENDIAN)
-        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
-            (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
-            (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
-            size_t src_size = (de - dp) * SIZEOF_BDIGITS;
-            size_t dst_size = numwords * wordsize;
-            int overflow = 0;
-            while (0 < src_size && ((unsigned char *)ds)[src_size-1] == 0)
-                src_size--;
-            if (src_size <= dst_size) {
-                MEMCPY(words, dp, char, src_size);
-                MEMZERO((char*)words + src_size, char, dst_size - src_size);
-            }
-            else {
-                MEMCPY(words, dp, char, dst_size);
-                overflow = 1;
-            }
-            if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
-                int zero_p = bytes_2comp(words, dst_size);
-                if (zero_p && overflow) {
-                    unsigned char *p = (unsigned char *)dp;
-                    if (dst_size == src_size-1 &&
-                        p[dst_size] == 1) {
-                        overflow = 0;
-                    }
-                }
-            }
-            if (overflow)
-                sign *= 2;
-            return sign;
-        }
-#endif
-        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
-            wordsize % SIZEOF_BDIGITS == 0 && (uintptr_t)words % ALIGNOF(BDIGIT) == 0) {
-            size_t bdigits_per_word = wordsize / SIZEOF_BDIGITS;
-            size_t src_num_bdigits = de - dp;
-            size_t dst_num_bdigits = numwords * bdigits_per_word;
-            int overflow = 0;
-            int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
-            int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
-                (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
-            if (src_num_bdigits <= dst_num_bdigits) {
-                MEMCPY(words, dp, BDIGIT, src_num_bdigits);
-                MEMZERO((BDIGIT*)words + src_num_bdigits, BDIGIT, dst_num_bdigits - src_num_bdigits);
-            }
-            else {
-                MEMCPY(words, dp, BDIGIT, dst_num_bdigits);
-                overflow = 1;
-            }
-            if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
-                int zero_p = bary_2comp(words, dst_num_bdigits);
-                if (zero_p && overflow &&
-                    dst_num_bdigits == src_num_bdigits-1 &&
-                    dp[dst_num_bdigits] == 1)
-                    overflow = 0;
-            }
-            if (msbytefirst_p != HOST_BIGENDIAN_P) {
-                size_t i;
-                for (i = 0; i < dst_num_bdigits; i++) {
-                    BDIGIT d = ((BDIGIT*)words)[i];
-                    ((BDIGIT*)words)[i] = swap_bdigit(d);
-                }
-            }
-            if (mswordfirst_p ?  !msbytefirst_p : msbytefirst_p) {
-                size_t i;
-                BDIGIT *p = words;
-                for (i = 0; i < numwords; i++) {
-                    bary_swap(p, bdigits_per_word);
-                    p += bdigits_per_word;
-                }
-            }
-            if (mswordfirst_p) {
-                bary_swap(words, dst_num_bdigits);
-            }
-            if (overflow)
-                sign *= 2;
-            return sign;
-        }
+  num_is_zero:
+    if (yds == zds && yn == zn)
+        return 0;
+    for (; i < yn; i++) {
+	zds[i] = yds[i];
     }
+    for (; i < zn; i++) {
+	zds[i] = 0;
+    }
+    return 0;
+}
+
+static int
+bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
+{
+    return bary_addc(zds, zn, xds, xn, yds, yn, 0);
+}
 
-    buf = words;
-    bufend = buf + numwords * wordsize;
+static int
+bary_add_one(BDIGIT *zds, size_t zn)
+{
+    return bary_addc(zds, zn, NULL, 0, zds, zn, 1);
+}
 
-    if (buf == bufend) {
-        /* overflow if non-zero*/
-        if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
-            sign *= 2;
+static void
+bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y)
+{
+    BDIGIT_DBL n;
+
+    assert(2 <= zl);
+
+    n = (BDIGIT_DBL)x * y;
+    zds[0] = BIGLO(n);
+    zds[1] = (BDIGIT)BIGDN(n);
+}
+
+static int
+bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl)
+{
+    BDIGIT_DBL n;
+    BDIGIT_DBL dd;
+    size_t j;
+
+    assert(zl > yl);
+
+    if (x == 0)
+        return 0;
+    dd = x;
+    n = 0;
+    for (j = 0; j < yl; j++) {
+        BDIGIT_DBL ee = n + dd * yds[j];
+        if (ee) {
+            n = zds[j] + ee;
+            zds[j] = BIGLO(n);
+            n = BIGDN(n);
+        }
         else {
-            if (de - dp == 1 && dp[0] == 1)
-                sign = -1; /* val == -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
-            else
-                sign = -2; /* val < -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
+            n = 0;
         }
+
     }
-    else if (dp == de) {
-        memset(buf, '\0', bufend - buf);
+    for (; j < zl; j++) {
+        if (n == 0)
+            break;
+        n += zds[j];
+        zds[j] = BIGLO(n);
+        n = BIGDN(n);
     }
-    else if (dp < de && buf < bufend) {
-        int word_num_partialbits;
-        size_t word_num_fullbytes;
+    return n != 0;
+}
 
-        ssize_t word_step;
-        size_t byte_start;
-        int byte_step;
+static void
+bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+{
+    size_t i;
 
-        size_t word_start, word_last;
-        unsigned char *wordp, *last_wordp;
-        BDIGIT_DBL dd;
-        int numbits_in_dd;
+    assert(xl + yl <= zl);
 
-        integer_pack_loop_setup(numwords, wordsize, nails, flags,
-            &word_num_fullbytes, &word_num_partialbits,
-            &word_start, &word_step, &word_last, &byte_start, &byte_step);
+    MEMZERO(zds, BDIGIT, zl);
+    for (i = 0; i < xl; i++) {
+        bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl);
+    }
+}
 
-        wordp = buf + word_start;
-        last_wordp = buf + word_last;
+/* efficient squaring (2 times faster than normal multiplication)
+ * ref: Handbook of Applied Cryptography, Algorithm 14.16
+ *      http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
+ */
+static void
+bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn)
+{
+    size_t i, j;
+    BDIGIT_DBL c, v, w;
 
-        dd = 0;
-        numbits_in_dd = 0;
+    assert(xn * 2 <= zn);
 
-#define FILL_DD \
-    integer_pack_fill_dd(&dp, &de, &dd, &numbits_in_dd)
-#define TAKE_LOWBITS(n) \
-    integer_pack_take_lowbits(n, &dd, &numbits_in_dd)
+    MEMZERO(zds, BDIGIT, zn);
+    for (i = 0; i < xn; i++) {
+	v = (BDIGIT_DBL)xds[i];
+	if (!v) continue;
+	c = (BDIGIT_DBL)zds[i + i] + v * v;
+	zds[i + i] = BIGLO(c);
+	c = BIGDN(c);
+	v *= 2;
+	for (j = i + 1; j < xn; j++) {
+	    w = (BDIGIT_DBL)xds[j];
+	    c += (BDIGIT_DBL)zds[i + j] + BIGLO(v) * w;
+	    zds[i + j] = BIGLO(c);
+	    c = BIGDN(c);
+	    if (BIGDN(v)) c += w;
+	}
+	if (c) {
+	    c += (BDIGIT_DBL)zds[i + xn];
+	    zds[i + xn] = BIGLO(c);
+	    c = BIGDN(c);
+            assert(c == 0 || i != xn-1);
+            if (c && i != xn-1) zds[i + xn + 1] += (BDIGIT)c;
+	}
+    }
+}
 
-        while (1) {
-            size_t index_in_word = 0;
-            unsigned char *bytep = wordp + byte_start;
-            while (index_in_word < word_num_fullbytes) {
-                FILL_DD;
-                *bytep = TAKE_LOWBITS(CHAR_BIT);
-                bytep += byte_step;
-                index_in_word++;
-            }
-            if (word_num_partialbits) {
-                FILL_DD;
-                *bytep = TAKE_LOWBITS(word_num_partialbits);
-                bytep += byte_step;
-                index_in_word++;
-            }
-            while (index_in_word < wordsize) {
-                *bytep = 0;
-                bytep += byte_step;
-                index_in_word++;
-            }
+/* balancing multiplication by slicing larger argument */
+static void
+bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+{
+    VALUE work = 0;
+    size_t r, n;
+    BDIGIT *wds;
+    size_t wl;
 
-            if (wordp == last_wordp)
-                break;
+    assert(xl + yl <= zl);
+    assert(2 * xl <= yl || 3 * xl <= 2*(yl+2));
 
-            wordp += word_step;
-        }
-        FILL_DD;
-        /* overflow tests */
-        if (dp != de || 1 < dd) {
-            /* 2**(numwords*(wordsize*CHAR_BIT-nails)+1) <= abs(val) */
-            sign *= 2;
-        }
-        else if (dd == 1) {
-            /* 2**(numwords*(wordsize*CHAR_BIT-nails)) <= abs(val) < 2**(numwords*(wordsize*CHAR_BIT-nails)+1) */
-            if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
-                sign *= 2;
-            else { /* overflow_2comp && sign == -1 */
-                /* test lower bits are all zero. */
-                dp = ds;
-                while (dp < de && *dp == 0)
-                    dp++;
-                if (de - dp == 1 && /* only one non-zero word. */
-                    POW2_P(*dp)) /* *dp contains only one bit set. */
-                    sign = -1; /* val == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
-                else
-                    sign = -2; /* val < -2**(numwords*(wordsize*CHAR_BIT-nails)) */
-            }
+    wl = xl * 2;
+    wds = ALLOCV_N(BDIGIT, work, wl);
+
+    MEMZERO(zds, BDIGIT, zl);
+
+    n = 0;
+    while (yl > 0) {
+	r = xl > yl ? yl : xl;
+        bary_mul(wds, xl + r, xds, xl, yds + n, r);
+        bary_add(zds + n, zl - n,
+                 zds + n, zl - n,
+                 wds, xl + r);
+	yl -= r;
+	n += r;
+    }
+
+    if (work)
+        ALLOCV_END(work);
+}
+
+/* multiplication by karatsuba method */
+static void
+bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+{
+    VALUE work = 0;
+    BDIGIT *wds;
+    size_t wl;
+
+    size_t n;
+    int sub_p, borrow, carry1, carry2, carry3;
+
+    int odd_x = 0;
+    int odd_y = 0;
+
+    BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3;
+
+    assert(xl + yl <= zl);
+    assert(xl <= yl);
+    assert(yl < 2 * xl);
+
+    if (yl & 1) {
+        odd_y = 1;
+        yl--;
+        if (yl < xl) {
+            odd_x = 1;
+            xl--;
         }
     }
 
-    if ((flags & INTEGER_PACK_2COMP) && (sign < 0 && numwords != 0)) {
-        unsigned char *buf;
+    n = yl / 2;
+
+    assert(n < xl);
+
+    wl = n;
+    wds = ALLOCV_N(BDIGIT, work, wl);
+
+    /* Karatsuba algorithm:
+     *
+     * x = x0 + r*x1
+     * y = y0 + r*y1
+     * z = x*y
+     *   = (x0 + r*x1) * (y0 + r*y1)
+     *   = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
+     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
+     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
+     */
+
+    xds0 = xds;
+    xds1 = xds + n;
+    yds0 = yds;
+    yds1 = yds + n;
+    zds0 = zds;
+    zds1 = zds + n;
+    zds2 = zds + 2*n;
+    zds3 = zds + 3*n;
+
+    sub_p = 1;
+
+    /* zds0:? zds1:? zds2:? zds3:? wds:? */
+
+    if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) {
+        bary_2comp(zds0, n);
+        sub_p = !sub_p;
+    }
+
+    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
 
-        int word_num_partialbits;
-        size_t word_num_fullbytes;
+    if (bary_sub(wds, n, yds, n, yds+n, n)) {
+        bary_2comp(wds, n);
+        sub_p = !sub_p;
+    }
 
-        ssize_t word_step;
-        size_t byte_start;
-        int byte_step;
+    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
 
-        size_t word_start, word_last;
-        unsigned char *wordp, *last_wordp;
+    bary_mul(zds1, 2*n, zds0, n, wds, n);
 
-        unsigned int partialbits_mask;
-        int carry;
+    /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
 
-        integer_pack_loop_setup(numwords, wordsize, nails, flags,
-            &word_num_fullbytes, &word_num_partialbits,
-            &word_start, &word_step, &word_last, &byte_start, &byte_step);
+    borrow = 0;
+    if (sub_p) {
+        borrow = !bary_2comp(zds1, 2*n);
+    }
+    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
 
-        partialbits_mask = (1 << word_num_partialbits) - 1;
+    MEMCPY(wds, zds1, BDIGIT, n);
 
-        buf = words;
-        wordp = buf + word_start;
-        last_wordp = buf + word_last;
+    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
 
-        carry = 1;
-        while (1) {
-            size_t index_in_word = 0;
-            unsigned char *bytep = wordp + byte_start;
-            while (index_in_word < word_num_fullbytes) {
-                carry += (unsigned char)~*bytep;
-                *bytep = (unsigned char)carry;
-                carry >>= CHAR_BIT;
-                bytep += byte_step;
-                index_in_word++;
-            }
-            if (word_num_partialbits) {
-                carry += (*bytep & partialbits_mask) ^ partialbits_mask;
-                *bytep = carry & partialbits_mask;
-                carry >>= word_num_partialbits;
-                bytep += byte_step;
-                index_in_word++;
-            }
+    bary_mul(zds0, 2*n, xds0, n, yds0, n);
 
-            if (wordp == last_wordp)
-                break;
+    /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
 
-            wordp += word_step;
-        }
-    }
+    carry1 = bary_add(wds, n, wds, n, zds0, n);
+    carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
 
-    return sign;
-#undef FILL_DD
-#undef TAKE_LOWBITS
-}
+    /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
 
-/*
- * Export an integer into a buffer.
- *
- * This function fills the buffer specified by _words_ and _numwords_ as
- * val in the format specified by _wordsize_, _nails_ and _flags_.
- *
- * [val] Fixnum, Bignum or another integer like object which has to_int method.
- * [words] buffer to export abs(val).
- * [numwords] the size of given buffer as number of words.
- * [wordsize] the size of word as number of bytes.
- * [nails] number of padding bits in a word.
- *   Most significant nails bits of each word are filled by zero.
- * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
- *
- * flags:
- * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word.
- * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word.
- * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word.
- * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word.
- * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
- * [INTEGER_PACK_2COMP] Use 2's complement representation.
- * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
- * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
- * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
- *
- * This function fills the buffer specified by _words_
- * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_.
- * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is
- * filled in the buffer.
- *
- * This function returns the signedness and overflow condition.
- * The overflow condition depends on INTEGER_PACK_2COMP.
- *
- * INTEGER_PACK_2COMP is not specified:
- *   -2 : negative overflow.  val <= -2**(numwords*(wordsize*CHAR_BIT-nails))
- *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0
- *   0 : zero.  val == 0
- *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
- *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
- *
- * INTEGER_PACK_2COMP is specified:
- *   -2 : negative overflow.  val < -2**(numwords*(wordsize*CHAR_BIT-nails))
- *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0
- *   0 : zero.  val == 0
- *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
- *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
- *
- * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable
- * in 2's complement representation but not representable in absolute value.
- * So -1 is returned for the value if INTEGER_PACK_2COMP is specified
- * but returns -2 if INTEGER_PACK_2COMP is not specified.
- *
- * The least significant words are filled in the buffer when overflow occur.
- */
+    carry2 = bary_add(zds1, n, zds1, n, wds, n);
 
-int
-rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
-{
-    int sign;
-    BDIGIT *ds;
-    size_t num_bdigits;
-    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
 
-    RB_GC_GUARD(val) = rb_to_int(val);
+    MEMCPY(wds, zds2, BDIGIT, n);
 
-    if (FIXNUM_P(val)) {
-        long v = FIX2LONG(val);
-        if (v < 0) {
-            sign = -1;
-            v = -v;
-        }
-        else {
-            sign = 1;
-        }
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-        fixbuf[0] = v;
-#else
-        {
-            int i;
-            for (i = 0; i < numberof(fixbuf); i++) {
-                fixbuf[i] = BIGLO(v);
-                v = BIGDN(v);
-            }
-        }
-#endif
-        ds = fixbuf;
-        num_bdigits = numberof(fixbuf);
-    }
-    else {
-        sign = RBIGNUM_POSITIVE_P(val) ? 1 : -1;
-        ds = BDIGITS(val);
-        num_bdigits = RBIGNUM_LEN(val);
-    }
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
 
-    return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags);
-}
+    bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n);
 
-static size_t
-integer_unpack_num_bdigits_small(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
-{
-    /* nlp_bits stands for number of leading padding bits */
-    size_t num_bits = (wordsize * CHAR_BIT - nails) * numwords;
-    size_t num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG;
-    *nlp_bits_ret = (int)(num_bdigits * BITSPERDIG - num_bits);
-    return num_bdigits;
-}
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
 
-static size_t
-integer_unpack_num_bdigits_generic(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
-{
-    /* BITSPERDIG = SIZEOF_BDIGITS * CHAR_BIT */
-    /* num_bits = (wordsize * CHAR_BIT - nails) * numwords */
-    /* num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG */
+    carry3 = bary_add(zds1, n, zds1, n, zds2, n);
 
-    /* num_bits = CHAR_BIT * (wordsize * numwords) - nails * numwords = CHAR_BIT * num_bytes1 - nails * numwords */
-    size_t num_bytes1 = wordsize * numwords;
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
 
-    /* q1 * CHAR_BIT + r1 = numwords */
-    size_t q1 = numwords / CHAR_BIT;
-    size_t r1 = numwords % CHAR_BIT;
+    carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3);
 
-    /* num_bits = CHAR_BIT * num_bytes1 - nails * (q1 * CHAR_BIT + r1) = CHAR_BIT * num_bytes2 - nails * r1 */
-    size_t num_bytes2 = num_bytes1 - nails * q1;
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
 
-    /* q2 * CHAR_BIT + r2 = nails */
-    size_t q2 = nails / CHAR_BIT;
-    size_t r2 = nails % CHAR_BIT;
+    bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n);
 
-    /* num_bits = CHAR_BIT * num_bytes2 - (q2 * CHAR_BIT + r2) * r1 = CHAR_BIT * num_bytes3 - r1 * r2 */
-    size_t num_bytes3 = num_bytes2 - q2 * r1;
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
 
-    /* q3 * BITSPERDIG + r3 = num_bytes3 */
-    size_t q3 = num_bytes3 / BITSPERDIG;
-    size_t r3 = num_bytes3 % BITSPERDIG;
+    if (carry2)
+        bary_add_one(zds2, zl-2*n);
 
-    /* num_bits = CHAR_BIT * (q3 * BITSPERDIG + r3) - r1 * r2 = BITSPERDIG * num_digits1 + CHAR_BIT * r3 - r1 * r2 */
-    size_t num_digits1 = CHAR_BIT * q3;
+    if (borrow && carry1)
+        borrow = carry1 = 0;
+    if (borrow && carry3)
+        borrow = carry3 = 0;
 
-    /*
-     * if CHAR_BIT * r3 >= r1 * r2
-     *   CHAR_BIT * r3 - r1 * r2 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2))
-     *   q4 * BITSPERDIG + r4 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2)
-     *   num_bits = BITSPERDIG * num_digits1 + CHAR_BIT * BITSPERDIG - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
-     * else
-     *   q4 * BITSPERDIG + r4 = -(CHAR_BIT * r3 - r1 * r2)
-     *   num_bits = BITSPERDIG * num_digits1 - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
-     * end
-     */
+    if (borrow)
+        bary_sub_one(zds3, zl-3*n);
+    else if (carry1 || carry3) {
+        BDIGIT c = carry1 + carry3;
+        bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1);
+    }
 
-    if (CHAR_BIT * r3 >= r1 * r2) {
-        size_t tmp1 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2);
-        size_t q4 = tmp1 / BITSPERDIG;
-        int r4 = (int)(tmp1 % BITSPERDIG);
-        size_t num_digits2 = num_digits1 + CHAR_BIT - q4;
-        *nlp_bits_ret = r4;
-        return num_digits2;
+    /*
+    if (SIZEOF_BDIGITS * zl <= 16) {
+        uint128_t z, x, y;
+        ssize_t i;
+        for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; }
+        for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; }
+        for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; }
+        assert(z == x * y);
     }
-    else {
-        size_t tmp1 = r1 * r2 - CHAR_BIT * r3;
-        size_t q4 = tmp1 / BITSPERDIG;
-        int r4 = (int)(tmp1 % BITSPERDIG);
-        size_t num_digits2 = num_digits1 - q4;
-        *nlp_bits_ret = r4;
-        return num_digits2;
+    */
+
+    if (odd_x && odd_y) {
+        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
+        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1);
+    }
+    else if (odd_x) {
+        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl);
+    }
+    else if (odd_y) {
+        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
     }
+
+    if (work)
+        ALLOCV_END(work);
 }
 
-static size_t
-integer_unpack_num_bdigits(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
+static void
+bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
-    size_t num_bdigits;
+    size_t l;
 
-    if (numwords <= (SIZE_MAX - (BITSPERDIG-1)) / CHAR_BIT / wordsize) {
-        num_bdigits = integer_unpack_num_bdigits_small(numwords, wordsize, nails, nlp_bits_ret);
-#ifdef DEBUG_INTEGER_PACK
-        {
-            int nlp_bits1;
-            size_t num_bdigits1 = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, &nlp_bits1);
-            assert(num_bdigits == num_bdigits1);
-            assert(*nlp_bits_ret == nlp_bits1);
-        }
-#endif
+    assert(xl + yl <= zl);
+
+    if (xl == 1 && yl == 1) {
+        l = 2;
+        bary_mul_single(zds, zl, xds[0], yds[0]);
     }
     else {
-        num_bdigits = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, nlp_bits_ret);
+        l = xl + yl;
+        bary_mul_normal(zds, zl, xds, xl, yds, yl);
+        rb_thread_check_ints();
     }
-    return num_bdigits;
+    MEMZERO(zds + l, BDIGIT, zl - l);
 }
 
-static inline void
-integer_unpack_push_bits(int data, int numbits, BDIGIT_DBL *ddp, int *numbits_in_dd_p, BDIGIT **dpp)
+/* determine whether a bignum is sparse or not by random sampling */
+static inline int
+bary_sparse_p(BDIGIT *ds, size_t n)
 {
-    (*ddp) |= ((BDIGIT_DBL)data) << (*numbits_in_dd_p);
-    *numbits_in_dd_p += numbits;
-    while (BITSPERDIG <= *numbits_in_dd_p) {
-        *(*dpp)++ = BIGLO(*ddp);
-        *ddp = BIGDN(*ddp);
-        *numbits_in_dd_p -= BITSPERDIG;
-    }
-}
+    long c = 0;
 
-static int
-integer_unpack_single_bdigit(BDIGIT u, size_t size, int flags, BDIGIT *dp)
-{
-    int sign;
-    if (flags & INTEGER_PACK_2COMP) {
-        sign = (flags & INTEGER_PACK_NEGATIVE) ?
-            ((size == SIZEOF_BDIGITS && u == 0) ? -2 : -1) :
-            ((u >> (size * CHAR_BIT - 1)) ? -1 : 1);
-        if (sign < 0) {
-            u |= LSHIFTX(BDIGMAX, size * CHAR_BIT);
-            u = BIGLO(1 + ~u);
-        }
-    }
-    else
-        sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
-    *dp = u;
-    return sign;
+    if (          ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
+    if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
+    if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
+
+    return (c <= 1) ? 1 : 0;
 }
 
-static int
-bary_unpack_internal(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags, int nlp_bits)
+static void
+bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
-    int sign;
-    const unsigned char *buf = words;
-    BDIGIT *dp;
-    BDIGIT *de;
-
-    dp = bdigits;
-    de = dp + num_bdigits;
-
-    if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
-        if (nails == 0 && numwords == 1) {
-            int need_swap = wordsize != 1 &&
-                (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
-                ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
-            if (wordsize == 1) {
-                return integer_unpack_single_bdigit(*(uint8_t *)buf, sizeof(uint8_t), flags, dp);
-            }
-#if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGITS
-            if (wordsize == 2 && (uintptr_t)words % ALIGNOF(uint16_t) == 0) {
-                BDIGIT u = *(uint16_t *)buf;
-                return integer_unpack_single_bdigit(need_swap ? swap16(u) : u, sizeof(uint16_t), flags, dp);
-            }
-#endif
-#if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGITS
-            if (wordsize == 4 && (uintptr_t)words % ALIGNOF(uint32_t) == 0) {
-                BDIGIT u = *(uint32_t *)buf;
-                return integer_unpack_single_bdigit(need_swap ? swap32(u) : u, sizeof(uint32_t), flags, dp);
-            }
-#endif
-#if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGITS
-            if (wordsize == 8 && (uintptr_t)words % ALIGNOF(uint64_t) == 0) {
-                BDIGIT u = *(uint64_t *)buf;
-                return integer_unpack_single_bdigit(need_swap ? swap64(u) : u, sizeof(uint64_t), flags, dp);
-            }
-#endif
-        }
-#if !defined(WORDS_BIGENDIAN)
-        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
-            (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
-            (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
-            size_t src_size = numwords * wordsize;
-            size_t dst_size = num_bdigits * SIZEOF_BDIGITS;
-            MEMCPY(dp, words, char, src_size);
-            if (flags & INTEGER_PACK_2COMP) {
-                if (flags & INTEGER_PACK_NEGATIVE) {
-                    int zero_p;
-                    memset((char*)dp + src_size, 0xff, dst_size - src_size);
-                    zero_p = bary_2comp(dp, num_bdigits);
-                    sign = zero_p ? -2 : -1;
-                }
-                else if (buf[src_size-1] >> (CHAR_BIT-1)) {
-                    memset((char*)dp + src_size, 0xff, dst_size - src_size);
-                    bary_2comp(dp, num_bdigits);
-                    sign = -1;
-                }
-                else {
-                    MEMZERO((char*)dp + src_size, char, dst_size - src_size);
-                    sign = 1;
-                }
-            }
-            else {
-                MEMZERO((char*)dp + src_size, char, dst_size - src_size);
-                sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
-            }
-            return sign;
-        }
-#endif
-        if (nails == 0 && SIZEOF_BDIGITS == sizeof(BDIGIT) &&
-            wordsize % SIZEOF_BDIGITS == 0) {
-            size_t bdigits_per_word = wordsize / SIZEOF_BDIGITS;
-            int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
-            int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
-                (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
-            MEMCPY(dp, words, BDIGIT, numwords*bdigits_per_word);
-            if (mswordfirst_p) {
-                bary_swap(dp, num_bdigits);
-            }
-            if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
-                size_t i;
-                BDIGIT *p = dp;
-                for (i = 0; i < numwords; i++) {
-                    bary_swap(p, bdigits_per_word);
-                    p += bdigits_per_word;
-                }
-            }
-            if (msbytefirst_p != HOST_BIGENDIAN_P) {
-                BDIGIT *p;
-                for (p = dp; p < de; p++) {
-                    BDIGIT d = *p;
-                    *p = swap_bdigit(d);
-                }
-            }
-            if (flags & INTEGER_PACK_2COMP) {
-                if (flags & INTEGER_PACK_NEGATIVE) {
-                    int zero_p = bary_2comp(dp, num_bdigits);
-                    sign = zero_p ? -2 : -1;
-                }
-                else if (BDIGIT_MSB(de[-1])) {
-                    bary_2comp(dp, num_bdigits);
-                    sign = -1;
-                }
-                else {
-                    sign = 1;
-                }
-            }
-            else {
-                sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
-            }
-            return sign;
-        }
-    }
-
-    if (num_bdigits != 0) {
-        int word_num_partialbits;
-        size_t word_num_fullbytes;
-
-        ssize_t word_step;
-        size_t byte_start;
-        int byte_step;
-
-        size_t word_start, word_last;
-        const unsigned char *wordp, *last_wordp;
-        BDIGIT_DBL dd;
-        int numbits_in_dd;
+    size_t nlsz; /* number of least significant zero BDIGITs */
 
-        integer_pack_loop_setup(numwords, wordsize, nails, flags,
-            &word_num_fullbytes, &word_num_partialbits,
-            &word_start, &word_step, &word_last, &byte_start, &byte_step);
+    assert(xl + yl <= zl);
 
-        wordp = buf + word_start;
-        last_wordp = buf + word_last;
+    while (0 < xl && xds[xl-1] == 0)
+        xl--;
+    while (0 < yl && yds[yl-1] == 0)
+        yl--;
 
-        dd = 0;
-        numbits_in_dd = 0;
+    nlsz = 0;
+    while (0 < xl && xds[0] == 0) {
+        xds++;
+        xl--;
+        nlsz++;
+    }
+    while (0 < yl && yds[0] == 0) {
+        yds++;
+        yl--;
+        nlsz++;
+    }
+    if (nlsz) {
+        MEMZERO(zds, BDIGIT, nlsz);
+        zds += nlsz;
+        zl -= nlsz;
+    }
 
-#define PUSH_BITS(data, numbits) \
-        integer_unpack_push_bits(data, numbits, &dd, &numbits_in_dd, &dp)
+    /* make sure that y is at least as long as x (swap when x is longer) */
+    if (xl > yl) {
+        BDIGIT *tds;
+        size_t tl;
+	tds = xds; xds = yds; yds = tds;
+	tl = xl; xl = yl; yl = tl;
+    }
+    assert(xl <= yl);
 
-        while (1) {
-            size_t index_in_word = 0;
-            const unsigned char *bytep = wordp + byte_start;
-            while (index_in_word < word_num_fullbytes) {
-                PUSH_BITS(*bytep, CHAR_BIT);
-                bytep += byte_step;
-                index_in_word++;
-            }
-            if (word_num_partialbits) {
-                PUSH_BITS(*bytep & ((1 << word_num_partialbits) - 1), word_num_partialbits);
-                bytep += byte_step;
-                index_in_word++;
-            }
+    if (xl == 0) {
+        MEMZERO(zds, BDIGIT, zl);
+        return;
+    }
 
-            if (wordp == last_wordp)
-                break;
+    /* normal multiplication when x is small */
+    if (xl < KARATSUBA_MUL_DIGITS) {
+      normal:
+        if (xds == yds && xl == yl)
+            bary_sq_fast(zds, zl, xds, xl);
+        else
+            bary_mul1(zds, zl, xds, xl, yds, yl);
+        return;
+    }
 
-            wordp += word_step;
-        }
-        if (dd)
-            *dp++ = (BDIGIT)dd;
-        assert(dp <= de);
-        while (dp < de)
-            *dp++ = 0;
-#undef PUSH_BITS
+    /* normal multiplication when x or y is a sparse bignum */
+    if (bary_sparse_p(xds, xl)) goto normal;
+    if (bary_sparse_p(yds, yl)) {
+        bary_mul1(zds, zl, yds, yl, xds, xl);
+        return;
     }
 
-    if (!(flags & INTEGER_PACK_2COMP)) {
-        sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
+    /* balance multiplication by slicing y when x is much smaller than y */
+    if (2 * xl <= yl) {
+        bary_mul_balance(zds, zl, xds, xl, yds, yl);
+        return;
     }
-    else {
-        if (nlp_bits) {
-            if ((flags & INTEGER_PACK_NEGATIVE) ||
-                (bdigits[num_bdigits-1] >> (BITSPERDIG - nlp_bits - 1))) {
-                bdigits[num_bdigits-1] |= BIGLO(BDIGMAX << (BITSPERDIG - nlp_bits));
-                sign = -1;
-            }
-            else {
-                sign = 1;
-            }
-        }
-        else {
-            if (flags & INTEGER_PACK_NEGATIVE) {
-                sign = bary_zero_p(bdigits, num_bdigits) ? -2 : -1;
-            }
-            else {
-                if (num_bdigits != 0 && BDIGIT_MSB(bdigits[num_bdigits-1]))
-                    sign = -1;
-                else
-                    sign = 1;
-            }
-        }
-        if (sign == -1 && num_bdigits != 0) {
-            bary_2comp(bdigits, num_bdigits);
-        }
+
+    if (xl < TOOM3_MUL_DIGITS) {
+        /* multiplication by karatsuba method */
+        bary_mul_karatsuba(zds, zl, xds, xl, yds, yl);
+        return;
     }
 
-    return sign;
+    if (3*xl <= 2*(yl + 2)) {
+        bary_mul_balance(zds, zl, xds, xl, yds, yl);
+        return;
+    }
+
+    {
+        VALUE x, y, z;
+        x = bignew(xl, 1);
+	MEMCPY(BDIGITS(x), xds, BDIGIT, xl);
+        y = bignew(yl, 1);
+	MEMCPY(BDIGITS(y), yds, BDIGIT, yl);
+        z = bigtrunc(bigmul1_toom3(x, y));
+        MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z));
+        MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z));
+    }
 }
 
+
+/*
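+VALUE-level helpers (debug dump, zero tests, rb_cmpint) start here; the bary_* routines just above work on raw BDIGIT arrays.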
+*/
+
+#define BIGNUM_DEBUG 0
+#if BIGNUM_DEBUG
+#define ON_DEBUG(x) do { x; } while (0)
 static void
-bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
+dump_bignum(VALUE x)
 {
-    size_t num_bdigits0;
-    int nlp_bits;
-    int sign;
-
-    validate_integer_pack_format(numwords, wordsize, nails, flags,
-            INTEGER_PACK_MSWORD_FIRST|
-            INTEGER_PACK_LSWORD_FIRST|
-            INTEGER_PACK_MSBYTE_FIRST|
-            INTEGER_PACK_LSBYTE_FIRST|
-            INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP|
-            INTEGER_PACK_FORCE_BIGNUM|
-            INTEGER_PACK_NEGATIVE|
-            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
+    long i;
+    printf("%c0x0", RBIGNUM_SIGN(x) ? '+' : '-');
+    for (i = RBIGNUM_LEN(x); i--; ) {
+        printf("_%0*"PRIxBDIGIT, SIZEOF_BDIGITS*2, BDIGITS(x)[i]);
+    }
+    printf(", len=%lu", RBIGNUM_LEN(x));
+    puts("");
+}
 
-    num_bdigits0 = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
+static VALUE
+rb_big_dump(VALUE x)
+{
+    dump_bignum(x);
+    return x;
+}
+#else
+#define ON_DEBUG(x)
+#endif
 
-    assert(num_bdigits0 <= num_bdigits);
+static int
+bigzero_p(VALUE x)
+{
+    return bary_zero_p(BDIGITS(x), RBIGNUM_LEN(x));
+}
 
-    sign = bary_unpack_internal(bdigits, num_bdigits0, words, numwords, wordsize, nails, flags, nlp_bits);
+int
+rb_bigzero_p(VALUE x)
+{
+    return BIGZEROP(x);
+}
 
-    if (num_bdigits0 < num_bdigits) {
-        MEMZERO(bdigits + num_bdigits0, BDIGIT, num_bdigits - num_bdigits0);
-        if (sign == -2) {
-            bdigits[num_bdigits0] = 1;
-        }
+int
+rb_cmpint(VALUE val, VALUE a, VALUE b)
+{
+    if (NIL_P(val)) {
+	rb_cmperr(a, b);
+    }
+    if (FIXNUM_P(val)) {
+        long l = FIX2LONG(val);
+        if (l > 0) return 1;
+        if (l < 0) return -1;
+        return 0;
+    }
+    if (RB_TYPE_P(val, T_BIGNUM)) {
+	if (BIGZEROP(val)) return 0;
+	if (RBIGNUM_SIGN(val)) return 1;
+	return -1;
     }
+    if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1;
+    if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1;
+    return 0;
 }
 
-/*
- * Import an integer into a buffer.
- *
- * [words] buffer to import.
- * [numwords] the size of given buffer as number of words.
- * [wordsize] the size of word as number of bytes.
- * [nails] number of padding bits in a word.
- *   Most significant nails bits of each word are ignored.
- * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
- *
- * flags:
- * [INTEGER_PACK_MSWORD_FIRST] Interpret the first word as the most significant word.
- * [INTEGER_PACK_LSWORD_FIRST] Interpret the first word as the least significant word.
- * [INTEGER_PACK_MSBYTE_FIRST] Interpret the first byte in a word as the most significant byte in the word.
- * [INTEGER_PACK_LSBYTE_FIRST] Interpret the first byte in a word as the least significant byte in the word.
- * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
- * [INTEGER_PACK_2COMP] Use 2's complement representation.
- * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
- * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
- * [INTEGER_PACK_FORCE_BIGNUM] the result will be a Bignum
- *   even if it is representable as a Fixnum.
- * [INTEGER_PACK_NEGATIVE] Returns non-positive value.
- *   (Returns non-negative value if not specified.)
- * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
- *
- * This function returns the imported integer as Fixnum or Bignum.
- *
- * The range of the result value depends on INTEGER_PACK_2COMP and INTEGER_PACK_NEGATIVE.
- *
- * INTEGER_PACK_2COMP is not set:
- *   0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails)) if !INTEGER_PACK_NEGATIVE
- *   -2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0 if INTEGER_PACK_NEGATIVE
- *
- * INTEGER_PACK_2COMP is set:
- *   -2**(numwords*(wordsize*CHAR_BIT-nails)-1) <= val <= 2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1 if !INTEGER_PACK_NEGATIVE
- *   -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1 if INTEGER_PACK_NEGATIVE
- *
- * INTEGER_PACK_2COMP without INTEGER_PACK_NEGATIVE means sign extension.
- * INTEGER_PACK_2COMP with INTEGER_PACK_NEGATIVE mean assuming the higher bits are 1.
- *
- * Note that this function returns 0 when numwords is zero and
- * INTEGER_PACK_2COMP is set but INTEGER_PACK_NEGATIVE is not set.
- */
+#define RBIGNUM_SET_LEN(b,l) \
+    ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \
+     (void)(RBASIC(b)->flags = \
+	    (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \
+	    ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \
+     (void)(RBIGNUM(b)->as.heap.len = (l)))
 
-VALUE
-rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
+static void
+rb_big_realloc(VALUE big, long len)
 {
-    VALUE val;
-    size_t num_bdigits;
-    int sign;
-    int nlp_bits;
     BDIGIT *ds;
-    BDIGIT fixbuf[2] = { 0, 0 };
-
-    validate_integer_pack_format(numwords, wordsize, nails, flags,
-            INTEGER_PACK_MSWORD_FIRST|
-            INTEGER_PACK_LSWORD_FIRST|
-            INTEGER_PACK_MSBYTE_FIRST|
-            INTEGER_PACK_LSBYTE_FIRST|
-            INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP|
-            INTEGER_PACK_FORCE_BIGNUM|
-            INTEGER_PACK_NEGATIVE|
-            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
+    if (RBASIC(big)->flags & RBIGNUM_EMBED_FLAG) {
+	if (RBIGNUM_EMBED_LEN_MAX < len) {
+	    ds = ALLOC_N(BDIGIT, len);
+	    MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, RBIGNUM_EMBED_LEN_MAX);
+	    RBIGNUM(big)->as.heap.len = RBIGNUM_LEN(big);
+	    RBIGNUM(big)->as.heap.digits = ds;
+	    RBASIC(big)->flags &= ~RBIGNUM_EMBED_FLAG;
+	}
+    }
+    else {
+	if (len <= RBIGNUM_EMBED_LEN_MAX) {
+	    ds = RBIGNUM(big)->as.heap.digits;
+	    RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG;
+	    RBIGNUM_SET_LEN(big, len);
+	    if (ds) {
+		MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len);
+		xfree(ds);
+	    }
+	}
+	else {
+	    if (RBIGNUM_LEN(big) == 0) {
+		RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
+	    }
+	    else {
+		REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len);
+	    }
+	}
+    }
+}
 
-    num_bdigits = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
+void
+rb_big_resize(VALUE big, long len)
+{
+    rb_big_realloc(big, len);
+    RBIGNUM_SET_LEN(big, len);
+}
 
-    if (LONG_MAX-1 < num_bdigits)
-        rb_raise(rb_eArgError, "too big to unpack as an integer");
-    if (num_bdigits <= numberof(fixbuf) && !(flags & INTEGER_PACK_FORCE_BIGNUM)) {
-        val = Qfalse;
-        ds = fixbuf;
+static VALUE
+bignew_1(VALUE klass, long len, int sign)
+{
+    NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0));
+    RBIGNUM_SET_SIGN(big, sign?1:0);
+    if (len <= RBIGNUM_EMBED_LEN_MAX) {
+	RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG;
+	RBIGNUM_SET_LEN(big, len);
     }
     else {
-        val = bignew((long)num_bdigits, 0);
-        ds = BDIGITS(val);
-    }
-    sign = bary_unpack_internal(ds, num_bdigits, words, numwords, wordsize, nails, flags, nlp_bits);
-
-    if (sign == -2) {
-        if (val) {
-            big_extend_carry(val);
-        }
-        else if (num_bdigits == numberof(fixbuf)) {
-            val = bignew((long)num_bdigits+1, 0);
-	    MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
-            BDIGITS(val)[num_bdigits++] = 1;
-        }
-        else {
-            ds[num_bdigits++] = 1;
-        }
+	RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
+	RBIGNUM(big)->as.heap.len = len;
     }
+    OBJ_FREEZE(big);
+    return (VALUE)big;
+}
 
-    if (!val) {
-        BDIGIT_DBL u = fixbuf[0] + BIGUP(fixbuf[1]);
-        if (u == 0)
-            return LONG2FIX(0);
-	if (0 < sign && POSFIXABLE(u))
-            return LONG2FIX(u);
-	if (sign < 0 && BDIGIT_MSB(fixbuf[1]) == 0 &&
-                NEGFIXABLE(-(BDIGIT_DBL_SIGNED)u))
-            return LONG2FIX(-(BDIGIT_DBL_SIGNED)u);
-        val = bignew((long)num_bdigits, 0 <= sign);
-        MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
-    }
+VALUE
+rb_big_new(long len, int sign)
+{
+    return bignew(len, sign != 0);
+}
 
-    if ((flags & INTEGER_PACK_FORCE_BIGNUM) && sign != 0 &&
-        bary_zero_p(BDIGITS(val), RBIGNUM_LEN(val)))
-        sign = 0;
-    RBIGNUM_SET_SIGN(val, 0 <= sign);
+VALUE
+rb_big_clone(VALUE x)
+{
+    long len = RBIGNUM_LEN(x);
+    VALUE z = bignew_1(CLASS_OF(x), len, RBIGNUM_SIGN(x));
 
-    if (flags & INTEGER_PACK_FORCE_BIGNUM)
-        return bigtrunc(val);
-    return bignorm(val);
+    MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len);
+    return z;
 }
 
-#define QUAD_SIZE 8
-
-void
-rb_quad_pack(char *buf, VALUE val)
+static void
+big_extend_carry(VALUE x)
 {
-    rb_integer_pack(val, buf, 1, QUAD_SIZE, 0,
-            INTEGER_PACK_NATIVE_BYTE_ORDER|
-            INTEGER_PACK_2COMP);
+    rb_big_resize(x, RBIGNUM_LEN(x)+1);
+    BDIGITS(x)[RBIGNUM_LEN(x)-1] = 1;
 }
 
-VALUE
-rb_quad_unpack(const char *buf, int signed_p)
+/* modify a bignum by 2's complement */
+static void
+get2comp(VALUE x)
 {
-    return rb_integer_unpack(buf, 1, QUAD_SIZE, 0,
-            INTEGER_PACK_NATIVE_BYTE_ORDER|
-            (signed_p ? INTEGER_PACK_2COMP : 0));
+    long i = RBIGNUM_LEN(x);
+    BDIGIT *ds = BDIGITS(x);
+
+    if (bary_2comp(ds, i)) {
+        big_extend_carry(x);
+    }
 }
 
-VALUE
-rb_cstr_to_inum(const char *str, int base, int badcheck)
+void
+rb_big_2comp(VALUE x)			/* get 2's complement */
 {
-    const char *s = str;
-    char sign = 1, nondigit = 0;
-    int c;
-    VALUE z;
-
-    int bits_per_digit;
-    size_t i;
+    get2comp(x);
+}
 
-    const char *digits_start, *digits_end, *p;
-    size_t num_digits;
-    size_t num_bdigits;
+static BDIGIT
+abs2twocomp(VALUE *xp, long *n_ret)
+{
+    VALUE x = *xp;
+    long n = RBIGNUM_LEN(x);
+    BDIGIT *ds = BDIGITS(x);
+    BDIGIT hibits = 0;
 
-#undef ISDIGIT
-#define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
-#define conv_digit(c) (ruby_digit36_to_number_table[(unsigned char)(c)])
+    while (0 < n && ds[n-1] == 0)
+        n--;
 
-    if (!str) {
-	if (badcheck) goto bad;
-	return INT2FIX(0);
+    if (n != 0 && RBIGNUM_NEGATIVE_P(x)) {
+        VALUE z = bignew_1(CLASS_OF(x), n, 0);
+        MEMCPY(BDIGITS(z), ds, BDIGIT, n);
+        bary_2comp(BDIGITS(z), n);
+        hibits = BDIGMAX;
+	*xp = z;
     }
-    while (ISSPACE(*str)) str++;
+    *n_ret = n;
+    return hibits;
+}
 
-    if (str[0] == '+') {
-	str++;
-    }
-    else if (str[0] == '-') {
-	str++;
-	sign = 0;
-    }
-    if (str[0] == '+' || str[0] == '-') {
-	if (badcheck) goto bad;
-	return INT2FIX(0);
-    }
-    if (base <= 0) {
-	if (str[0] == '0') {
-	    switch (str[1]) {
-	      case 'x': case 'X':
-		base = 16;
-                str += 2;
-		break;
-	      case 'b': case 'B':
-		base = 2;
-                str += 2;
-		break;
-	      case 'o': case 'O':
-		base = 8;
-                str += 2;
-		break;
-	      case 'd': case 'D':
-		base = 10;
-                str += 2;
-		break;
-	      default:
-		base = 8;
-	    }
-	}
-	else if (base < -1) {
-	    base = -base;
-	}
-	else {
-	    base = 10;
-	}
-    }
-    else if (base == 2) {
-	if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
-	    str += 2;
-	}
-    }
-    else if (base == 8) {
-	if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
-	    str += 2;
-	}
+static void
+twocomp2abs_bang(VALUE x, int hibits)
+{
+    RBIGNUM_SET_SIGN(x, !hibits);
+    if (hibits) {
+        get2comp(x);
     }
-    else if (base == 10) {
-	if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
-	    str += 2;
-	}
+}
+
+static inline VALUE
+bigtrunc(VALUE x)
+{
+    long len = RBIGNUM_LEN(x);
+    BDIGIT *ds = BDIGITS(x);
+
+    if (len == 0) return x;
+    while (--len && !ds[len]);
+    if (RBIGNUM_LEN(x) > len+1) {
+	rb_big_resize(x, len+1);
     }
-    else if (base == 16) {
-	if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
-	    str += 2;
+    return x;
+}
+
+static inline VALUE
+bigfixize(VALUE x)
+{
+    long len = RBIGNUM_LEN(x);
+    BDIGIT *ds = BDIGITS(x);
+
+    if (len == 0) return INT2FIX(0);
+    if (BIGSIZE(x) <= sizeof(long)) {
+	long num = 0;
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+	num = (long)ds[0];
+#else
+	while (len--) {
+	    num = (long)(BIGUP(num) + ds[len]);
 	}
-    }
-    if (base < 2 || 36 < base) {
-        rb_raise(rb_eArgError, "invalid radix %d", base);
-    }
-    if (*str == '0') {		/* squeeze preceding 0s */
-	int us = 0;
-	while ((c = *++str) == '0' || c == '_') {
-	    if (c == '_') {
-		if (++us >= 2)
-		    break;
-	    } else
-		us = 0;
+#endif
+	if (num >= 0) {
+	    if (RBIGNUM_SIGN(x)) {
+		if (POSFIXABLE(num)) return LONG2FIX(num);
+	    }
+	    else {
+		if (NEGFIXABLE(-num)) return LONG2FIX(-num);
+	    }
 	}
-	if (!(c = *str) || ISSPACE(c)) --str;
     }
-    c = *str;
-    c = conv_digit(c);
-    if (c < 0 || c >= base) {
-	if (badcheck) goto bad;
-	return INT2FIX(0);
+    return x;
+}
+
+static VALUE
+bignorm(VALUE x)
+{
+    if (RB_TYPE_P(x, T_BIGNUM)) {
+	x = bigfixize(x);
+        if (!FIXNUM_P(x))
+            bigtrunc(x);
     }
+    return x;
+}
 
-    bits_per_digit = bitsize(base-1);
-    if (bits_per_digit * strlen(str) <= sizeof(long) * CHAR_BIT) {
-        char *end;
-	unsigned long val = STRTOUL(str, &end, base);
+VALUE
+rb_big_norm(VALUE x)
+{
+    return bignorm(x);
+}
 
-	if (str < end && *end == '_') goto bigparse;
-	if (badcheck) {
-	    if (end == str) goto bad; /* no number */
-	    while (*end && ISSPACE(*end)) end++;
-	    if (*end) goto bad;	      /* trailing garbage */
-	}
+VALUE
+rb_uint2big(VALUE n)
+{
+    long i;
+    VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1);
+    BDIGIT *digits = BDIGITS(big);
 
-	if (POSFIXABLE(val)) {
-	    if (sign) return LONG2FIX(val);
-	    else {
-		long result = -(long)val;
-		return LONG2FIX(result);
-	    }
-	}
-	else {
-	    VALUE big = rb_uint2big(val);
-	    RBIGNUM_SET_SIGN(big, sign);
-	    return bignorm(big);
-	}
+#if SIZEOF_BDIGITS >= SIZEOF_VALUE
+    digits[0] = n;
+#else
+    for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) {
+	digits[i] = BIGLO(n);
+	n = BIGDN(n);
     }
-  bigparse:
-    if (badcheck && *str == '_') goto bad;
+#endif
 
-    num_digits = 0;
-    digits_start = digits_end = str;
-    while ((c = *str++) != 0) {
-	if (c == '_') {
-	    if (nondigit) {
-		if (badcheck) goto bad;
-		break;
-	    }
-	    nondigit = (char) c;
-	    continue;
-	}
-	else if ((c = conv_digit(c)) < 0) {
-	    break;
-	}
-	if (c >= base) break;
-	nondigit = 0;
-        num_digits++;
-        digits_end = str;
+    i = bdigit_roomof(SIZEOF_VALUE);
+    while (--i && !digits[i]) ;
+    RBIGNUM_SET_LEN(big, i+1);
+    return big;
+}
+
+VALUE
+rb_int2big(SIGNED_VALUE n)
+{
+    long neg = 0;
+    VALUE u;
+    VALUE big;
+
+    if (n < 0) {
+        u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */
+	neg = 1;
     }
-    if (badcheck) {
-	str--;
-	if (s+1 < str && str[-1] == '_') goto bad;
-	while (*str && ISSPACE(*str)) str++;
-	if (*str) {
-	  bad:
-	    rb_invalid_str(s, "Integer()");
-	}
+    else {
+        u = n;
+    }
+    big = rb_uint2big(u);
+    if (neg) {
+	RBIGNUM_SET_SIGN(big, 0);
     }
+    return big;
+}
 
-    if (POW2_P(base)) {
-        BDIGIT *dp;
-        BDIGIT_DBL dd;
-        int numbits;
-        num_bdigits = (num_digits / BITSPERDIG) * bits_per_digit + roomof((num_digits % BITSPERDIG) * bits_per_digit, BITSPERDIG);
-        z = bignew(num_bdigits, sign);
-        dp = BDIGITS(z);
-        dd = 0;
-        numbits = 0;
-        for (p = digits_end; digits_start < p; p--) {
-            if ((c = conv_digit(p[-1])) < 0)
-                continue;
-            dd |= (BDIGIT_DBL)c << numbits;
-            numbits += bits_per_digit;
-            if (BITSPERDIG <= numbits) {
-                *dp++ = BIGLO(dd);
-                dd = BIGDN(dd);
-                numbits -= BITSPERDIG;
-            }
+VALUE
+rb_uint2inum(VALUE n)
+{
+    if (POSFIXABLE(n)) return LONG2FIX(n);
+    return rb_uint2big(n);
+}
+
+VALUE
+rb_int2inum(SIGNED_VALUE n)
+{
+    if (FIXABLE(n)) return LONG2FIX(n);
+    return rb_int2big(n);
+}
+
+void
+rb_big_pack(VALUE val, unsigned long *buf, long num_longs)
+{
+    rb_integer_pack(val, buf, num_longs, sizeof(long), 0,
+            INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP);
+}
+
+VALUE
+rb_big_unpack(unsigned long *buf, long num_longs)
+{
+    return rb_integer_unpack(buf, num_longs, sizeof(long), 0,
+            INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP);
+}
+
+/*
+ * Calculate the number of bytes to be required to represent
+ * the absolute value of the integer given as _val_.
+ *
+ * [val] an integer.
+ * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL.
+ *
+ * This function returns ((val_numbits + CHAR_BIT - 1) / CHAR_BIT)
+ * where val_numbits is the number of bits of abs(val).
+ * This function should not overflow.
+ *
+ * If nlz_bits_ret is not NULL,
+ * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret.
+ * In this case, 0 <= *nlz_bits_ret < CHAR_BIT.
+ *
+ */
+size_t
+rb_absint_size(VALUE val, int *nlz_bits_ret)
+{
+    BDIGIT *dp;
+    BDIGIT *de;
+    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
+
+    int num_leading_zeros;
+
+    val = rb_to_int(val);
+
+    if (FIXNUM_P(val)) {
+        long v = FIX2LONG(val);
+        if (v < 0) {
+            v = -v;
         }
-        if (numbits) {
-            *dp++ = BIGLO(dd);
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+        fixbuf[0] = v;
+#else
+        {
+            int i;
+            for (i = 0; i < numberof(fixbuf); i++) {
+                fixbuf[i] = BIGLO(v);
+                v = BIGDN(v);
+            }
         }
-        assert((size_t)(dp - BDIGITS(z)) == num_bdigits);
+#endif
+        dp = fixbuf;
+        de = fixbuf + numberof(fixbuf);
+    }
+    else {
+        dp = BDIGITS(val);
+        de = dp + RBIGNUM_LEN(val);
+    }
+    while (dp < de && de[-1] == 0)
+        de--;
+    if (dp == de) {
+        if (nlz_bits_ret)
+            *nlz_bits_ret = 0;
+        return 0;
+    }
+    num_leading_zeros = nlz(de[-1]);
+    if (nlz_bits_ret)
+        *nlz_bits_ret = num_leading_zeros % CHAR_BIT;
+    return (de - dp) * SIZEOF_BDIGITS - num_leading_zeros / CHAR_BIT;
+}
+
+static size_t
+absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
+{
+    size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte;
+    size_t div = val_numbits / word_numbits;
+    size_t mod = val_numbits % word_numbits;
+    size_t numwords;
+    size_t nlz_bits;
+    numwords = mod == 0 ? div : div + 1;
+    nlz_bits = mod == 0 ? 0 : word_numbits - mod;
+    *nlz_bits_ret = nlz_bits;
+    return numwords;
+}
+
+static size_t
+absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
+{
+    BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))];
+    BDIGIT char_bit[1] = { CHAR_BIT };
+    BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)];
+    BDIGIT nlz_bits_in_msbyte_bary[1] = { nlz_bits_in_msbyte };
+    BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))];
+    BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS];
+    BDIGIT mod_bary[numberof(word_numbits_bary)];
+    BDIGIT one[1] = { 1 };
+    size_t nlz_bits;
+    size_t mod;
+    int sign;
+    size_t numwords;
+
+    /*
+     * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte
+     * div, mod = val_numbits.divmod(word_numbits)
+     * numwords = mod == 0 ? div : div + 1
+     * nlz_bits = mod == 0 ? 0 : word_numbits - mod
+     */
+
+    bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
+        INTEGER_PACK_NATIVE_BYTE_ORDER);
+    BARY_MUL1(val_numbits_bary, numbytes_bary, char_bit);
+    if (nlz_bits_in_msbyte)
+        BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary);
+    bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0,
+        INTEGER_PACK_NATIVE_BYTE_ORDER);
+    BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary);
+    if (BARY_ZERO_P(mod_bary)) {
+        nlz_bits = 0;
     }
     else {
-        int digits_per_bdigits_dbl;
-        BDIGIT_DBL power;
-        power = maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
-        num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
-
-        if (num_bdigits < KARATSUBA_MUL_DIGITS) {
-            size_t blen = 1;
-            BDIGIT *zds;
-            BDIGIT_DBL num;
-
-            z = bignew(num_bdigits, sign);
-            zds = BDIGITS(z);
-            MEMZERO(zds, BDIGIT, num_bdigits);
+        BARY_ADD(div_bary, div_bary, one);
+        bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0,
+            INTEGER_PACK_NATIVE_BYTE_ORDER);
+        nlz_bits = word_numbits - mod;
+    }
+    sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0,
+        INTEGER_PACK_NATIVE_BYTE_ORDER);
 
-            for (p = digits_start; p < digits_end; p++) {
-                if ((c = conv_digit(*p)) < 0)
-                    continue;
-                num = c;
-                i = 0;
-                for (;;) {
-                    while (i<blen) {
-                        num += (BDIGIT_DBL)zds[i]*base;
-                        zds[i++] = BIGLO(num);
-                        num = BIGDN(num);
-                    }
-                    if (num) {
-                        blen++;
-                        continue;
-                    }
-                    break;
-                }
-                assert(blen <= num_bdigits);
-            }
-        }
-        else {
-            VALUE powerv;
-            size_t unit;
-            VALUE tmpuv = 0;
-            BDIGIT *uds, *vds, *tds;
-            BDIGIT_DBL dd;
-            BDIGIT_DBL current_base;
-            int m;
+    if (sign == 2)
+        return (size_t)-1;
+    *nlz_bits_ret = nlz_bits;
+    return numwords;
+}
 
-            uds = ALLOCV_N(BDIGIT, tmpuv, 2*num_bdigits);
-            vds = uds + num_bdigits;
+/*
+ * Calculate the number of words to be required to represent
+ * the absolute value of the integer given as _val_.
+ *
+ * [val] an integer.
+ * [word_numbits] number of bits in a word.
+ * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL.
+ *
+ * This function returns ((val_numbits + word_numbits - 1) / word_numbits)
+ * where val_numbits is the number of bits of abs(val).
+ *
+ * This function can overflow.
+ * When overflow occurs, (size_t)-1 is returned.
+ *
+ * If nlz_bits_ret is not NULL and overflow does not occur,
+ * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret.
+ * In this case, 0 <= *nlz_bits_ret < word_numbits.
+ *
+ */
+size_t
+rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
+{
+    size_t numbytes;
+    int nlz_bits_in_msbyte;
+    size_t numwords;
+    size_t nlz_bits;
 
-            powerv = bignew(2, 1);
-            BDIGITS(powerv)[0] = BIGLO(power);
-            BDIGITS(powerv)[1] = (BDIGIT)BIGDN(power);
+    if (word_numbits == 0)
+        return (size_t)-1;
 
-            i = 0;
-            dd = 0;
-            current_base = 1;
-            m = digits_per_bdigits_dbl;
-            if (num_digits < (size_t)m)
-                m = (int)num_digits;
-            for (p = digits_end; digits_start < p; p--) {
-                if ((c = conv_digit(p[-1])) < 0)
-                    continue;
-                dd = dd + c * current_base;
-                current_base *= base;
-                num_digits--;
-                m--;
-                if (m == 0) {
-                    uds[i++] = BIGLO(dd);
-                    uds[i++] = (BDIGIT)BIGDN(dd);
-                    dd = 0;
-                    m = digits_per_bdigits_dbl;
-                    if (num_digits < (size_t)m)
-                        m = (int)num_digits;
-                    current_base = 1;
-                }
-            }
-            assert(i == num_bdigits);
-            for (unit = 2; unit < num_bdigits; unit *= 2) {
-                for (i = 0; i < num_bdigits; i += unit*2) {
-                    if (2*unit <= num_bdigits - i) {
-                        bary_mul(vds+i, unit*2, BDIGITS(powerv), RBIGNUM_LEN(powerv), uds+i+unit, unit);
-                        bary_add(vds+i, unit*2, vds+i, unit*2, uds+i, unit);
-                    }
-                    else if (unit <= num_bdigits - i) {
-                        bary_mul(vds+i, num_bdigits-i, BDIGITS(powerv), RBIGNUM_LEN(powerv), uds+i+unit, num_bdigits-(i+unit));
-                        bary_add(vds+i, num_bdigits-i, vds+i, num_bdigits-i, uds+i, unit);
-                    }
-                    else {
-                        MEMCPY(vds+i, uds+i, BDIGIT, num_bdigits-i);
-                    }
-                }
-                powerv = bigtrunc(bigmul0(powerv, powerv));
-                tds = vds;
-                vds = uds;
-                uds = tds;
-            }
-            while (0 < num_bdigits && uds[num_bdigits-1] == 0)
-                num_bdigits--;
-            z = bignew(num_bdigits, sign);
-            MEMCPY(BDIGITS(z), uds, BDIGIT, num_bdigits);
+    numbytes = rb_absint_size(val, &nlz_bits_in_msbyte);
 
-            if (tmpuv)
-                ALLOCV_END(tmpuv);
+    if (numbytes <= SIZE_MAX / CHAR_BIT) {
+        numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
+#ifdef DEBUG_INTEGER_PACK
+        {
+            size_t numwords0, nlz_bits0;
+            numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0);
+            assert(numwords0 == numwords);
+            assert(nlz_bits0 == nlz_bits);
         }
+#endif
+    }
+    else {
+        numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
     }
+    if (numwords == (size_t)-1)
+        return numwords;
 
-    return bignorm(z);
+    if (nlz_bits_ret)
+        *nlz_bits_ret = nlz_bits;
+
+    return numwords;
 }
 
-VALUE
-rb_str_to_inum(VALUE str, int base, int badcheck)
+int
+rb_absint_singlebit_p(VALUE val)
 {
-    char *s;
-    long len;
-    VALUE v = 0;
-    VALUE ret;
+    BDIGIT *dp;
+    BDIGIT *de;
+    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
+    BDIGIT d;
 
-    StringValue(str);
-    rb_must_asciicompat(str);
-    if (badcheck) {
-	s = StringValueCStr(str);
+    val = rb_to_int(val);
+
+    if (FIXNUM_P(val)) {
+        long v = FIX2LONG(val);
+        if (v < 0) {
+            v = -v;
+        }
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+        fixbuf[0] = v;
+#else
+        {
+            int i;
+            for (i = 0; i < numberof(fixbuf); i++) {
+                fixbuf[i] = BIGLO(v);
+                v = BIGDN(v);
+            }
+        }
+#endif
+        dp = fixbuf;
+        de = fixbuf + numberof(fixbuf);
     }
     else {
-	s = RSTRING_PTR(str);
-    }
-    if (s) {
-	len = RSTRING_LEN(str);
-	if (s[len]) {		/* no sentinel somehow */
-	    char *p = ALLOCV(v, len+1);
-
-	    MEMCPY(p, s, char, len);
-	    p[len] = '\0';
-	    s = p;
-	}
+        dp = BDIGITS(val);
+        de = dp + RBIGNUM_LEN(val);
     }
-    ret = rb_cstr_to_inum(s, base, badcheck);
-    if (v)
-	ALLOCV_END(v);
-    return ret;
+    while (dp < de && de[-1] == 0)
+        de--;
+    while (dp < de && dp[0] == 0)
+        dp++;
+    if (dp == de) /* no bit set. */
+        return 0;
+    if (dp != de-1) /* two non-zero words. two bits set, at least. */
+        return 0;
+    d = *dp;
+    return POW2_P(d);
 }
 
-#if HAVE_LONG_LONG
 
-static VALUE
-rb_ull2big(unsigned LONG_LONG n)
+/*
+ * Export an integer into a buffer.
+ *
+ * This function fills the buffer specified by _words_ and _numwords_ as
+ * val in the format specified by _wordsize_, _nails_ and _flags_.
+ *
+ * [val] Fixnum, Bignum or another integer like object which has to_int method.
+ * [words] buffer to export abs(val).
+ * [numwords] the size of given buffer as number of words.
+ * [wordsize] the size of word as number of bytes.
+ * [nails] number of padding bits in a word.
+ *   Most significant nails bits of each word are filled by zero.
+ * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
+ *
+ * flags:
+ * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word.
+ * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word.
+ * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word.
+ * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word.
+ * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
+ * [INTEGER_PACK_2COMP] Use 2's complement representation.
+ * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
+ * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
+ * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
+ *
+ * This function fills the buffer specified by _words_
+ * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_.
+ * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is
+ * filled in the buffer.
+ *
+ * This function returns the signedness and overflow condition.
+ * The overflow condition depends on INTEGER_PACK_2COMP.
+ *
+ * INTEGER_PACK_2COMP is not specified:
+ *   -2 : negative overflow.  val <= -2**(numwords*(wordsize*CHAR_BIT-nails))
+ *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0
+ *   0 : zero.  val == 0
+ *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
+ *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
+ *
+ * INTEGER_PACK_2COMP is specified:
+ *   -2 : negative overflow.  val < -2**(numwords*(wordsize*CHAR_BIT-nails))
+ *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0
+ *   0 : zero.  val == 0
+ *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
+ *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
+ *
+ * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable
+ * in 2's complement representation but not representable in absolute value.
+ * So -1 is returned for the value if INTEGER_PACK_2COMP is specified
+ * but returns -2 if INTEGER_PACK_2COMP is not specified.
+ *
+ * The least significant words are filled in the buffer when overflow occurs.
+ */
+
+int
+rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 {
-    long i;
-    VALUE big = bignew(bdigit_roomof(SIZEOF_LONG_LONG), 1);
-    BDIGIT *digits = BDIGITS(big);
+    int sign;
+    BDIGIT *ds;
+    size_t num_bdigits;
+    BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
 
-#if SIZEOF_BDIGITS >= SIZEOF_LONG_LONG
-    digits[0] = n;
+    RB_GC_GUARD(val) = rb_to_int(val);
+
+    if (FIXNUM_P(val)) {
+        long v = FIX2LONG(val);
+        if (v < 0) {
+            sign = -1;
+            v = -v;
+        }
+        else {
+            sign = 1;
+        }
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+        fixbuf[0] = v;
 #else
-    for (i = 0; i < bdigit_roomof(SIZEOF_LONG_LONG); i++) {
-	digits[i] = BIGLO(n);
-	n = BIGDN(n);
-    }
+        {
+            int i;
+            for (i = 0; i < numberof(fixbuf); i++) {
+                fixbuf[i] = BIGLO(v);
+                v = BIGDN(v);
+            }
+        }
 #endif
-
-    i = bdigit_roomof(SIZEOF_LONG_LONG);
-    while (i-- && !digits[i]) ;
-    RBIGNUM_SET_LEN(big, i+1);
-    return big;
-}
-
-static VALUE
-rb_ll2big(LONG_LONG n)
-{
-    long neg = 0;
-    unsigned LONG_LONG u;
-    VALUE big;
-
-    if (n < 0) {
-        u = 1 + (unsigned LONG_LONG)(-(n + 1)); /* u = -n avoiding overflow */
-	neg = 1;
+        ds = fixbuf;
+        num_bdigits = numberof(fixbuf);
     }
     else {
-        u = n;
-    }
-    big = rb_ull2big(u);
-    if (neg) {
-	RBIGNUM_SET_SIGN(big, 0);
+        sign = RBIGNUM_POSITIVE_P(val) ? 1 : -1;
+        ds = BDIGITS(val);
+        num_bdigits = RBIGNUM_LEN(val);
     }
-    return big;
-}
 
-VALUE
-rb_ull2inum(unsigned LONG_LONG n)
-{
-    if (POSFIXABLE(n)) return LONG2FIX(n);
-    return rb_ull2big(n);
-}
-
-VALUE
-rb_ll2inum(LONG_LONG n)
-{
-    if (FIXABLE(n)) return LONG2FIX(n);
-    return rb_ll2big(n);
+    return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags);
 }
 
-#endif  /* HAVE_LONG_LONG */
-
-VALUE
-rb_cstr2inum(const char *str, int base)
-{
-    return rb_cstr_to_inum(str, base, base==0);
-}
+/*
+ * Import an integer from a buffer.
+ *
+ * [words] buffer to import from.
+ * [numwords] the size of given buffer as number of words.
+ * [wordsize] the size of word as number of bytes.
+ * [nails] number of padding bits in a word.
+ *   Most significant nails bits of each word are ignored.
+ * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
+ *
+ * flags:
+ * [INTEGER_PACK_MSWORD_FIRST] Interpret the first word as the most significant word.
+ * [INTEGER_PACK_LSWORD_FIRST] Interpret the first word as the least significant word.
+ * [INTEGER_PACK_MSBYTE_FIRST] Interpret the first byte in a word as the most significant byte in the word.
+ * [INTEGER_PACK_LSBYTE_FIRST] Interpret the first byte in a word as the least significant byte in the word.
+ * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
+ * [INTEGER_PACK_2COMP] Use 2's complement representation.
+ * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
+ * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
+ * [INTEGER_PACK_FORCE_BIGNUM] the result will be a Bignum
+ *   even if it is representable as a Fixnum.
+ * [INTEGER_PACK_NEGATIVE] Returns non-positive value.
+ *   (Returns non-negative value if not specified.)
+ * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
+ *
+ * This function returns the imported integer as Fixnum or Bignum.
+ *
+ * The range of the result value depends on INTEGER_PACK_2COMP and INTEGER_PACK_NEGATIVE.
+ *
+ * INTEGER_PACK_2COMP is not set:
+ *   0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails)) if !INTEGER_PACK_NEGATIVE
+ *   -2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0 if INTEGER_PACK_NEGATIVE
+ *
+ * INTEGER_PACK_2COMP is set:
+ *   -2**(numwords*(wordsize*CHAR_BIT-nails)-1) <= val <= 2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1 if !INTEGER_PACK_NEGATIVE
+ *   -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1 if INTEGER_PACK_NEGATIVE
+ *
+ * INTEGER_PACK_2COMP without INTEGER_PACK_NEGATIVE means sign extension.
+ * INTEGER_PACK_2COMP with INTEGER_PACK_NEGATIVE means the higher bits are assumed to be 1.
+ *
+ * Note that this function returns 0 when numwords is zero and
+ * INTEGER_PACK_2COMP is set but INTEGER_PACK_NEGATIVE is not set.
+ */
 
 VALUE
-rb_str2inum(VALUE str, int base)
+rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 {
-    return rb_str_to_inum(str, base, base==0);
-}
+    VALUE val;
+    size_t num_bdigits;
+    int sign;
+    int nlp_bits;
+    BDIGIT *ds;
+    BDIGIT fixbuf[2] = { 0, 0 };
 
-const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+    validate_integer_pack_format(numwords, wordsize, nails, flags,
+            INTEGER_PACK_MSWORD_FIRST|
+            INTEGER_PACK_LSWORD_FIRST|
+            INTEGER_PACK_MSBYTE_FIRST|
+            INTEGER_PACK_LSBYTE_FIRST|
+            INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP|
+            INTEGER_PACK_FORCE_BIGNUM|
+            INTEGER_PACK_NEGATIVE|
+            INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
 
-static VALUE bigsqr(VALUE x);
-static void bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp);
+    num_bdigits = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
 
-static inline int
-ones(register unsigned long x)
-{
-#if GCC_VERSION_SINCE(3, 4, 0)
-    return  __builtin_popcountl(x);
-#else
-#   if SIZEOF_LONG == 8
-#       define MASK_55 0x5555555555555555UL
-#       define MASK_33 0x3333333333333333UL
-#       define MASK_0f 0x0f0f0f0f0f0f0f0fUL
-#   else
-#       define MASK_55 0x55555555UL
-#       define MASK_33 0x33333333UL
-#       define MASK_0f 0x0f0f0f0fUL
-#   endif
-    x -= (x >> 1) & MASK_55;
-    x = ((x >> 2) & MASK_33) + (x & MASK_33);
-    x = ((x >> 4) + x) & MASK_0f;
-    x += (x >> 8);
-    x += (x >> 16);
-#   if SIZEOF_LONG == 8
-    x += (x >> 32);
-#   endif
-    return (int)(x & 0x7f);
-#   undef MASK_0f
-#   undef MASK_33
-#   undef MASK_55
-#endif
-}
+    if (LONG_MAX-1 < num_bdigits)
+        rb_raise(rb_eArgError, "too big to unpack as an integer");
+    if (num_bdigits <= numberof(fixbuf) && !(flags & INTEGER_PACK_FORCE_BIGNUM)) {
+        val = Qfalse;
+        ds = fixbuf;
+    }
+    else {
+        val = bignew((long)num_bdigits, 0);
+        ds = BDIGITS(val);
+    }
+    sign = bary_unpack_internal(ds, num_bdigits, words, numwords, wordsize, nails, flags, nlp_bits);
 
-static inline unsigned long
-next_pow2(register unsigned long x)
-{
-    x |= x >> 1;
-    x |= x >> 2;
-    x |= x >> 4;
-    x |= x >> 8;
-    x |= x >> 16;
-#if SIZEOF_LONG == 8
-    x |= x >> 32;
-#endif
-    return x + 1;
-}
+    if (sign == -2) {
+        if (val) {
+            big_extend_carry(val);
+        }
+        else if (num_bdigits == numberof(fixbuf)) {
+            val = bignew((long)num_bdigits+1, 0);
+	    MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
+            BDIGITS(val)[num_bdigits++] = 1;
+        }
+        else {
+            ds[num_bdigits++] = 1;
+        }
+    }
 
-static inline int
-floor_log2(register unsigned long x)
-{
-    x |= x >> 1;
-    x |= x >> 2;
-    x |= x >> 4;
-    x |= x >> 8;
-    x |= x >> 16;
-#if SIZEOF_LONG == 8
-    x |= x >> 32;
-#endif
-    return (int)ones(x) - 1;
-}
+    if (!val) {
+        BDIGIT_DBL u = fixbuf[0] + BIGUP(fixbuf[1]);
+        if (u == 0)
+            return LONG2FIX(0);
+	if (0 < sign && POSFIXABLE(u))
+            return LONG2FIX(u);
+	if (sign < 0 && BDIGIT_MSB(fixbuf[1]) == 0 &&
+                NEGFIXABLE(-(BDIGIT_DBL_SIGNED)u))
+            return LONG2FIX(-(BDIGIT_DBL_SIGNED)u);
+        val = bignew((long)num_bdigits, 0 <= sign);
+        MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
+    }
 
-static inline int
-ceil_log2(register unsigned long x)
-{
-    return floor_log2(x) + !POW2_P(x);
-}
+    if ((flags & INTEGER_PACK_FORCE_BIGNUM) && sign != 0 &&
+        bary_zero_p(BDIGITS(val), RBIGNUM_LEN(val)))
+        sign = 0;
+    RBIGNUM_SET_SIGN(val, 0 <= sign);
 
-#define LOG2_KARATSUBA_DIGITS 7
-#define KARATSUBA_DIGITS (1L<<LOG2_KARATSUBA_DIGITS)
-#define MAX_BIG2STR_TABLE_ENTRIES 64
+    if (flags & INTEGER_PACK_FORCE_BIGNUM)
+        return bigtrunc(val);
+    return bignorm(val);
+}
 
-static VALUE big2str_power_cache[35][MAX_BIG2STR_TABLE_ENTRIES];
+#define QUAD_SIZE 8
 
-static void
-power_cache_init(void)
+void
+rb_quad_pack(char *buf, VALUE val)
 {
-    int i, j;
-    for (i = 0; i < 35; ++i) {
-	for (j = 0; j < MAX_BIG2STR_TABLE_ENTRIES; ++j) {
-	    big2str_power_cache[i][j] = Qnil;
-	}
-    }
+    rb_integer_pack(val, buf, 1, QUAD_SIZE, 0,
+            INTEGER_PACK_NATIVE_BYTE_ORDER|
+            INTEGER_PACK_2COMP);
 }
 
-static inline VALUE
-power_cache_get_power0(int base, int i)
+VALUE
+rb_quad_unpack(const char *buf, int signed_p)
 {
-    if (NIL_P(big2str_power_cache[base - 2][i])) {
-	big2str_power_cache[base - 2][i] =
-	    i == 0 ? rb_big_pow(rb_int2big(base), INT2FIX(KARATSUBA_DIGITS))
-		   : bigsqr(power_cache_get_power0(base, i - 1));
-	rb_gc_register_mark_object(big2str_power_cache[base - 2][i]);
-    }
-    return big2str_power_cache[base - 2][i];
+    return rb_integer_unpack(buf, 1, QUAD_SIZE, 0,
+            INTEGER_PACK_NATIVE_BYTE_ORDER|
+            (signed_p ? INTEGER_PACK_2COMP : 0));
 }
 
-static VALUE
-power_cache_get_power(int base, long n1, long* m1)
+VALUE
+rb_cstr_to_inum(const char *str, int base, int badcheck)
 {
-    int i, m;
-    long j;
-    VALUE t;
-
-    if (n1 <= KARATSUBA_DIGITS)
-	rb_bug("n1 > KARATSUBA_DIGITS");
-
-    m = ceil_log2(n1);
-    if (m1) *m1 = 1 << m;
-    i = m - LOG2_KARATSUBA_DIGITS;
-    if (i >= MAX_BIG2STR_TABLE_ENTRIES)
-	i = MAX_BIG2STR_TABLE_ENTRIES - 1;
-    t = power_cache_get_power0(base, i);
+    const char *s = str;
+    char sign = 1, nondigit = 0;
+    int c;
+    VALUE z;
 
-    j = KARATSUBA_DIGITS*(1 << i);
-    while (n1 > j) {
-	t = bigsqr(t);
-	j *= 2;
-    }
-    return t;
-}
+    int bits_per_digit;
+    size_t i;
 
-/* big2str_muraken_find_n1
- *
- * Let a natural number x is given by:
- * x = 2^0 * x_0 + 2^1 * x_1 + ... + 2^(B*n_0 - 1) * x_{B*n_0 - 1},
- * where B is BITSPERDIG (i.e. BDIGITS*CHAR_BIT) and n_0 is
- * RBIGNUM_LEN(x).
- *
- * Now, we assume n_1 = min_n \{ n | 2^(B*n_0/2) <= b_1^(n_1) \}, so
- * it is realized that 2^(B*n_0) <= {b_1}^{2*n_1}, where b_1 is a
- * given radix number. And then, we have n_1 <= (B*n_0) /
- * (2*log_2(b_1)), therefore n_1 is given by ceil((B*n_0) /
- * (2*log_2(b_1))).
- */
-static long
-big2str_find_n1(VALUE x, int base)
-{
-    static const double log_2[] = {
-	1.0,              1.58496250072116, 2.0,
-	2.32192809488736, 2.58496250072116, 2.8073549220576,
-	3.0,              3.16992500144231, 3.32192809488736,
-	3.4594316186373,  3.58496250072116, 3.70043971814109,
-	3.8073549220576,  3.90689059560852, 4.0,
-	4.08746284125034, 4.16992500144231, 4.24792751344359,
-	4.32192809488736, 4.39231742277876, 4.4594316186373,
-	4.52356195605701, 4.58496250072116, 4.64385618977472,
-	4.70043971814109, 4.75488750216347, 4.8073549220576,
-	4.85798099512757, 4.90689059560852, 4.95419631038688,
-	5.0,              5.04439411935845, 5.08746284125034,
-	5.12928301694497, 5.16992500144231
-    };
-    long bits;
+    const char *digits_start, *digits_end, *p;
+    size_t num_digits;
+    size_t num_bdigits;
 
-    if (base < 2 || 36 < base)
-	rb_bug("invalid radix %d", base);
+#undef ISDIGIT
+#define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
+#define conv_digit(c) (ruby_digit36_to_number_table[(unsigned char)(c)])
 
-    if (FIXNUM_P(x)) {
-	bits = (SIZEOF_LONG*CHAR_BIT - 1)/2 + 1;
+    if (!str) {
+	if (badcheck) goto bad;
+	return INT2FIX(0);
     }
-    else if (BIGZEROP(x)) {
-	return 0;
+    while (ISSPACE(*str)) str++;
+
+    if (str[0] == '+') {
+	str++;
     }
-    else if (RBIGNUM_LEN(x) >= LONG_MAX/BITSPERDIG) {
-	rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
+    else if (str[0] == '-') {
+	str++;
+	sign = 0;
     }
-    else {
-	bits = BITSPERDIG*RBIGNUM_LEN(x);
+    if (str[0] == '+' || str[0] == '-') {
+	if (badcheck) goto bad;
+	return INT2FIX(0);
     }
-
-    /* @shyouhei note: vvvvvvvvvvvvv this cast is suspicious.  But I believe it is OK, because if that cast loses data, this x value is too big, and should have raised RangeError. */
-    return (long)ceil(((double)bits)/log_2[base - 2]);
-}
-
-static long
-big2str_orig(VALUE x, int base, char* ptr, long len, BDIGIT hbase, int hbase_numdigits, int trim)
-{
-    long i = RBIGNUM_LEN(x), j = len;
-    BDIGIT* ds = BDIGITS(x);
-
-    while (i && j > 0) {
-	long k = i;
-	BDIGIT_DBL num = 0;
-
-	while (k--) {               /* x / hbase */
-	    num = BIGUP(num) + ds[k];
-	    ds[k] = (BDIGIT)(num / hbase);
-	    num %= hbase;
+    if (base <= 0) {
+	if (str[0] == '0') {
+	    switch (str[1]) {
+	      case 'x': case 'X':
+		base = 16;
+                str += 2;
+		break;
+	      case 'b': case 'B':
+		base = 2;
+                str += 2;
+		break;
+	      case 'o': case 'O':
+		base = 8;
+                str += 2;
+		break;
+	      case 'd': case 'D':
+		base = 10;
+                str += 2;
+		break;
+	      default:
+		base = 8;
+	    }
 	}
-	if (trim && ds[i-1] == 0) i--;
-	k = hbase_numdigits;
-	while (k--) {
-	    ptr[--j] = ruby_digitmap[num % base];
-	    num /= base;
-	    if (j <= 0) break;
-	    if (trim && i == 0 && num == 0) break;
+	else if (base < -1) {
+	    base = -base;
 	}
-    }
-    if (trim) {
-	while (j < len && ptr[j] == '0') j++;
-	MEMMOVE(ptr, ptr + j, char, len - j);
-	len -= j;
-    }
-    return len;
-}
-
-static long
-big2str_karatsuba(VALUE x, int base, char* ptr,
-		  long n1, long len, BDIGIT hbase, int hbase_numdigits, int trim)
-{
-    long lh, ll, m1;
-    VALUE b, q, r;
-
-    if (BIGZEROP(x)) {
-	if (trim) return 0;
 	else {
-	    memset(ptr, '0', len);
-	    return len;
+	    base = 10;
 	}
     }
-
-    if (n1 <= KARATSUBA_DIGITS) {
-	return big2str_orig(x, base, ptr, len, hbase, hbase_numdigits, trim);
+    else if (base == 2) {
+	if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
+	    str += 2;
+	}
     }
-
-    b = power_cache_get_power(base, n1, &m1);
-    bigdivmod(x, b, &q, &r);
-    rb_obj_hide(q);
-    rb_obj_hide(r);
-    lh = big2str_karatsuba(q, base, ptr, (len - m1)/2,
-			   len - m1, hbase, hbase_numdigits, trim);
-    rb_big_resize(q, 0);
-    ll = big2str_karatsuba(r, base, ptr + lh, m1/2,
-			   m1, hbase, hbase_numdigits, !lh && trim);
-    rb_big_resize(r, 0);
-
-    return lh + ll;
-}
-
-static VALUE
-big2str_base_powerof2(VALUE x, size_t len, int base, int trim)
-{
-    int word_numbits = ffs(base) - 1;
-    size_t numwords;
-    VALUE result;
-    char *ptr;
-    numwords = trim ? rb_absint_numwords(x, word_numbits, NULL) : len;
-    if (RBIGNUM_NEGATIVE_P(x) || !trim) {
-        if (LONG_MAX-1 < numwords)
-            rb_raise(rb_eArgError, "too big number");
-        result = rb_usascii_str_new(0, 1+numwords);
-        ptr = RSTRING_PTR(result);
-        *ptr++ = RBIGNUM_POSITIVE_P(x) ? '+' : '-';
+    else if (base == 8) {
+	if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
+	    str += 2;
+	}
     }
-    else {
-        if (LONG_MAX < numwords)
-            rb_raise(rb_eArgError, "too big number");
-        result = rb_usascii_str_new(0, numwords);
-        ptr = RSTRING_PTR(result);
+    else if (base == 10) {
+	if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
+	    str += 2;
+	}
     }
-    rb_integer_pack(x, ptr, numwords, 1, CHAR_BIT-word_numbits,
-                    INTEGER_PACK_BIG_ENDIAN);
-    while (0 < numwords) {
-        *ptr = ruby_digitmap[*(unsigned char *)ptr];
-        ptr++;
-        numwords--;
+    else if (base == 16) {
+	if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
+	    str += 2;
+	}
     }
-    return result;
-}
-
-VALUE
-rb_big2str0(VALUE x, int base, int trim)
-{
-    int off;
-    VALUE ss, xx;
-    long n1, n2, len;
-    BDIGIT hbase;
-    int hbase_numdigits;
-    char* ptr;
-
-    if (FIXNUM_P(x)) {
-	return rb_fix2str(x, base);
+    if (base < 2 || 36 < base) {
+        rb_raise(rb_eArgError, "invalid radix %d", base);
     }
-    if (BIGZEROP(x)) {
-	return rb_usascii_str_new2("0");
+    if (*str == '0') {		/* squeeze preceding 0s */
+	int us = 0;
+	while ((c = *++str) == '0' || c == '_') {
+	    if (c == '_') {
+		if (++us >= 2)
+		    break;
+	    } else
+		us = 0;
+	}
+	if (!(c = *str) || ISSPACE(c)) --str;
+    }
+    c = *str;
+    c = conv_digit(c);
+    if (c < 0 || c >= base) {
+	if (badcheck) goto bad;
+	return INT2FIX(0);
     }
 
-    if (base < 2 || 36 < base)
-	rb_raise(rb_eArgError, "invalid radix %d", base);
+    bits_per_digit = bitsize(base-1);
+    if (bits_per_digit * strlen(str) <= sizeof(long) * CHAR_BIT) {
+        char *end;
+	unsigned long val = STRTOUL(str, &end, base);
 
-    n2 = big2str_find_n1(x, base);
+	if (str < end && *end == '_') goto bigparse;
+	if (badcheck) {
+	    if (end == str) goto bad; /* no number */
+	    while (*end && ISSPACE(*end)) end++;
+	    if (*end) goto bad;	      /* trailing garbage */
+	}
 
-    if (POW2_P(base)) {
-        /* base == 2 || base == 4 || base == 8 || base == 16 || base == 32 */
-        return big2str_base_powerof2(x, (size_t)n2, base, trim);
+	if (POSFIXABLE(val)) {
+	    if (sign) return LONG2FIX(val);
+	    else {
+		long result = -(long)val;
+		return LONG2FIX(result);
+	    }
+	}
+	else {
+	    VALUE big = rb_uint2big(val);
+	    RBIGNUM_SET_SIGN(big, sign);
+	    return bignorm(big);
+	}
     }
+  bigparse:
+    if (badcheck && *str == '_') goto bad;
 
-    n1 = (n2 + 1) / 2;
-    ss = rb_usascii_str_new(0, n2 + 1); /* plus one for sign */
-    ptr = RSTRING_PTR(ss);
-    ptr[0] = RBIGNUM_SIGN(x) ? '+' : '-';
-
-    hbase = maxpow_in_bdigit(base, &hbase_numdigits);
-    off = !(trim && RBIGNUM_SIGN(x)); /* erase plus sign if trim */
-    xx = rb_big_clone(x);
-    RBIGNUM_SET_SIGN(xx, 1);
-    if (n1 <= KARATSUBA_DIGITS) {
-	len = off + big2str_orig(xx, base, ptr + off, n2, hbase, hbase_numdigits, trim);
+    num_digits = 0;
+    digits_start = digits_end = str;
+    while ((c = *str++) != 0) {
+	if (c == '_') {
+	    if (nondigit) {
+		if (badcheck) goto bad;
+		break;
+	    }
+	    nondigit = (char) c;
+	    continue;
+	}
+	else if ((c = conv_digit(c)) < 0) {
+	    break;
+	}
+	if (c >= base) break;
+	nondigit = 0;
+        num_digits++;
+        digits_end = str;
     }
-    else {
-	len = off + big2str_karatsuba(xx, base, ptr + off, n1,
-				      n2, hbase, hbase_numdigits, trim);
+    if (badcheck) {
+	str--;
+	if (s+1 < str && str[-1] == '_') goto bad;
+	while (*str && ISSPACE(*str)) str++;
+	if (*str) {
+	  bad:
+	    rb_invalid_str(s, "Integer()");
+	}
     }
-    rb_big_resize(xx, 0);
-
-    ptr[len] = '\0';
-    rb_str_resize(ss, len);
 
-    return ss;
-}
+    if (POW2_P(base)) {
+        BDIGIT *dp;
+        BDIGIT_DBL dd;
+        int numbits;
+        num_bdigits = (num_digits / BITSPERDIG) * bits_per_digit + roomof((num_digits % BITSPERDIG) * bits_per_digit, BITSPERDIG);
+        z = bignew(num_bdigits, sign);
+        dp = BDIGITS(z);
+        dd = 0;
+        numbits = 0;
+        for (p = digits_end; digits_start < p; p--) {
+            if ((c = conv_digit(p[-1])) < 0)
+                continue;
+            dd |= (BDIGIT_DBL)c << numbits;
+            numbits += bits_per_digit;
+            if (BITSPERDIG <= numbits) {
+                *dp++ = BIGLO(dd);
+                dd = BIGDN(dd);
+                numbits -= BITSPERDIG;
+            }
+        }
+        if (numbits) {
+            *dp++ = BIGLO(dd);
+        }
+        assert((size_t)(dp - BDIGITS(z)) == num_bdigits);
+    }
+    else {
+        int digits_per_bdigits_dbl;
+        BDIGIT_DBL power;
+        power = maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
+        num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
 
-VALUE
-rb_big2str(VALUE x, int base)
-{
-    return rb_big2str0(x, base, 1);
-}
+        if (num_bdigits < KARATSUBA_MUL_DIGITS) {
+            size_t blen = 1;
+            BDIGIT *zds;
+            BDIGIT_DBL num;
 
-/*
- *  call-seq:
- *     big.to_s(base=10)   ->  string
- *
- *  Returns a string containing the representation of <i>big</i> radix
- *  <i>base</i> (2 through 36).
- *
- *     12345654321.to_s         #=> "12345654321"
- *     12345654321.to_s(2)      #=> "1011011111110110111011110000110001"
- *     12345654321.to_s(8)      #=> "133766736061"
- *     12345654321.to_s(16)     #=> "2dfdbbc31"
- *     78546939656932.to_s(36)  #=> "rubyrules"
- */
+            z = bignew(num_bdigits, sign);
+            zds = BDIGITS(z);
+            MEMZERO(zds, BDIGIT, num_bdigits);
 
-static VALUE
-rb_big_to_s(int argc, VALUE *argv, VALUE x)
-{
-    int base;
+            for (p = digits_start; p < digits_end; p++) {
+                if ((c = conv_digit(*p)) < 0)
+                    continue;
+                num = c;
+                i = 0;
+                for (;;) {
+                    while (i<blen) {
+                        num += (BDIGIT_DBL)zds[i]*base;
+                        zds[i++] = BIGLO(num);
+                        num = BIGDN(num);
+                    }
+                    if (num) {
+                        blen++;
+                        continue;
+                    }
+                    break;
+                }
+                assert(blen <= num_bdigits);
+            }
+        }
+        else {
+            VALUE powerv;
+            size_t unit;
+            VALUE tmpuv = 0;
+            BDIGIT *uds, *vds, *tds;
+            BDIGIT_DBL dd;
+            BDIGIT_DBL current_base;
+            int m;
 
-    if (argc == 0) base = 10;
-    else {
-	VALUE b;
+            uds = ALLOCV_N(BDIGIT, tmpuv, 2*num_bdigits);
+            vds = uds + num_bdigits;
 
-	rb_scan_args(argc, argv, "01", &b);
-	base = NUM2INT(b);
-    }
-    return rb_big2str(x, base);
-}
+            powerv = bignew(2, 1);
+            BDIGITS(powerv)[0] = BIGLO(power);
+            BDIGITS(powerv)[1] = (BDIGIT)BIGDN(power);
 
-static unsigned long
-big2ulong(VALUE x, const char *type)
-{
-    long len = RBIGNUM_LEN(x);
-    unsigned long num;
-    BDIGIT *ds;
+            i = 0;
+            dd = 0;
+            current_base = 1;
+            m = digits_per_bdigits_dbl;
+            if (num_digits < (size_t)m)
+                m = (int)num_digits;
+            for (p = digits_end; digits_start < p; p--) {
+                if ((c = conv_digit(p[-1])) < 0)
+                    continue;
+                dd = dd + c * current_base;
+                current_base *= base;
+                num_digits--;
+                m--;
+                if (m == 0) {
+                    uds[i++] = BIGLO(dd);
+                    uds[i++] = (BDIGIT)BIGDN(dd);
+                    dd = 0;
+                    m = digits_per_bdigits_dbl;
+                    if (num_digits < (size_t)m)
+                        m = (int)num_digits;
+                    current_base = 1;
+                }
+            }
+            assert(i == num_bdigits);
+            for (unit = 2; unit < num_bdigits; unit *= 2) {
+                for (i = 0; i < num_bdigits; i += unit*2) {
+                    if (2*unit <= num_bdigits - i) {
+                        bary_mul(vds+i, unit*2, BDIGITS(powerv), RBIGNUM_LEN(powerv), uds+i+unit, unit);
+                        bary_add(vds+i, unit*2, vds+i, unit*2, uds+i, unit);
+                    }
+                    else if (unit <= num_bdigits - i) {
+                        bary_mul(vds+i, num_bdigits-i, BDIGITS(powerv), RBIGNUM_LEN(powerv), uds+i+unit, num_bdigits-(i+unit));
+                        bary_add(vds+i, num_bdigits-i, vds+i, num_bdigits-i, uds+i, unit);
+                    }
+                    else {
+                        MEMCPY(vds+i, uds+i, BDIGIT, num_bdigits-i);
+                    }
+                }
+                powerv = bigtrunc(bigmul0(powerv, powerv));
+                tds = vds;
+                vds = uds;
+                uds = tds;
+            }
+            while (0 < num_bdigits && uds[num_bdigits-1] == 0)
+                num_bdigits--;
+            z = bignew(num_bdigits, sign);
+            MEMCPY(BDIGITS(z), uds, BDIGIT, num_bdigits);
 
-    if (len == 0)
-        return 0;
-    if (BIGSIZE(x) > sizeof(long)) {
-        rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
-    }
-    ds = BDIGITS(x);
-#if SIZEOF_LONG <= SIZEOF_BDIGITS
-    num = (unsigned long)ds[0];
-#else
-    num = 0;
-    while (len--) {
-	num <<= BITSPERDIG;
-	num += (unsigned long)ds[len]; /* overflow is already checked */
+            if (tmpuv)
+                ALLOCV_END(tmpuv);
+        }
     }
-#endif
-    return num;
-}
 
-VALUE
-rb_big2ulong_pack(VALUE x)
-{
-    unsigned long num;
-    rb_integer_pack(x, &num, 1, sizeof(num), 0,
-        INTEGER_PACK_NATIVE_BYTE_ORDER|INTEGER_PACK_2COMP);
-    return num;
+    return bignorm(z);
 }
 
 VALUE
-rb_big2ulong(VALUE x)
+rb_str_to_inum(VALUE str, int base, int badcheck)
 {
-    unsigned long num = big2ulong(x, "unsigned long");
+    char *s;
+    long len;
+    VALUE v = 0;
+    VALUE ret;
 
-    if (RBIGNUM_POSITIVE_P(x)) {
-        return num;
+    StringValue(str);
+    rb_must_asciicompat(str);
+    if (badcheck) {
+	s = StringValueCStr(str);
     }
     else {
-        if (num <= LONG_MAX)
-            return -(long)num;
-        if (num == 1+(unsigned long)(-(LONG_MIN+1)))
-            return LONG_MIN;
+	s = RSTRING_PTR(str);
     }
-    rb_raise(rb_eRangeError, "bignum out of range of unsigned long");
-}
-
-SIGNED_VALUE
-rb_big2long(VALUE x)
-{
-    unsigned long num = big2ulong(x, "long");
+    if (s) {
+	len = RSTRING_LEN(str);
+	if (s[len]) {		/* no sentinel somehow */
+	    char *p = ALLOCV(v, len+1);
 
-    if (RBIGNUM_POSITIVE_P(x)) {
-        if (num <= LONG_MAX)
-            return num;
-    }
-    else {
-        if (num <= LONG_MAX)
-            return -(long)num;
-        if (num == 1+(unsigned long)(-(LONG_MIN+1)))
-            return LONG_MIN;
+	    MEMCPY(p, s, char, len);
+	    p[len] = '\0';
+	    s = p;
+	}
     }
-    rb_raise(rb_eRangeError, "bignum too big to convert into `long'");
+    ret = rb_cstr_to_inum(s, base, badcheck);
+    if (v)
+	ALLOCV_END(v);
+    return ret;
 }
 
 #if HAVE_LONG_LONG
 
-static unsigned LONG_LONG
-big2ull(VALUE x, const char *type)
+static VALUE
+rb_ull2big(unsigned LONG_LONG n)
 {
-    long len = RBIGNUM_LEN(x);
-    unsigned LONG_LONG num;
-    BDIGIT *ds = BDIGITS(x);
+    long i;
+    VALUE big = bignew(bdigit_roomof(SIZEOF_LONG_LONG), 1);
+    BDIGIT *digits = BDIGITS(big);
 
-    if (len == 0)
-        return 0;
-    if (BIGSIZE(x) > SIZEOF_LONG_LONG)
-	rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
-#if SIZEOF_LONG_LONG <= SIZEOF_BDIGITS
-    num = (unsigned LONG_LONG)ds[0];
+#if SIZEOF_BDIGITS >= SIZEOF_LONG_LONG
+    digits[0] = n;
 #else
-    num = 0;
-    while (len--) {
-	num = BIGUP(num);
-	num += ds[len];
+    for (i = 0; i < bdigit_roomof(SIZEOF_LONG_LONG); i++) {
+	digits[i] = BIGLO(n);
+	n = BIGDN(n);
     }
 #endif
-    return num;
+
+    i = bdigit_roomof(SIZEOF_LONG_LONG);
+    while (i-- && !digits[i]) ;
+    RBIGNUM_SET_LEN(big, i+1);
+    return big;
 }
 
-unsigned LONG_LONG
-rb_big2ull(VALUE x)
+static VALUE
+rb_ll2big(LONG_LONG n)
 {
-    unsigned LONG_LONG num = big2ull(x, "unsigned long long");
+    long neg = 0;
+    unsigned LONG_LONG u;
+    VALUE big;
 
-    if (RBIGNUM_POSITIVE_P(x)) {
-        return num;
+    if (n < 0) {
+        u = 1 + (unsigned LONG_LONG)(-(n + 1)); /* u = -n avoiding overflow */
+	neg = 1;
     }
     else {
-        if (num <= LLONG_MAX)
-            return -(LONG_LONG)num;
-        if (num == 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
-            return LLONG_MIN;
+        u = n;
     }
-    rb_raise(rb_eRangeError, "bignum out of range of unsigned long long");
+    big = rb_ull2big(u);
+    if (neg) {
+	RBIGNUM_SET_SIGN(big, 0);
+    }
+    return big;
 }
 
-LONG_LONG
-rb_big2ll(VALUE x)
+VALUE
+rb_ull2inum(unsigned LONG_LONG n)
 {
-    unsigned LONG_LONG num = big2ull(x, "long long");
+    if (POSFIXABLE(n)) return LONG2FIX(n);
+    return rb_ull2big(n);
+}
 
-    if (RBIGNUM_POSITIVE_P(x)) {
-        if (num <= LLONG_MAX)
-            return num;
-    }
-    else {
-        if (num <= LLONG_MAX)
-            return -(LONG_LONG)num;
-        if (num == 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
-            return LLONG_MIN;
-    }
-    rb_raise(rb_eRangeError, "bignum too big to convert into `long long'");
+VALUE
+rb_ll2inum(LONG_LONG n)
+{
+    if (FIXABLE(n)) return LONG2FIX(n);
+    return rb_ll2big(n);
 }
 
 #endif  /* HAVE_LONG_LONG */
 
-static VALUE
-dbl2big(double d)
+VALUE
+rb_cstr2inum(const char *str, int base)
 {
-    long i = 0;
-    BDIGIT c;
-    BDIGIT *digits;
-    VALUE z;
-    double u = (d < 0)?-d:d;
+    return rb_cstr_to_inum(str, base, base==0);
+}
 
-    if (isinf(d)) {
-	rb_raise(rb_eFloatDomainError, d < 0 ? "-Infinity" : "Infinity");
-    }
-    if (isnan(d)) {
-	rb_raise(rb_eFloatDomainError, "NaN");
-    }
+VALUE
+rb_str2inum(VALUE str, int base)
+{
+    return rb_str_to_inum(str, base, base==0);
+}
 
-    while (!POSFIXABLE(u) || 0 != (long)u) {
-	u /= (double)(BIGRAD);
-	i++;
-    }
-    z = bignew(i, d>=0);
-    digits = BDIGITS(z);
-    while (i--) {
-	u *= BIGRAD;
-	c = (BDIGIT)u;
-	u -= c;
-	digits[i] = c;
-    }
+static inline int
+ones(register unsigned long x)
+{
+#if GCC_VERSION_SINCE(3, 4, 0)
+    return  __builtin_popcountl(x);
+#else
+#   if SIZEOF_LONG == 8
+#       define MASK_55 0x5555555555555555UL
+#       define MASK_33 0x3333333333333333UL
+#       define MASK_0f 0x0f0f0f0f0f0f0f0fUL
+#   else
+#       define MASK_55 0x55555555UL
+#       define MASK_33 0x33333333UL
+#       define MASK_0f 0x0f0f0f0fUL
+#   endif
+    x -= (x >> 1) & MASK_55;
+    x = ((x >> 2) & MASK_33) + (x & MASK_33);
+    x = ((x >> 4) + x) & MASK_0f;
+    x += (x >> 8);
+    x += (x >> 16);
+#   if SIZEOF_LONG == 8
+    x += (x >> 32);
+#   endif
+    return (int)(x & 0x7f);
+#   undef MASK_0f
+#   undef MASK_33
+#   undef MASK_55
+#endif
+}
 
-    return z;
+static inline unsigned long
+next_pow2(register unsigned long x)
+{
+    x |= x >> 1;
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    x |= x >> 16;
+#if SIZEOF_LONG == 8
+    x |= x >> 32;
+#endif
+    return x + 1;
 }
 
-VALUE
-rb_dbl2big(double d)
+static inline int
+floor_log2(register unsigned long x)
 {
-    return bignorm(dbl2big(d));
+    x |= x >> 1;
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    x |= x >> 16;
+#if SIZEOF_LONG == 8
+    x |= x >> 32;
+#endif
+    return (int)ones(x) - 1;
 }
 
-static double
-big2dbl(VALUE x)
+static inline int
+ceil_log2(register unsigned long x)
 {
-    double d = 0.0;
-    long i = (bigtrunc(x), RBIGNUM_LEN(x)), lo = 0, bits;
-    BDIGIT *ds = BDIGITS(x), dl;
+    return floor_log2(x) + !POW2_P(x);
+}
 
-    if (i) {
-	bits = i * BITSPERDIG - nlz(ds[i-1]);
-	if (bits > DBL_MANT_DIG+DBL_MAX_EXP) {
-	    d = HUGE_VAL;
-	}
-	else {
-	    if (bits > DBL_MANT_DIG+1)
-		lo = (bits -= DBL_MANT_DIG+1) / BITSPERDIG;
-	    else
-		bits = 0;
-	    while (--i > lo) {
-		d = ds[i] + BIGRAD*d;
-	    }
-	    dl = ds[i];
-	    if (bits && (dl & ((BDIGIT)1 << (bits %= BITSPERDIG)))) {
-		int carry = (dl & ~(BDIGMAX << bits)) != 0;
-		if (!carry) {
-		    while (i-- > 0) {
-			carry = ds[i] != 0;
-			if (carry) break;
-		    }
-		}
-		if (carry) {
-		    dl &= BDIGMAX << bits;
-		    dl = BIGLO(dl + ((BDIGIT)1 << bits));
-		    if (!dl) d += 1;
-		}
-	    }
-	    d = dl + BIGRAD*d;
-	    if (lo) {
-		if (lo > INT_MAX / BITSPERDIG)
-		    d = HUGE_VAL;
-		else if (lo < INT_MIN / BITSPERDIG)
-		    d = 0.0;
-		else
-		    d = ldexp(d, (int)(lo * BITSPERDIG));
-	    }
+#define LOG2_KARATSUBA_DIGITS 7
+#define KARATSUBA_DIGITS (1L<<LOG2_KARATSUBA_DIGITS)
+#define MAX_BIG2STR_TABLE_ENTRIES 64
+
+static VALUE big2str_power_cache[35][MAX_BIG2STR_TABLE_ENTRIES];
+
+static void
+power_cache_init(void)
+{
+    int i, j;
+    for (i = 0; i < 35; ++i) {
+	for (j = 0; j < MAX_BIG2STR_TABLE_ENTRIES; ++j) {
+	    big2str_power_cache[i][j] = Qnil;
 	}
     }
-    if (!RBIGNUM_SIGN(x)) d = -d;
-    return d;
 }
 
-double
-rb_big2dbl(VALUE x)
+static inline VALUE
+power_cache_get_power0(int base, int i)
 {
-    double d = big2dbl(x);
-
-    if (isinf(d)) {
-	rb_warning("Bignum out of Float range");
-	if (d < 0.0)
-	    d = -HUGE_VAL;
-	else
-	    d = HUGE_VAL;
+    if (NIL_P(big2str_power_cache[base - 2][i])) {
+	big2str_power_cache[base - 2][i] =
+	    i == 0 ? rb_big_pow(rb_int2big(base), INT2FIX(KARATSUBA_DIGITS))
+		   : bigsqr(power_cache_get_power0(base, i - 1));
+	rb_gc_register_mark_object(big2str_power_cache[base - 2][i]);
     }
-    return d;
+    return big2str_power_cache[base - 2][i];
 }
 
-/*
- *  call-seq:
- *     big.to_f -> float
- *
- *  Converts <i>big</i> to a <code>Float</code>. If <i>big</i> doesn't
- *  fit in a <code>Float</code>, the result is infinity.
- *
- */
-
 static VALUE
-rb_big_to_f(VALUE x)
-{
-    return DBL2NUM(rb_big2dbl(x));
-}
-
-VALUE
-rb_integer_float_cmp(VALUE x, VALUE y)
+power_cache_get_power(int base, long n1, long* m1)
 {
-    double yd = RFLOAT_VALUE(y);
-    double yi, yf;
-    VALUE rel;
+    int i, m;
+    long j;
+    VALUE t;
 
-    if (isnan(yd))
-        return Qnil;
-    if (isinf(yd)) {
-        if (yd > 0.0) return INT2FIX(-1);
-        else return INT2FIX(1);
-    }
-    yf = modf(yd, &yi);
-    if (FIXNUM_P(x)) {
-#if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
-        double xd = (double)FIX2LONG(x);
-        if (xd < yd)
-            return INT2FIX(-1);
-        if (xd > yd)
-            return INT2FIX(1);
-        return INT2FIX(0);
-#else
-        long xl, yl;
-        if (yi < FIXNUM_MIN)
-            return INT2FIX(1);
-        if (FIXNUM_MAX+1 <= yi)
-            return INT2FIX(-1);
-        xl = FIX2LONG(x);
-        yl = (long)yi;
-        if (xl < yl)
-            return INT2FIX(-1);
-        if (xl > yl)
-            return INT2FIX(1);
-        if (yf < 0.0)
-            return INT2FIX(1);
-        if (0.0 < yf)
-            return INT2FIX(-1);
-        return INT2FIX(0);
-#endif
-    }
-    y = rb_dbl2big(yi);
-    rel = rb_big_cmp(x, y);
-    if (yf == 0.0 || rel != INT2FIX(0))
-        return rel;
-    if (yf < 0.0)
-        return INT2FIX(1);
-    return INT2FIX(-1);
-}
+    if (n1 <= KARATSUBA_DIGITS)
+	rb_bug("n1 > KARATSUBA_DIGITS");
 
-VALUE
-rb_integer_float_eq(VALUE x, VALUE y)
-{
-    double yd = RFLOAT_VALUE(y);
-    double yi, yf;
+    m = ceil_log2(n1);
+    if (m1) *m1 = 1 << m;
+    i = m - LOG2_KARATSUBA_DIGITS;
+    if (i >= MAX_BIG2STR_TABLE_ENTRIES)
+	i = MAX_BIG2STR_TABLE_ENTRIES - 1;
+    t = power_cache_get_power0(base, i);
 
-    if (isnan(yd) || isinf(yd))
-        return Qfalse;
-    yf = modf(yd, &yi);
-    if (yf != 0)
-        return Qfalse;
-    if (FIXNUM_P(x)) {
-#if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
-        double xd = (double)FIX2LONG(x);
-        if (xd != yd)
-            return Qfalse;
-        return Qtrue;
-#else
-        long xl, yl;
-        if (yi < LONG_MIN || LONG_MAX < yi)
-            return Qfalse;
-        xl = FIX2LONG(x);
-        yl = (long)yi;
-        if (xl != yl)
-            return Qfalse;
-        return Qtrue;
-#endif
+    j = KARATSUBA_DIGITS*(1 << i);
+    while (n1 > j) {
+	t = bigsqr(t);
+	j *= 2;
     }
-    y = rb_dbl2big(yi);
-    return rb_big_eq(x, y);
+    return t;
 }
 
-/*
- *  call-seq:
- *     big <=> numeric   -> -1, 0, +1 or nil
- *
- *  Comparison---Returns -1, 0, or +1 depending on whether +big+ is
- *  less than, equal to, or greater than +numeric+. This is the
- *  basis for the tests in Comparable.
+/* big2str_find_n1
  *
- *  +nil+ is returned if the two values are incomparable.
+ * Let a natural number x be given by:
+ * x = 2^0 * x_0 + 2^1 * x_1 + ... + 2^(B*n_0 - 1) * x_{B*n_0 - 1},
+ * where B is BITSPERDIG (i.e. BDIGITS*CHAR_BIT) and n_0 is
+ * RBIGNUM_LEN(x).
  *
+ * Now, we define n_1 = min \{ n | 2^(B*n_0/2) <= b_1^n \}, so that
+ * 2^(B*n_0) <= b_1^(2*n_1) holds, where b_1 is the given radix.
+ * Taking log_2 of the first inequality gives
+ * n_1 >= (B*n_0) / (2*log_2(b_1)), and since n_1 is the smallest such
+ * integer, n_1 = ceil((B*n_0) / (2*log_2(b_1))).
  */
-
-VALUE
-rb_big_cmp(VALUE x, VALUE y)
+static long
+big2str_find_n1(VALUE x, int base)
 {
-    long xlen = RBIGNUM_LEN(x);
-    BDIGIT *xds, *yds;
+    static const double log_2[] = {
+	1.0,              1.58496250072116, 2.0,
+	2.32192809488736, 2.58496250072116, 2.8073549220576,
+	3.0,              3.16992500144231, 3.32192809488736,
+	3.4594316186373,  3.58496250072116, 3.70043971814109,
+	3.8073549220576,  3.90689059560852, 4.0,
+	4.08746284125034, 4.16992500144231, 4.24792751344359,
+	4.32192809488736, 4.39231742277876, 4.4594316186373,
+	4.52356195605701, 4.58496250072116, 4.64385618977472,
+	4.70043971814109, 4.75488750216347, 4.8073549220576,
+	4.85798099512757, 4.90689059560852, 4.95419631038688,
+	5.0,              5.04439411935845, 5.08746284125034,
+	5.12928301694497, 5.16992500144231
+    };
+    long bits;
+
+    if (base < 2 || 36 < base)
+	rb_bug("invalid radix %d", base);
+
+    if (FIXNUM_P(x)) {
+	bits = (SIZEOF_LONG*CHAR_BIT - 1)/2 + 1;
+    }
+    else if (BIGZEROP(x)) {
+	return 0;
+    }
+    else if (RBIGNUM_LEN(x) >= LONG_MAX/BITSPERDIG) {
+	rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
+    }
+    else {
+	bits = BITSPERDIG*RBIGNUM_LEN(x);
+    }
 
-    switch (TYPE(y)) {
-      case T_FIXNUM:
-	y = rb_int2big(FIX2LONG(y));
-	break;
+    /* @shyouhei note: vvvvvvvvvvvvv this cast is suspicious.  But I believe it is OK, because if that cast loses data, this x value is too big, and should have raised RangeError. */
+    return (long)ceil(((double)bits)/log_2[base - 2]);
+}
 
-      case T_BIGNUM:
-	break;
+static long
+big2str_orig(VALUE x, int base, char* ptr, long len, BDIGIT hbase, int hbase_numdigits, int trim)
+{
+    long i = RBIGNUM_LEN(x), j = len;
+    BDIGIT* ds = BDIGITS(x);
 
-      case T_FLOAT:
-        return rb_integer_float_cmp(x, y);
+    while (i && j > 0) {
+	long k = i;
+	BDIGIT_DBL num = 0;
 
-      default:
-	return rb_num_coerce_cmp(x, y, rb_intern("<=>"));
+	while (k--) {               /* x / hbase */
+	    num = BIGUP(num) + ds[k];
+	    ds[k] = (BDIGIT)(num / hbase);
+	    num %= hbase;
+	}
+	if (trim && ds[i-1] == 0) i--;
+	k = hbase_numdigits;
+	while (k--) {
+	    ptr[--j] = ruby_digitmap[num % base];
+	    num /= base;
+	    if (j <= 0) break;
+	    if (trim && i == 0 && num == 0) break;
+	}
     }
-
-    if (RBIGNUM_SIGN(x) > RBIGNUM_SIGN(y)) return INT2FIX(1);
-    if (RBIGNUM_SIGN(x) < RBIGNUM_SIGN(y)) return INT2FIX(-1);
-    if (xlen < RBIGNUM_LEN(y))
-	return (RBIGNUM_SIGN(x)) ? INT2FIX(-1) : INT2FIX(1);
-    if (xlen > RBIGNUM_LEN(y))
-	return (RBIGNUM_SIGN(x)) ? INT2FIX(1) : INT2FIX(-1);
-
-    xds = BDIGITS(x);
-    yds = BDIGITS(y);
-
-    while (xlen-- && (xds[xlen]==yds[xlen]));
-    if (-1 == xlen) return INT2FIX(0);
-    return (xds[xlen] > yds[xlen]) ?
-	(RBIGNUM_SIGN(x) ? INT2FIX(1) : INT2FIX(-1)) :
-	    (RBIGNUM_SIGN(x) ? INT2FIX(-1) : INT2FIX(1));
+    if (trim) {
+	while (j < len && ptr[j] == '0') j++;
+	MEMMOVE(ptr, ptr + j, char, len - j);
+	len -= j;
+    }
+    return len;
 }
 
-enum big_op_t {
-    big_op_gt,
-    big_op_ge,
-    big_op_lt,
-    big_op_le
-};
-
-static VALUE
-big_op(VALUE x, VALUE y, enum big_op_t op)
+static long
+big2str_karatsuba(VALUE x, int base, char* ptr,
+		  long n1, long len, BDIGIT hbase, int hbase_numdigits, int trim)
 {
-    VALUE rel;
-    int n;
-
-    switch (TYPE(y)) {
-      case T_FIXNUM:
-      case T_BIGNUM:
-	rel = rb_big_cmp(x, y);
-	break;
-
-      case T_FLOAT:
-        rel = rb_integer_float_cmp(x, y);
-        break;
+    long lh, ll, m1;
+    VALUE b, q, r;
 
-      default:
-	{
-	    ID id = 0;
-	    switch (op) {
-		case big_op_gt: id = '>'; break;
-		case big_op_ge: id = rb_intern(">="); break;
-		case big_op_lt: id = '<'; break;
-		case big_op_le: id = rb_intern("<="); break;
-	    }
-	    return rb_num_coerce_relop(x, y, id);
+    if (BIGZEROP(x)) {
+	if (trim) return 0;
+	else {
+	    memset(ptr, '0', len);
+	    return len;
 	}
     }
 
-    if (NIL_P(rel)) return Qfalse;
-    n = FIX2INT(rel);
-
-    switch (op) {
-	case big_op_gt: return n >  0 ? Qtrue : Qfalse;
-	case big_op_ge: return n >= 0 ? Qtrue : Qfalse;
-	case big_op_lt: return n <  0 ? Qtrue : Qfalse;
-	case big_op_le: return n <= 0 ? Qtrue : Qfalse;
+    if (n1 <= KARATSUBA_DIGITS) {
+	return big2str_orig(x, base, ptr, len, hbase, hbase_numdigits, trim);
     }
-    return Qundef;
-}
 
-/*
- * call-seq:
- *   big > real  ->  true or false
- *
- * Returns <code>true</code> if the value of <code>big</code> is
- * greater than that of <code>real</code>.
- */
+    b = power_cache_get_power(base, n1, &m1);
+    bigdivmod(x, b, &q, &r);
+    rb_obj_hide(q);
+    rb_obj_hide(r);
+    lh = big2str_karatsuba(q, base, ptr, (len - m1)/2,
+			   len - m1, hbase, hbase_numdigits, trim);
+    rb_big_resize(q, 0);
+    ll = big2str_karatsuba(r, base, ptr + lh, m1/2,
+			   m1, hbase, hbase_numdigits, !lh && trim);
+    rb_big_resize(r, 0);
 
-static VALUE
-big_gt(VALUE x, VALUE y)
-{
-    return big_op(x, y, big_op_gt);
+    return lh + ll;
 }
 
-/*
- * call-seq:
- *   big >= real  ->  true or false
- *
- * Returns <code>true</code> if the value of <code>big</code> is
- * greater than or equal to that of <code>real</code>.
- */
-
 static VALUE
-big_ge(VALUE x, VALUE y)
+big2str_base_powerof2(VALUE x, size_t len, int base, int trim)
 {
-    return big_op(x, y, big_op_ge);
+    int word_numbits = ffs(base) - 1;
+    size_t numwords;
+    VALUE result;
+    char *ptr;
+    numwords = trim ? rb_absint_numwords(x, word_numbits, NULL) : len;
+    if (RBIGNUM_NEGATIVE_P(x) || !trim) {
+        if (LONG_MAX-1 < numwords)
+            rb_raise(rb_eArgError, "too big number");
+        result = rb_usascii_str_new(0, 1+numwords);
+        ptr = RSTRING_PTR(result);
+        *ptr++ = RBIGNUM_POSITIVE_P(x) ? '+' : '-';
+    }
+    else {
+        if (LONG_MAX < numwords)
+            rb_raise(rb_eArgError, "too big number");
+        result = rb_usascii_str_new(0, numwords);
+        ptr = RSTRING_PTR(result);
+    }
+    rb_integer_pack(x, ptr, numwords, 1, CHAR_BIT-word_numbits,
+                    INTEGER_PACK_BIG_ENDIAN);
+    while (0 < numwords) {
+        *ptr = ruby_digitmap[*(unsigned char *)ptr];
+        ptr++;
+        numwords--;
+    }
+    return result;
 }
 
-/*
- * call-seq:
- *   big < real  ->  true or false
- *
- * Returns <code>true</code> if the value of <code>big</code> is
- * less than that of <code>real</code>.
- */
-
-static VALUE
-big_lt(VALUE x, VALUE y)
+VALUE
+rb_big2str0(VALUE x, int base, int trim)
 {
-    return big_op(x, y, big_op_lt);
-}
+    int off;
+    VALUE ss, xx;
+    long n1, n2, len;
+    BDIGIT hbase;
+    int hbase_numdigits;
+    char* ptr;
 
-/*
- * call-seq:
- *   big <= real  ->  true or false
- *
- * Returns <code>true</code> if the value of <code>big</code> is
- * less than or equal to that of <code>real</code>.
- */
+    if (FIXNUM_P(x)) {
+	return rb_fix2str(x, base);
+    }
+    if (BIGZEROP(x)) {
+	return rb_usascii_str_new2("0");
+    }
 
-static VALUE
-big_le(VALUE x, VALUE y)
-{
-    return big_op(x, y, big_op_le);
-}
+    if (base < 2 || 36 < base)
+	rb_raise(rb_eArgError, "invalid radix %d", base);
 
-/*
- *  call-seq:
- *     big == obj  -> true or false
- *
- *  Returns <code>true</code> only if <i>obj</i> has the same value
- *  as <i>big</i>. Contrast this with <code>Bignum#eql?</code>, which
- *  requires <i>obj</i> to be a <code>Bignum</code>.
- *
- *     68719476736 == 68719476736.0   #=> true
- */
+    n2 = big2str_find_n1(x, base);
 
-VALUE
-rb_big_eq(VALUE x, VALUE y)
-{
-    switch (TYPE(y)) {
-      case T_FIXNUM:
-	if (bignorm(x) == y) return Qtrue;
-	y = rb_int2big(FIX2LONG(y));
-	break;
-      case T_BIGNUM:
-	break;
-      case T_FLOAT:
-        return rb_integer_float_eq(x, y);
-      default:
-	return rb_equal(y, x);
+    if (POW2_P(base)) {
+        /* base == 2 || base == 4 || base == 8 || base == 16 || base == 32 */
+        return big2str_base_powerof2(x, (size_t)n2, base, trim);
     }
-    if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse;
-    if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse;
-    if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse;
-    return Qtrue;
-}
 
-/*
- *  call-seq:
- *     big.eql?(obj)   -> true or false
- *
- *  Returns <code>true</code> only if <i>obj</i> is a
- *  <code>Bignum</code> with the same value as <i>big</i>. Contrast this
- *  with <code>Bignum#==</code>, which performs type conversions.
- *
- *     68719476736.eql?(68719476736.0)   #=> false
- */
+    n1 = (n2 + 1) / 2;
+    ss = rb_usascii_str_new(0, n2 + 1); /* plus one for sign */
+    ptr = RSTRING_PTR(ss);
+    ptr[0] = RBIGNUM_SIGN(x) ? '+' : '-';
+
+    hbase = maxpow_in_bdigit(base, &hbase_numdigits);
+    off = !(trim && RBIGNUM_SIGN(x)); /* erase plus sign if trim */
+    xx = rb_big_clone(x);
+    RBIGNUM_SET_SIGN(xx, 1);
+    if (n1 <= KARATSUBA_DIGITS) {
+	len = off + big2str_orig(xx, base, ptr + off, n2, hbase, hbase_numdigits, trim);
+    }
+    else {
+	len = off + big2str_karatsuba(xx, base, ptr + off, n1,
+				      n2, hbase, hbase_numdigits, trim);
+    }
+    rb_big_resize(xx, 0);
+
+    ptr[len] = '\0';
+    rb_str_resize(ss, len);
+
+    return ss;
+}
 
 VALUE
-rb_big_eql(VALUE x, VALUE y)
+rb_big2str(VALUE x, int base)
 {
-    if (!RB_TYPE_P(y, T_BIGNUM)) return Qfalse;
-    if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse;
-    if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse;
-    if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse;
-    return Qtrue;
+    return rb_big2str0(x, base, 1);
 }
 
 /*
- * call-seq:
- *    -big   ->  integer
+ *  call-seq:
+ *     big.to_s(base=10)   ->  string
  *
- * Unary minus (returns an integer whose value is 0-big)
+ *  Returns a string containing the representation of <i>big</i> in
+ *  radix <i>base</i> (2 through 36).
+ *
+ *     12345654321.to_s         #=> "12345654321"
+ *     12345654321.to_s(2)      #=> "1011011111110110111011110000110001"
+ *     12345654321.to_s(8)      #=> "133766736061"
+ *     12345654321.to_s(16)     #=> "2dfdbbc31"
+ *     78546939656932.to_s(36)  #=> "rubyrules"
  */
 
-VALUE
-rb_big_uminus(VALUE x)
+static VALUE
+rb_big_to_s(int argc, VALUE *argv, VALUE x)
 {
-    VALUE z = rb_big_clone(x);
+    int base;
 
-    RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
+    if (argc == 0) base = 10;
+    else {
+	VALUE b;
 
-    return bignorm(z);
+	rb_scan_args(argc, argv, "01", &b);
+	base = NUM2INT(b);
+    }
+    return rb_big2str(x, base);
 }
 
-/*
- * call-seq:
- *     ~big  ->  integer
- *
- * Inverts the bits in big. As Bignums are conceptually infinite
- * length, the result acts as if it had an infinite number of one
- * bits to the left. In hex representations, this is displayed
- * as two periods to the left of the digits.
- *
- *   sprintf("%X", ~0x1122334455)    #=> "..FEEDDCCBBAA"
- */
+static unsigned long
+big2ulong(VALUE x, const char *type)
+{
+    long len = RBIGNUM_LEN(x);
+    unsigned long num;
+    BDIGIT *ds;
 
-static VALUE
-rb_big_neg(VALUE x)
+    if (len == 0)
+        return 0;
+    if (BIGSIZE(x) > sizeof(long)) {
+        rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
+    }
+    ds = BDIGITS(x);
+#if SIZEOF_LONG <= SIZEOF_BDIGITS
+    num = (unsigned long)ds[0];
+#else
+    num = 0;
+    while (len--) {
+	num <<= BITSPERDIG;
+	num += (unsigned long)ds[len]; /* overflow is already checked */
+    }
+#endif
+    return num;
+}
+
+VALUE
+rb_big2ulong_pack(VALUE x)
 {
-    VALUE z = rb_big_clone(x);
-    BDIGIT *ds = BDIGITS(z);
-    long n = RBIGNUM_LEN(z);
+    unsigned long num;
+    rb_integer_pack(x, &num, 1, sizeof(num), 0,
+        INTEGER_PACK_NATIVE_BYTE_ORDER|INTEGER_PACK_2COMP);
+    return num;
+}
 
-    if (!n) return INT2FIX(-1);
+VALUE
+rb_big2ulong(VALUE x)
+{
+    unsigned long num = big2ulong(x, "unsigned long");
 
-    if (RBIGNUM_POSITIVE_P(z)) {
-        if (bary_plus_one(ds, n)) {
-            big_extend_carry(z);
-        }
-        RBIGNUM_SET_NEGATIVE_SIGN(z);
+    if (RBIGNUM_POSITIVE_P(x)) {
+        return num;
     }
     else {
-        bary_neg(ds, n);
-        if (bary_plus_one(ds, n))
-            return INT2FIX(-1);
-        bary_neg(ds, n);
-        RBIGNUM_SET_POSITIVE_SIGN(z);
+        if (num <= LONG_MAX)
+            return -(long)num;
+        if (num == 1+(unsigned long)(-(LONG_MIN+1)))
+            return LONG_MIN;
     }
-
-    return bignorm(z);
+    rb_raise(rb_eRangeError, "bignum out of range of unsigned long");
 }
 
-static void
-bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
+SIGNED_VALUE
+rb_big2long(VALUE x)
 {
-    bary_sub(zds, zn, xds, xn, yds, yn);
+    unsigned long num = big2ulong(x, "long");
+
+    if (RBIGNUM_POSITIVE_P(x)) {
+        if (num <= LONG_MAX)
+            return num;
+    }
+    else {
+        if (num <= LONG_MAX)
+            return -(long)num;
+        if (num == 1+(unsigned long)(-(LONG_MIN+1)))
+            return LONG_MIN;
+    }
+    rb_raise(rb_eRangeError, "bignum too big to convert into `long'");
 }
 
-static int
-bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow)
-{
-    BDIGIT_DBL_SIGNED num;
-    size_t i;
+#if HAVE_LONG_LONG
 
-    assert(yn <= xn);
-    assert(xn <= zn);
+static unsigned LONG_LONG
+big2ull(VALUE x, const char *type)
+{
+    long len = RBIGNUM_LEN(x);
+    unsigned LONG_LONG num;
+    BDIGIT *ds = BDIGITS(x);
 
-    num = borrow ? -1 : 0;
-    for (i = 0; i < yn; i++) {
-	num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
+    if (len == 0)
+        return 0;
+    if (BIGSIZE(x) > SIZEOF_LONG_LONG)
+	rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
+#if SIZEOF_LONG_LONG <= SIZEOF_BDIGITS
+    num = (unsigned LONG_LONG)ds[0];
+#else
+    num = 0;
+    while (len--) {
+	num = BIGUP(num);
+	num += ds[len];
     }
-    for (; i < xn; i++) {
-        if (num == 0) goto num_is_zero;
-	num += xds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
+#endif
+    return num;
+}
+
+unsigned LONG_LONG
+rb_big2ull(VALUE x)
+{
+    unsigned LONG_LONG num = big2ull(x, "unsigned long long");
+
+    if (RBIGNUM_POSITIVE_P(x)) {
+        return num;
     }
-    if (num == 0) goto num_is_zero;
-    for (; i < zn; i++) {
-	zds[i] = BDIGMAX;
+    else {
+        if (num <= LLONG_MAX)
+            return -(LONG_LONG)num;
+        if (num == 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
+            return LLONG_MIN;
     }
-    return 1;
+    rb_raise(rb_eRangeError, "bignum out of range of unsigned long long");
+}
 
-  num_is_zero:
-    if (xds == zds && xn == zn)
-        return 0;
-    for (; i < xn; i++) {
-	zds[i] = xds[i];
+LONG_LONG
+rb_big2ll(VALUE x)
+{
+    unsigned LONG_LONG num = big2ull(x, "long long");
+
+    if (RBIGNUM_POSITIVE_P(x)) {
+        if (num <= LLONG_MAX)
+            return num;
     }
-    for (; i < zn; i++) {
-	zds[i] = 0;
+    else {
+        if (num <= LLONG_MAX)
+            return -(LONG_LONG)num;
+        if (num == 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
+            return LLONG_MIN;
     }
-    return 0;
+    rb_raise(rb_eRangeError, "bignum too big to convert into `long long'");
 }
 
-static int
-bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
+#endif  /* HAVE_LONG_LONG */
+
+static VALUE
+dbl2big(double d)
 {
-    return bary_subb(zds, zn, xds, xn, yds, yn, 0);
+    long i = 0;
+    BDIGIT c;
+    BDIGIT *digits;
+    VALUE z;
+    double u = (d < 0)?-d:d;
+
+    if (isinf(d)) {
+	rb_raise(rb_eFloatDomainError, d < 0 ? "-Infinity" : "Infinity");
+    }
+    if (isnan(d)) {
+	rb_raise(rb_eFloatDomainError, "NaN");
+    }
+
+    while (!POSFIXABLE(u) || 0 != (long)u) {
+	u /= (double)(BIGRAD);
+	i++;
+    }
+    z = bignew(i, d>=0);
+    digits = BDIGITS(z);
+    while (i--) {
+	u *= BIGRAD;
+	c = (BDIGIT)u;
+	u -= c;
+	digits[i] = c;
+    }
+
+    return z;
 }
 
-static int
-bary_sub_one(BDIGIT *zds, size_t zn)
+VALUE
+rb_dbl2big(double d)
 {
-    return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
+    return bignorm(dbl2big(d));
 }
 
-static VALUE
-bigsub(VALUE x, VALUE y)
+static double
+big2dbl(VALUE x)
 {
-    VALUE z = 0;
-    long i = RBIGNUM_LEN(x);
-    BDIGIT *xds, *yds;
+    double d = 0.0;
+    long i = (bigtrunc(x), RBIGNUM_LEN(x)), lo = 0, bits;
+    BDIGIT *ds = BDIGITS(x), dl;
 
-    /* if x is smaller than y, swap */
-    if (RBIGNUM_LEN(x) < RBIGNUM_LEN(y)) {
-	z = x; x = y; y = z;	/* swap x y */
-    }
-    else if (RBIGNUM_LEN(x) == RBIGNUM_LEN(y)) {
-	xds = BDIGITS(x);
-	yds = BDIGITS(y);
-	while (i > 0) {
-	    i--;
-	    if (xds[i] > yds[i]) {
-		break;
+    if (i) {
+	bits = i * BITSPERDIG - nlz(ds[i-1]);
+	if (bits > DBL_MANT_DIG+DBL_MAX_EXP) {
+	    d = HUGE_VAL;
+	}
+	else {
+	    if (bits > DBL_MANT_DIG+1)
+		lo = (bits -= DBL_MANT_DIG+1) / BITSPERDIG;
+	    else
+		bits = 0;
+	    while (--i > lo) {
+		d = ds[i] + BIGRAD*d;
 	    }
-	    if (xds[i] < yds[i]) {
-		z = x; x = y; y = z;	/* swap x y */
-		break;
+	    dl = ds[i];
+	    if (bits && (dl & ((BDIGIT)1 << (bits %= BITSPERDIG)))) {
+		int carry = (dl & ~(BDIGMAX << bits)) != 0;
+		if (!carry) {
+		    while (i-- > 0) {
+			carry = ds[i] != 0;
+			if (carry) break;
+		    }
+		}
+		if (carry) {
+		    dl &= BDIGMAX << bits;
+		    dl = BIGLO(dl + ((BDIGIT)1 << bits));
+		    if (!dl) d += 1;
+		}
+	    }
+	    d = dl + BIGRAD*d;
+	    if (lo) {
+		if (lo > INT_MAX / BITSPERDIG)
+		    d = HUGE_VAL;
+		else if (lo < INT_MIN / BITSPERDIG)
+		    d = 0.0;
+		else
+		    d = ldexp(d, (int)(lo * BITSPERDIG));
 	    }
 	}
     }
+    if (!RBIGNUM_SIGN(x)) d = -d;
+    return d;
+}
 
-    z = bignew(RBIGNUM_LEN(x), z==0);
-    bigsub_core(BDIGITS(x), RBIGNUM_LEN(x),
-		BDIGITS(y), RBIGNUM_LEN(y),
-		BDIGITS(z), RBIGNUM_LEN(z));
+double
+rb_big2dbl(VALUE x)
+{
+    double d = big2dbl(x);
 
-    return z;
+    if (isinf(d)) {
+	rb_warning("Bignum out of Float range");
+	if (d < 0.0)
+	    d = -HUGE_VAL;
+	else
+	    d = HUGE_VAL;
+    }
+    return d;
 }
 
-static VALUE bigadd_int(VALUE x, long y);
+/*
+ *  call-seq:
+ *     big.to_f -> float
+ *
+ *  Converts <i>big</i> to a <code>Float</code>. If <i>big</i> doesn't
+ *  fit in a <code>Float</code>, the result is infinity.
+ *
+ */
 
 static VALUE
-bigsub_int(VALUE x, long y0)
+rb_big_to_f(VALUE x)
 {
-    VALUE z;
-    BDIGIT *xds, *zds;
-    long xn, zn;
-    BDIGIT_DBL_SIGNED num;
-    long i, y;
-
-    y = y0;
-    xds = BDIGITS(x);
-    xn = RBIGNUM_LEN(x);
-
-    if (xn == 0)
-        return LONG2NUM(-y0);
+    return DBL2NUM(rb_big2dbl(x));
+}
 
-    zn = xn;
-#if SIZEOF_BDIGITS < SIZEOF_LONG
-    if (zn < bdigit_roomof(SIZEOF_LONG))
-        zn = bdigit_roomof(SIZEOF_LONG);
-#endif
-    z = bignew(zn, RBIGNUM_SIGN(x));
-    zds = BDIGITS(z);
+VALUE
+rb_integer_float_cmp(VALUE x, VALUE y)
+{
+    double yd = RFLOAT_VALUE(y);
+    double yi, yf;
+    VALUE rel;
 
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-    assert(xn == zn);
-    num = (BDIGIT_DBL_SIGNED)xds[0] - y;
-    if (xn == 1 && num < 0) {
-	RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
-	zds[0] = (BDIGIT)-num;
-	RB_GC_GUARD(x);
-	return bignorm(z);
+    if (isnan(yd))
+        return Qnil;
+    if (isinf(yd)) {
+        if (yd > 0.0) return INT2FIX(-1);
+        else return INT2FIX(1);
     }
-    zds[0] = BIGLO(num);
-    num = BIGDN(num);
-    i = 1;
-    if (i < xn)
-        goto y_is_zero_x;
-    goto finish;
+    yf = modf(yd, &yi);
+    if (FIXNUM_P(x)) {
+#if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
+        double xd = (double)FIX2LONG(x);
+        if (xd < yd)
+            return INT2FIX(-1);
+        if (xd > yd)
+            return INT2FIX(1);
+        return INT2FIX(0);
 #else
-    num = 0;
-    for (i=0; i < xn; i++) {
-        if (y == 0) goto y_is_zero_x;
-	num += (BDIGIT_DBL_SIGNED)xds[i] - BIGLO(y);
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-	y = BIGDN(y);
-    }
-    for (; i < zn; i++) {
-        if (y == 0) goto y_is_zero_z;
-        num -= BIGLO(y);
-        zds[i] = BIGLO(num);
-        num = BIGDN(num);
-        y = BIGDN(y);
-    }
-    goto finish;
+        long xl, yl;
+        if (yi < FIXNUM_MIN)
+            return INT2FIX(1);
+        if (FIXNUM_MAX+1 <= yi)
+            return INT2FIX(-1);
+        xl = FIX2LONG(x);
+        yl = (long)yi;
+        if (xl < yl)
+            return INT2FIX(-1);
+        if (xl > yl)
+            return INT2FIX(1);
+        if (yf < 0.0)
+            return INT2FIX(1);
+        if (0.0 < yf)
+            return INT2FIX(-1);
+        return INT2FIX(0);
 #endif
-
-    for (; i < xn; i++) {
-      y_is_zero_x:
-        if (num == 0) goto num_is_zero_x;
-	num += xds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-    }
-#if SIZEOF_BDIGITS < SIZEOF_LONG
-    for (; i < zn; i++) {
-      y_is_zero_z:
-        if (num == 0) goto num_is_zero_z;
-        zds[i] = BIGLO(num);
-        num = BIGDN(num);
     }
-#endif
-    goto finish;
+    y = rb_dbl2big(yi);
+    rel = rb_big_cmp(x, y);
+    if (yf == 0.0 || rel != INT2FIX(0))
+        return rel;
+    if (yf < 0.0)
+        return INT2FIX(1);
+    return INT2FIX(-1);
+}
 
-    for (; i < xn; i++) {
-      num_is_zero_x:
-	zds[i] = xds[i];
-    }
-#if SIZEOF_BDIGITS < SIZEOF_LONG
-    for (; i < zn; i++) {
-      num_is_zero_z:
-        zds[i] = 0;
-    }
-#endif
-    goto finish;
+VALUE
+rb_integer_float_eq(VALUE x, VALUE y)
+{
+    double yd = RFLOAT_VALUE(y);
+    double yi, yf;
 
-  finish:
-    assert(num == 0 || num == -1);
-    if (num < 0) {
-        get2comp(z);
-	RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
+    if (isnan(yd) || isinf(yd))
+        return Qfalse;
+    yf = modf(yd, &yi);
+    if (yf != 0)
+        return Qfalse;
+    if (FIXNUM_P(x)) {
+#if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
+        double xd = (double)FIX2LONG(x);
+        if (xd != yd)
+            return Qfalse;
+        return Qtrue;
+#else
+        long xl, yl;
+        if (yi < LONG_MIN || LONG_MAX < yi)
+            return Qfalse;
+        xl = FIX2LONG(x);
+        yl = (long)yi;
+        if (xl != yl)
+            return Qfalse;
+        return Qtrue;
+#endif
     }
-    RB_GC_GUARD(x);
-    return bignorm(z);
+    y = rb_dbl2big(yi);
+    return rb_big_eq(x, y);
 }
 
-static VALUE
-bigadd_int(VALUE x, long y)
+/*
+ *  call-seq:
+ *     big <=> numeric   -> -1, 0, +1 or nil
+ *
+ *  Comparison---Returns -1, 0, or +1 depending on whether +big+ is
+ *  less than, equal to, or greater than +numeric+. This is the
+ *  basis for the tests in Comparable.
+ *
+ *  +nil+ is returned if the two values are incomparable.
+ *
+ */
+
+VALUE
+rb_big_cmp(VALUE x, VALUE y)
 {
-    VALUE z;
-    BDIGIT *xds, *zds;
-    long xn, zn;
-    BDIGIT_DBL num;
-    long i;
-
-    xds = BDIGITS(x);
-    xn = RBIGNUM_LEN(x);
+    long xlen = RBIGNUM_LEN(x);
+    BDIGIT *xds, *yds;
 
-    if (xn == 0)
-        return LONG2NUM(y);
+    switch (TYPE(y)) {
+      case T_FIXNUM:
+	y = rb_int2big(FIX2LONG(y));
+	break;
 
-    zn = xn;
-#if SIZEOF_BDIGITS < SIZEOF_LONG
-    if (zn < bdigit_roomof(SIZEOF_LONG))
-        zn = bdigit_roomof(SIZEOF_LONG);
-#endif
-    zn++;
+      case T_BIGNUM:
+	break;
 
-    z = bignew(zn, RBIGNUM_SIGN(x));
-    zds = BDIGITS(z);
+      case T_FLOAT:
+        return rb_integer_float_cmp(x, y);
 
-#if SIZEOF_BDIGITS >= SIZEOF_LONG
-    num = (BDIGIT_DBL)xds[0] + y;
-    zds[0] = BIGLO(num);
-    num = BIGDN(num);
-    i = 1;
-    if (i < xn)
-        goto y_is_zero_x;
-    goto y_is_zero_z;
-#else
-    num = 0;
-    for (i=0; i < xn; i++) {
-        if (y == 0) goto y_is_zero_x;
-	num += (BDIGIT_DBL)xds[i] + BIGLO(y);
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-	y = BIGDN(y);
-    }
-    for (; i < zn; i++) {
-        if (y == 0) goto y_is_zero_z;
-	num += BIGLO(y);
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-	y = BIGDN(y);
+      default:
+	return rb_num_coerce_cmp(x, y, rb_intern("<=>"));
     }
-    goto finish;
-
-#endif
 
-    for (;i < xn; i++) {
-      y_is_zero_x:
-        if (num == 0) goto num_is_zero_x;
-	num += (BDIGIT_DBL)xds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-    }
-    for (; i < zn; i++) {
-      y_is_zero_z:
-        if (num == 0) goto num_is_zero_z;
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-    }
-    goto finish;
+    if (RBIGNUM_SIGN(x) > RBIGNUM_SIGN(y)) return INT2FIX(1);
+    if (RBIGNUM_SIGN(x) < RBIGNUM_SIGN(y)) return INT2FIX(-1);
+    if (xlen < RBIGNUM_LEN(y))
+	return (RBIGNUM_SIGN(x)) ? INT2FIX(-1) : INT2FIX(1);
+    if (xlen > RBIGNUM_LEN(y))
+	return (RBIGNUM_SIGN(x)) ? INT2FIX(1) : INT2FIX(-1);
 
-    for (;i < xn; i++) {
-      num_is_zero_x:
-	zds[i] = xds[i];
-    }
-    for (; i < zn; i++) {
-      num_is_zero_z:
-	zds[i] = 0;
-    }
-    goto finish;
+    xds = BDIGITS(x);
+    yds = BDIGITS(y);
 
-  finish:
-    RB_GC_GUARD(x);
-    return bignorm(z);
+    while (xlen-- && (xds[xlen]==yds[xlen]));
+    if (-1 == xlen) return INT2FIX(0);
+    return (xds[xlen] > yds[xlen]) ?
+	(RBIGNUM_SIGN(x) ? INT2FIX(1) : INT2FIX(-1)) :
+	    (RBIGNUM_SIGN(x) ? INT2FIX(-1) : INT2FIX(1));
 }
 
-static void
-bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
-{
-    bary_add(zds, zn, xds, xn, yds, yn);
-}
+enum big_op_t {
+    big_op_gt,
+    big_op_ge,
+    big_op_lt,
+    big_op_le
+};
 
-static int
-bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry)
+static VALUE
+big_op(VALUE x, VALUE y, enum big_op_t op)
 {
-    BDIGIT_DBL num;
-    size_t i;
+    VALUE rel;
+    int n;
 
-    assert(xn <= zn);
-    assert(yn <= zn);
+    switch (TYPE(y)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+	rel = rb_big_cmp(x, y);
+	break;
 
-    if (xn > yn) {
-	BDIGIT *tds;
-	tds = xds; xds = yds; yds = tds;
-	i = xn; xn = yn; yn = i;
-    }
+      case T_FLOAT:
+        rel = rb_integer_float_cmp(x, y);
+        break;
 
-    num = carry ? 1 : 0;
-    for (i = 0; i < xn; i++) {
-	num += (BDIGIT_DBL)xds[i] + yds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-    }
-    for (; i < yn; i++) {
-        if (num == 0) goto num_is_zero;
-	num += yds[i];
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
-    }
-    for (; i < zn; i++) {
-        if (num == 0) goto num_is_zero;
-	zds[i] = BIGLO(num);
-	num = BIGDN(num);
+      default:
+	{
+	    ID id = 0;
+	    switch (op) {
+		case big_op_gt: id = '>'; break;
+		case big_op_ge: id = rb_intern(">="); break;
+		case big_op_lt: id = '<'; break;
+		case big_op_le: id = rb_intern("<="); break;
+	    }
+	    return rb_num_coerce_relop(x, y, id);
+	}
     }
-    return num != 0;
 
-  num_is_zero:
-    if (yds == zds && yn == zn)
-        return 0;
-    for (; i < yn; i++) {
-	zds[i] = yds[i];
-    }
-    for (; i < zn; i++) {
-	zds[i] = 0;
+    if (NIL_P(rel)) return Qfalse;
+    n = FIX2INT(rel);
+
+    switch (op) {
+	case big_op_gt: return n >  0 ? Qtrue : Qfalse;
+	case big_op_ge: return n >= 0 ? Qtrue : Qfalse;
+	case big_op_lt: return n <  0 ? Qtrue : Qfalse;
+	case big_op_le: return n <= 0 ? Qtrue : Qfalse;
     }
-    return 0;
+    return Qundef;
 }
 
-static int
-bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
-{
-    return bary_addc(zds, zn, xds, xn, yds, yn, 0);
-}
+/*
+ * call-seq:
+ *   big > real  ->  true or false
+ *
+ * Returns <code>true</code> if the value of <code>big</code> is
+ * greater than that of <code>real</code>.
+ */
 
-static int
-bary_add_one(BDIGIT *zds, size_t zn)
+static VALUE
+big_gt(VALUE x, VALUE y)
 {
-    return bary_addc(zds, zn, NULL, 0, zds, zn, 1);
+    return big_op(x, y, big_op_gt);
 }
 
+/*
+ * call-seq:
+ *   big >= real  ->  true or false
+ *
+ * Returns <code>true</code> if the value of <code>big</code> is
+ * greater than or equal to that of <code>real</code>.
+ */
+
 static VALUE
-bigadd(VALUE x, VALUE y, int sign)
+big_ge(VALUE x, VALUE y)
 {
-    VALUE z;
-    long len;
+    return big_op(x, y, big_op_ge);
+}
 
-    sign = (sign == RBIGNUM_SIGN(y));
-    if (RBIGNUM_SIGN(x) != sign) {
-	if (sign) return bigsub(y, x);
-	return bigsub(x, y);
-    }
+/*
+ * call-seq:
+ *   big < real  ->  true or false
+ *
+ * Returns <code>true</code> if the value of <code>big</code> is
+ * less than that of <code>real</code>.
+ */
 
-    if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) {
-	len = RBIGNUM_LEN(x) + 1;
-    }
-    else {
-	len = RBIGNUM_LEN(y) + 1;
-    }
-    z = bignew(len, sign);
+static VALUE
+big_lt(VALUE x, VALUE y)
+{
+    return big_op(x, y, big_op_lt);
+}
 
-    bigadd_core(BDIGITS(x), RBIGNUM_LEN(x),
-		BDIGITS(y), RBIGNUM_LEN(y),
-		BDIGITS(z), RBIGNUM_LEN(z));
+/*
+ * call-seq:
+ *   big <= real  ->  true or false
+ *
+ * Returns <code>true</code> if the value of <code>big</code> is
+ * less than or equal to that of <code>real</code>.
+ */
 
-    return z;
+static VALUE
+big_le(VALUE x, VALUE y)
+{
+    return big_op(x, y, big_op_le);
 }
 
 /*
  *  call-seq:
- *     big + other  -> Numeric
+ *     big == obj  -> true or false
  *
- *  Adds big and other, returning the result.
+ *  Returns <code>true</code> only if <i>obj</i> has the same value
+ *  as <i>big</i>. Contrast this with <code>Bignum#eql?</code>, which
+ *  requires <i>obj</i> to be a <code>Bignum</code>.
+ *
+ *     68719476736 == 68719476736.0   #=> true
  */
 
 VALUE
-rb_big_plus(VALUE x, VALUE y)
+rb_big_eq(VALUE x, VALUE y)
 {
-    long n;
-
     switch (TYPE(y)) {
       case T_FIXNUM:
-	n = FIX2LONG(y);
-	if ((n > 0) != RBIGNUM_SIGN(x)) {
-	    if (n < 0) {
-		n = -n;
-	    }
-	    return bigsub_int(x, n);
-	}
-	if (n < 0) {
-	    n = -n;
-	}
-	return bigadd_int(x, n);
-
+	if (bignorm(x) == y) return Qtrue;
+	y = rb_int2big(FIX2LONG(y));
+	break;
       case T_BIGNUM:
-	return bignorm(bigadd(x, y, 1));
-
+	break;
       case T_FLOAT:
-	return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y));
-
+        return rb_integer_float_eq(x, y);
       default:
-	return rb_num_coerce_bin(x, y, '+');
+	return rb_equal(y, x);
     }
+    if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse;
+    if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse;
+    if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse;
+    return Qtrue;
 }
 
 /*
  *  call-seq:
- *     big - other  -> Numeric
+ *     big.eql?(obj)   -> true or false
  *
- *  Subtracts other from big, returning the result.
+ *  Returns <code>true</code> only if <i>obj</i> is a
+ *  <code>Bignum</code> with the same value as <i>big</i>. Contrast this
+ *  with <code>Bignum#==</code>, which performs type conversions.
+ *
+ *     68719476736.eql?(68719476736.0)   #=> false
  */
 
 VALUE
-rb_big_minus(VALUE x, VALUE y)
-{
-    long n;
-
-    switch (TYPE(y)) {
-      case T_FIXNUM:
-	n = FIX2LONG(y);
-	if ((n > 0) != RBIGNUM_SIGN(x)) {
-	    if (n < 0) {
-		n = -n;
-	    }
-	    return bigadd_int(x, n);
-	}
-	if (n < 0) {
-	    n = -n;
-	}
-	return bigsub_int(x, n);
-
-      case T_BIGNUM:
-	return bignorm(bigadd(x, y, 0));
-
-      case T_FLOAT:
-	return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y));
-
-      default:
-	return rb_num_coerce_bin(x, y, '-');
-    }
-}
-
-static long
-big_real_len(VALUE x)
-{
-    long i = RBIGNUM_LEN(x);
-    BDIGIT *xds = BDIGITS(x);
-    while (--i && !xds[i]);
-    return i + 1;
-}
-
-static void
-bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y)
-{
-    BDIGIT_DBL n;
-
-    assert(2 <= zl);
-
-    n = (BDIGIT_DBL)x * y;
-    zds[0] = BIGLO(n);
-    zds[1] = (BDIGIT)BIGDN(n);
-}
-
-static int
-bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl)
-{
-    BDIGIT_DBL n;
-    BDIGIT_DBL dd;
-    size_t j;
-
-    assert(zl > yl);
-
-    if (x == 0)
-        return 0;
-    dd = x;
-    n = 0;
-    for (j = 0; j < yl; j++) {
-        BDIGIT_DBL ee = n + dd * yds[j];
-        if (ee) {
-            n = zds[j] + ee;
-            zds[j] = BIGLO(n);
-            n = BIGDN(n);
-        }
-        else {
-            n = 0;
-        }
-
-    }
-    for (; j < zl; j++) {
-        if (n == 0)
-            break;
-        n += zds[j];
-        zds[j] = BIGLO(n);
-        n = BIGDN(n);
-    }
-    return n != 0;
-}
-
-static void
-bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+rb_big_eql(VALUE x, VALUE y)
 {
-    size_t i;
-
-    assert(xl + yl <= zl);
-
-    MEMZERO(zds, BDIGIT, zl);
-    for (i = 0; i < xl; i++) {
-        bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl);
-    }
+    if (!RB_TYPE_P(y, T_BIGNUM)) return Qfalse;
+    if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse;
+    if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse;
+    if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse;
+    return Qtrue;
 }
 
-/* balancing multiplication by slicing larger argument */
-static void
-bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
-{
-    VALUE work = 0;
-    size_t r, n;
-    BDIGIT *wds;
-    size_t wl;
-
-    assert(xl + yl <= zl);
-    assert(2 * xl <= yl || 3 * xl <= 2*(yl+2));
-
-    wl = xl * 2;
-    wds = ALLOCV_N(BDIGIT, work, wl);
-
-    MEMZERO(zds, BDIGIT, zl);
-
-    n = 0;
-    while (yl > 0) {
-	r = xl > yl ? yl : xl;
-        bary_mul(wds, xl + r, xds, xl, yds + n, r);
-        bary_add(zds + n, zl - n,
-                 zds + n, zl - n,
-                 wds, xl + r);
-	yl -= r;
-	n += r;
-    }
-
-    if (work)
-        ALLOCV_END(work);
-}
+/*
+ * call-seq:
+ *    -big   ->  integer
+ *
+ * Unary minus (returns an integer whose value is 0-big)
+ */
 
-/* multiplication by karatsuba method */
-static void
-bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+VALUE
+rb_big_uminus(VALUE x)
 {
-    VALUE work = 0;
-    BDIGIT *wds;
-    size_t wl;
-
-    size_t n;
-    int sub_p, borrow, carry1, carry2, carry3;
-
-    int odd_x = 0;
-    int odd_y = 0;
-
-    BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3;
-
-    assert(xl + yl <= zl);
-    assert(xl <= yl);
-    assert(yl < 2 * xl);
-
-    if (yl & 1) {
-        odd_y = 1;
-        yl--;
-        if (yl < xl) {
-            odd_x = 1;
-            xl--;
-        }
-    }
-
-    n = yl / 2;
-
-    assert(n < xl);
-
-    wl = n;
-    wds = ALLOCV_N(BDIGIT, work, wl);
-
-    /* Karatsuba algorithm:
-     *
-     * x = x0 + r*x1
-     * y = y0 + r*y1
-     * z = x*y
-     *   = (x0 + r*x1) * (y0 + r*y1)
-     *   = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
-     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
-     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
-     */
-
-    xds0 = xds;
-    xds1 = xds + n;
-    yds0 = yds;
-    yds1 = yds + n;
-    zds0 = zds;
-    zds1 = zds + n;
-    zds2 = zds + 2*n;
-    zds3 = zds + 3*n;
-
-    sub_p = 1;
-
-    /* zds0:? zds1:? zds2:? zds3:? wds:? */
-
-    if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) {
-        bary_2comp(zds0, n);
-        sub_p = !sub_p;
-    }
+    VALUE z = rb_big_clone(x);
 
-    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
+    RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
 
-    if (bary_sub(wds, n, yds, n, yds+n, n)) {
-        bary_2comp(wds, n);
-        sub_p = !sub_p;
-    }
+    return bignorm(z);
+}
 
-    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
+/*
+ * call-seq:
+ *     ~big  ->  integer
+ *
+ * Inverts the bits in big. As Bignums are conceptually infinite
+ * length, the result acts as if it had an infinite number of one
+ * bits to the left. In hex representations, this is displayed
+ * as two periods to the left of the digits.
+ *
+ *   sprintf("%X", ~0x1122334455)    #=> "..FEEDDCCBBAA"
+ */
 
-    bary_mul(zds1, 2*n, zds0, n, wds, n);
+static VALUE
+rb_big_neg(VALUE x)
+{
+    VALUE z = rb_big_clone(x);
+    BDIGIT *ds = BDIGITS(z);
+    long n = RBIGNUM_LEN(z);
 
-    /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
+    if (!n) return INT2FIX(-1);
 
-    borrow = 0;
-    if (sub_p) {
-        borrow = !bary_2comp(zds1, 2*n);
+    if (RBIGNUM_POSITIVE_P(z)) {
+        if (bary_plus_one(ds, n)) {
+            big_extend_carry(z);
+        }
+        RBIGNUM_SET_NEGATIVE_SIGN(z);
+    }
+    else {
+        bary_neg(ds, n);
+        if (bary_plus_one(ds, n))
+            return INT2FIX(-1);
+        bary_neg(ds, n);
+        RBIGNUM_SET_POSITIVE_SIGN(z);
     }
-    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
 
-    MEMCPY(wds, zds1, BDIGIT, n);
+    return bignorm(z);
+}
 
-    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
+static void
+bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
+{
+    bary_sub(zds, zn, xds, xn, yds, yn);
+}
 
-    bary_mul(zds0, 2*n, xds0, n, yds0, n);
+static VALUE
+bigsub(VALUE x, VALUE y)
+{
+    VALUE z = 0;
+    long i = RBIGNUM_LEN(x);
+    BDIGIT *xds, *yds;
 
-    /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
+    /* if x is smaller than y, swap */
+    if (RBIGNUM_LEN(x) < RBIGNUM_LEN(y)) {
+	z = x; x = y; y = z;	/* swap x y */
+    }
+    else if (RBIGNUM_LEN(x) == RBIGNUM_LEN(y)) {
+	xds = BDIGITS(x);
+	yds = BDIGITS(y);
+	while (i > 0) {
+	    i--;
+	    if (xds[i] > yds[i]) {
+		break;
+	    }
+	    if (xds[i] < yds[i]) {
+		z = x; x = y; y = z;	/* swap x y */
+		break;
+	    }
+	}
+    }
 
-    carry1 = bary_add(wds, n, wds, n, zds0, n);
-    carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
+    z = bignew(RBIGNUM_LEN(x), z==0);
+    bigsub_core(BDIGITS(x), RBIGNUM_LEN(x),
+		BDIGITS(y), RBIGNUM_LEN(y),
+		BDIGITS(z), RBIGNUM_LEN(z));
 
-    /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
+    return z;
+}
 
-    carry2 = bary_add(zds1, n, zds1, n, wds, n);
+static VALUE bigadd_int(VALUE x, long y);
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
+static VALUE
+bigsub_int(VALUE x, long y0)
+{
+    VALUE z;
+    BDIGIT *xds, *zds;
+    long xn, zn;
+    BDIGIT_DBL_SIGNED num;
+    long i, y;
 
-    MEMCPY(wds, zds2, BDIGIT, n);
+    y = y0;
+    xds = BDIGITS(x);
+    xn = RBIGNUM_LEN(x);
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+    if (xn == 0)
+        return LONG2NUM(-y0);
 
-    bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n);
+    zn = xn;
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+    if (zn < bdigit_roomof(SIZEOF_LONG))
+        zn = bdigit_roomof(SIZEOF_LONG);
+#endif
+    z = bignew(zn, RBIGNUM_SIGN(x));
+    zds = BDIGITS(z);
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+    assert(xn == zn);
+    num = (BDIGIT_DBL_SIGNED)xds[0] - y;
+    if (xn == 1 && num < 0) {
+	RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
+	zds[0] = (BDIGIT)-num;
+	RB_GC_GUARD(x);
+	return bignorm(z);
+    }
+    zds[0] = BIGLO(num);
+    num = BIGDN(num);
+    i = 1;
+    if (i < xn)
+        goto y_is_zero_x;
+    goto finish;
+#else
+    num = 0;
+    for (i=0; i < xn; i++) {
+        if (y == 0) goto y_is_zero_x;
+	num += (BDIGIT_DBL_SIGNED)xds[i] - BIGLO(y);
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+	y = BIGDN(y);
+    }
+    for (; i < zn; i++) {
+        if (y == 0) goto y_is_zero_z;
+        num -= BIGLO(y);
+        zds[i] = BIGLO(num);
+        num = BIGDN(num);
+        y = BIGDN(y);
+    }
+    goto finish;
+#endif
 
-    carry3 = bary_add(zds1, n, zds1, n, zds2, n);
+    for (; i < xn; i++) {
+      y_is_zero_x:
+        if (num == 0) goto num_is_zero_x;
+	num += xds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+    }
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+    for (; i < zn; i++) {
+      y_is_zero_z:
+        if (num == 0) goto num_is_zero_z;
+        zds[i] = BIGLO(num);
+        num = BIGDN(num);
+    }
+#endif
+    goto finish;
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+    for (; i < xn; i++) {
+      num_is_zero_x:
+	zds[i] = xds[i];
+    }
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+    for (; i < zn; i++) {
+      num_is_zero_z:
+        zds[i] = 0;
+    }
+#endif
+    goto finish;
 
-    carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3);
+  finish:
+    assert(num == 0 || num == -1);
+    if (num < 0) {
+        get2comp(z);
+	RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x));
+    }
+    RB_GC_GUARD(x);
+    return bignorm(z);
+}
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+static VALUE
+bigadd_int(VALUE x, long y)
+{
+    VALUE z;
+    BDIGIT *xds, *zds;
+    long xn, zn;
+    BDIGIT_DBL num;
+    long i;
 
-    bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n);
+    xds = BDIGITS(x);
+    xn = RBIGNUM_LEN(x);
 
-    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
+    if (xn == 0)
+        return LONG2NUM(y);
 
-    if (carry2)
-        bary_add_one(zds2, zl-2*n);
+    zn = xn;
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+    if (zn < bdigit_roomof(SIZEOF_LONG))
+        zn = bdigit_roomof(SIZEOF_LONG);
+#endif
+    zn++;
 
-    if (borrow && carry1)
-        borrow = carry1 = 0;
-    if (borrow && carry3)
-        borrow = carry3 = 0;
+    z = bignew(zn, RBIGNUM_SIGN(x));
+    zds = BDIGITS(z);
 
-    if (borrow)
-        bary_sub_one(zds3, zl-3*n);
-    else if (carry1 || carry3) {
-        BDIGIT c = carry1 + carry3;
-        bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1);
+#if SIZEOF_BDIGITS >= SIZEOF_LONG
+    num = (BDIGIT_DBL)xds[0] + y;
+    zds[0] = BIGLO(num);
+    num = BIGDN(num);
+    i = 1;
+    if (i < xn)
+        goto y_is_zero_x;
+    goto y_is_zero_z;
+#else
+    num = 0;
+    for (i=0; i < xn; i++) {
+        if (y == 0) goto y_is_zero_x;
+	num += (BDIGIT_DBL)xds[i] + BIGLO(y);
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+	y = BIGDN(y);
+    }
+    for (; i < zn; i++) {
+        if (y == 0) goto y_is_zero_z;
+	num += BIGLO(y);
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+	y = BIGDN(y);
     }
+    goto finish;
 
-    /*
-    if (SIZEOF_BDIGITS * zl <= 16) {
-        uint128_t z, x, y;
-        ssize_t i;
-        for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; }
-        for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; }
-        for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; }
-        assert(z == x * y);
+#endif
+
+    for (;i < xn; i++) {
+      y_is_zero_x:
+        if (num == 0) goto num_is_zero_x;
+	num += (BDIGIT_DBL)xds[i];
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
     }
-    */
-
-    if (odd_x && odd_y) {
-        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
-        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1);
+    for (; i < zn; i++) {
+      y_is_zero_z:
+        if (num == 0) goto num_is_zero_z;
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
     }
-    else if (odd_x) {
-        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl);
+    goto finish;
+
+    for (;i < xn; i++) {
+      num_is_zero_x:
+	zds[i] = xds[i];
     }
-    else if (odd_y) {
-        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
+    for (; i < zn; i++) {
+      num_is_zero_z:
+	zds[i] = 0;
     }
+    goto finish;
 
-    if (work)
-        ALLOCV_END(work);
+  finish:
+    RB_GC_GUARD(x);
+    return bignorm(z);
 }
 
 static void
-bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
 {
-    size_t l;
+    bary_add(zds, zn, xds, xn, yds, yn);
+}
 
-    assert(xl + yl <= zl);
+static VALUE
+bigadd(VALUE x, VALUE y, int sign)
+{
+    VALUE z;
+    long len;
 
-    if (xl == 1 && yl == 1) {
-        l = 2;
-        bary_mul_single(zds, zl, xds[0], yds[0]);
+    sign = (sign == RBIGNUM_SIGN(y));
+    if (RBIGNUM_SIGN(x) != sign) {
+	if (sign) return bigsub(y, x);
+	return bigsub(x, y);
+    }
+
+    if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) {
+	len = RBIGNUM_LEN(x) + 1;
     }
     else {
-        l = xl + yl;
-        bary_mul_normal(zds, zl, xds, xl, yds, yl);
-        rb_thread_check_ints();
+	len = RBIGNUM_LEN(y) + 1;
     }
-    MEMZERO(zds + l, BDIGIT, zl - l);
+    z = bignew(len, sign);
+
+    bigadd_core(BDIGITS(x), RBIGNUM_LEN(x),
+		BDIGITS(y), RBIGNUM_LEN(y),
+		BDIGITS(z), RBIGNUM_LEN(z));
+
+    return z;
 }
 
-static void
-bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+/*
+ *  call-seq:
+ *     big + other  -> Numeric
+ *
+ *  Adds big and other, returning the result.
+ */
+
+VALUE
+rb_big_plus(VALUE x, VALUE y)
 {
-    size_t nlsz; /* number of least significant zero BDIGITs */
+    long n;
 
-    assert(xl + yl <= zl);
+    switch (TYPE(y)) {
+      case T_FIXNUM:
+	n = FIX2LONG(y);
+	if ((n > 0) != RBIGNUM_SIGN(x)) {
+	    if (n < 0) {
+		n = -n;
+	    }
+	    return bigsub_int(x, n);
+	}
+	if (n < 0) {
+	    n = -n;
+	}
+	return bigadd_int(x, n);
 
-    while (0 < xl && xds[xl-1] == 0)
-        xl--;
-    while (0 < yl && yds[yl-1] == 0)
-        yl--;
+      case T_BIGNUM:
+	return bignorm(bigadd(x, y, 1));
 
-    nlsz = 0;
-    while (0 < xl && xds[0] == 0) {
-        xds++;
-        xl--;
-        nlsz++;
-    }
-    while (0 < yl && yds[0] == 0) {
-        yds++;
-        yl--;
-        nlsz++;
-    }
-    if (nlsz) {
-        MEMZERO(zds, BDIGIT, nlsz);
-        zds += nlsz;
-        zl -= nlsz;
-    }
+      case T_FLOAT:
+	return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y));
 
-    /* make sure that y is longer than x */
-    if (xl > yl) {
-        BDIGIT *tds;
-        size_t tl;
-	tds = xds; xds = yds; yds = tds;
-	tl = xl; xl = yl; yl = tl;
+      default:
+	return rb_num_coerce_bin(x, y, '+');
     }
-    assert(xl <= yl);
+}
 
-    if (xl == 0) {
-        MEMZERO(zds, BDIGIT, zl);
-        return;
-    }
+/*
+ *  call-seq:
+ *     big - other  -> Numeric
+ *
+ *  Subtracts other from big, returning the result.
+ */
 
-    /* normal multiplication when x is small */
-    if (xl < KARATSUBA_MUL_DIGITS) {
-      normal:
-        if (xds == yds && xl == yl)
-            bary_sq_fast(zds, zl, xds, xl);
-        else
-            bary_mul1(zds, zl, xds, xl, yds, yl);
-        return;
-    }
+VALUE
+rb_big_minus(VALUE x, VALUE y)
+{
+    long n;
 
-    /* normal multiplication when x or y is a sparse bignum */
-    if (bary_sparse_p(xds, xl)) goto normal;
-    if (bary_sparse_p(yds, yl)) {
-        bary_mul1(zds, zl, yds, yl, xds, xl);
-        return;
-    }
+    switch (TYPE(y)) {
+      case T_FIXNUM:
+	n = FIX2LONG(y);
+	if ((n > 0) != RBIGNUM_SIGN(x)) {
+	    if (n < 0) {
+		n = -n;
+	    }
+	    return bigadd_int(x, n);
+	}
+	if (n < 0) {
+	    n = -n;
+	}
+	return bigsub_int(x, n);
 
-    /* balance multiplication by slicing y when x is much smaller than y */
-    if (2 * xl <= yl) {
-        bary_mul_balance(zds, zl, xds, xl, yds, yl);
-        return;
-    }
+      case T_BIGNUM:
+	return bignorm(bigadd(x, y, 0));
 
-    if (xl < TOOM3_MUL_DIGITS) {
-        /* multiplication by karatsuba method */
-        bary_mul_karatsuba(zds, zl, xds, xl, yds, yl);
-        return;
-    }
+      case T_FLOAT:
+	return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y));
 
-    if (3*xl <= 2*(yl + 2)) {
-        bary_mul_balance(zds, zl, xds, xl, yds, yl);
-        return;
+      default:
+	return rb_num_coerce_bin(x, y, '-');
     }
+}
 
-    {
-        VALUE x, y, z;
-        x = bignew(xl, 1);
-	MEMCPY(BDIGITS(x), xds, BDIGIT, xl);
-        y = bignew(yl, 1);
-	MEMCPY(BDIGITS(y), yds, BDIGIT, yl);
-        z = bigtrunc(bigmul1_toom3(x, y));
-        MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z));
-        MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z));
-    }
+static long
+big_real_len(VALUE x)
+{
+    long i = RBIGNUM_LEN(x);
+    BDIGIT *xds = BDIGITS(x);
+    while (--i && !xds[i]);
+    return i + 1;
 }
 
 
@@ -4536,56 +4583,6 @@ bigmul1_toom3(VALUE x, VALUE y)
     return bignorm(z);
 }
 
-/* efficient squaring (2 times faster than normal multiplication)
- * ref: Handbook of Applied Cryptography, Algorithm 14.16
- *      http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
- */
-static void
-bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn)
-{
-    size_t i, j;
-    BDIGIT_DBL c, v, w;
-
-    assert(xn * 2 <= zn);
-
-    MEMZERO(zds, BDIGIT, zn);
-    for (i = 0; i < xn; i++) {
-	v = (BDIGIT_DBL)xds[i];
-	if (!v) continue;
-	c = (BDIGIT_DBL)zds[i + i] + v * v;
-	zds[i + i] = BIGLO(c);
-	c = BIGDN(c);
-	v *= 2;
-	for (j = i + 1; j < xn; j++) {
-	    w = (BDIGIT_DBL)xds[j];
-	    c += (BDIGIT_DBL)zds[i + j] + BIGLO(v) * w;
-	    zds[i + j] = BIGLO(c);
-	    c = BIGDN(c);
-	    if (BIGDN(v)) c += w;
-	}
-	if (c) {
-	    c += (BDIGIT_DBL)zds[i + xn];
-	    zds[i + xn] = BIGLO(c);
-	    c = BIGDN(c);
-            assert(c == 0 || i != xn-1);
-            if (c && i != xn-1) zds[i + xn + 1] += (BDIGIT)c;
-	}
-    }
-}
-
-/* determine whether a bignum is sparse or not by random sampling */
-static inline int
-bary_sparse_p(BDIGIT *ds, size_t n)
-{
-    long c = 0;
-
-    if (          ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
-    if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
-    if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
-
-    return (c <= 1) ? 1 : 0;
-}
-
 static VALUE
 bigmul0(VALUE x, VALUE y)
 {
-- 
cgit v1.2.3