1 files changed, 330 insertions, 199 deletions
diff --git a/bignum.c b/bignum.c
index 466b21abc0..28924b4eb9 100644
--- a/bignum.c
+++ b/bignum.c
@@ -30,9 +30,6 @@
 # define USE_GMP 0
 #endif
 #endif
-#if USE_GMP
-# include <gmp.h>
-#endif
 
 #include "id.h"
 #include "internal.h"
@@ -48,8 +45,40 @@
 #include "ruby/util.h"
 #include "ruby_assert.h"
 
+#if USE_GMP
+RBIMPL_WARNING_PUSH()
+# ifdef _MSC_VER
+RBIMPL_WARNING_IGNORED(4146) /* for mpn_neg() */
+# endif
+# include <gmp.h>
+RBIMPL_WARNING_POP()
+#endif
+
+static const bool debug_integer_pack = (
+#ifdef DEBUG_INTEGER_PACK
+    DEBUG_INTEGER_PACK+0
+#else
+    RUBY_DEBUG
+#endif
+    ) != 0;
+
 const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
 
+/* Two-digit decimal lookup table.  Offset 2*n holds the ASCII pair for
+ * n in the range 0..99.  Used by both rb_fix2str in numeric.c and
+ * big2str_2bdigits below to emit two base-10 digits per iteration. */
+const char ruby_decimal_digit_pairs[201] =
+    "00010203040506070809"
+    "10111213141516171819"
+    "20212223242526272829"
+    "30313233343536373839"
+    "40414243444546474849"
+    "50515253545556575859"
+    "60616263646566676869"
+    "70717273747576777879"
+    "80818283848586878889"
+    "90919293949596979899";
+
 #ifndef SIZEOF_BDIGIT_DBL
 # if SIZEOF_INT*2 <= SIZEOF_LONG_LONG
 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG
@@ -65,7 +94,6 @@ STATIC_ASSERT(sizeof_bdigit_and_dbl, SIZEOF_BDIGIT*2 <= SIZEOF_BDIGIT_DBL);
 STATIC_ASSERT(bdigit_signedness, 0 < (BDIGIT)-1);
 STATIC_ASSERT(bdigit_dbl_signedness, 0 < (BDIGIT_DBL)-1);
 STATIC_ASSERT(bdigit_dbl_signed_signedness, 0 > (BDIGIT_DBL_SIGNED)-1);
-STATIC_ASSERT(rbignum_embed_len_max, BIGNUM_EMBED_LEN_MAX <= (BIGNUM_EMBED_LEN_MASK >> BIGNUM_EMBED_LEN_SHIFT));
 
 #if SIZEOF_BDIGIT < SIZEOF_LONG
 STATIC_ASSERT(sizeof_long_and_sizeof_bdigit, SIZEOF_LONG % SIZEOF_BDIGIT == 0);
@@ -341,7 +369,7 @@ maxpow_in_bdigit_dbl(int base, int *exp_ret)
     BDIGIT_DBL maxpow;
     int exponent;
 
-    assert(2 <= base && base <= 36);
+    RUBY_ASSERT(2 <= base && base <= 36);
 
     {
 #if SIZEOF_BDIGIT_DBL == 2
@@ -373,7 +401,7 @@ maxpow_in_bdigit_dbl(int base, int *exp_ret)
 static inline BDIGIT_DBL
 bary2bdigitdbl(const BDIGIT *ds, size_t n)
 {
-    assert(n <= 2);
+    RUBY_ASSERT(n <= 2);
 
     if (n == 2)
         return ds[0] | BIGUP(ds[1]);
@@ -385,7 +413,7 @@ bary2bdigitdbl(const BDIGIT *ds, size_t n)
 static inline void
 bdigitdbl2bary(BDIGIT *ds, size_t n, BDIGIT_DBL num)
 {
-    assert(n == 2);
+    RUBY_ASSERT(n == 2);
 
     ds[0] = BIGLO(num);
     ds[1] = (BDIGIT)BIGDN(num);
@@ -416,7 +444,7 @@ bary_small_lshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift)
 {
     size_t i;
     BDIGIT_DBL num = 0;
-    assert(0 <= shift && shift < BITSPERDIG);
+    RUBY_ASSERT(0 <= shift && shift < BITSPERDIG);
 
     for (i=0; i<n; i++) {
         num = num | (BDIGIT_DBL)*xds++ << shift;
@@ -432,7 +460,7 @@ bary_small_rshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift, BDIGIT hi
     size_t i;
     BDIGIT_DBL num = 0;
 
-    assert(0 <= shift && shift < BITSPERDIG);
+    RUBY_ASSERT(0 <= shift && shift < BITSPERDIG);
 
     num = BIGUP(higher_bdigit);
     for (i = 0; i < n; i++) {
@@ -1049,15 +1077,13 @@ integer_unpack_num_bdigits(size_t numwords, size_t wordsize, size_t nails, int *
 
     if (numwords <= (SIZE_MAX - (BITSPERDIG-1)) / CHAR_BIT / wordsize) {
         num_bdigits = integer_unpack_num_bdigits_small(numwords, wordsize, nails, nlp_bits_ret);
-#ifdef DEBUG_INTEGER_PACK
-        {
+        if (debug_integer_pack) {
             int nlp_bits1;
             size_t num_bdigits1 = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, &nlp_bits1);
-            assert(num_bdigits == num_bdigits1);
-            assert(*nlp_bits_ret == nlp_bits1);
+            RUBY_ASSERT(num_bdigits == num_bdigits1);
+            RUBY_ASSERT(*nlp_bits_ret == nlp_bits1);
             (void)num_bdigits1;
         }
-#endif
     }
     else {
         num_bdigits = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, nlp_bits_ret);
@@ -1265,7 +1291,7 @@ bary_unpack_internal(BDIGIT *bdigits, size_t num_bdigits, const void *words, siz
         }
         if (dd)
             *dp++ = (BDIGIT)dd;
-        assert(dp <= de);
+        RUBY_ASSERT(dp <= de);
         while (dp < de)
             *dp++ = 0;
 #undef PUSH_BITS
@@ -1324,7 +1350,7 @@ bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwo
 
     num_bdigits0 = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
 
-    assert(num_bdigits0 <= num_bdigits);
+    RUBY_ASSERT(num_bdigits0 <= num_bdigits);
 
     sign = bary_unpack_internal(bdigits, num_bdigits0, words, numwords, wordsize, nails, flags, nlp_bits);
 
@@ -1343,8 +1369,8 @@ bary_subb(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yd
     size_t i;
     size_t sn;
 
-    assert(xn <= zn);
-    assert(yn <= zn);
+    RUBY_ASSERT(xn <= zn);
+    RUBY_ASSERT(yn <= zn);
 
     sn = xn < yn ? xn : yn;
 
@@ -1405,8 +1431,8 @@ bary_addc(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yd
     BDIGIT_DBL num;
     size_t i;
 
-    assert(xn <= zn);
-    assert(yn <= zn);
+    RUBY_ASSERT(xn <= zn);
+    RUBY_ASSERT(yn <= zn);
 
     if (xn > yn) {
         const BDIGIT *tds;
@@ -1470,7 +1496,7 @@ bary_mul_single(BDIGIT *zds, size_t zn, BDIGIT x, BDIGIT y)
 {
     BDIGIT_DBL n;
 
-    assert(2 <= zn);
+    RUBY_ASSERT(2 <= zn);
 
     n = (BDIGIT_DBL)x * y;
     bdigitdbl2bary(zds, 2, n);
@@ -1484,7 +1510,7 @@ bary_muladd_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
     BDIGIT_DBL dd;
     size_t j;
 
-    assert(zn > yn);
+    RUBY_ASSERT(zn > yn);
 
     if (x == 0)
         return 0;
@@ -1519,7 +1545,7 @@ bigdivrem_mulsub(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
     BDIGIT_DBL t2;
     BDIGIT_DBL_SIGNED num;
 
-    assert(zn == yn + 1);
+    RUBY_ASSERT(zn == yn + 1);
 
     num = 0;
     t2 = 0;
@@ -1544,7 +1570,7 @@ bary_mulsub_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
 {
     BDIGIT_DBL_SIGNED num;
 
-    assert(zn == yn + 1);
+    RUBY_ASSERT(zn == yn + 1);
 
     num = bigdivrem_mulsub(zds, zn, x, yds, yn);
     zds[yn] = BIGLO(num);
@@ -1558,7 +1584,7 @@ bary_mul_normal(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIG
 {
     size_t i;
 
-    assert(xn + yn <= zn);
+    RUBY_ASSERT(xn + yn <= zn);
 
     BDIGITS_ZERO(zds, zn);
     for (i = 0; i < xn; i++) {
@@ -1589,7 +1615,7 @@ bary_sq_fast(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn)
     BDIGIT vl;
     int vh;
 
-    assert(xn * 2 <= zn);
+    RUBY_ASSERT(xn * 2 <= zn);
 
     BDIGITS_ZERO(zds, zn);
 
@@ -1661,9 +1687,9 @@ bary_mul_balance_with_mulfunc(BDIGIT *const zds, const size_t zn,
     VALUE work = 0;
     size_t n;
 
-    assert(xn + yn <= zn);
-    assert(xn <= yn);
-    assert(!KARATSUBA_BALANCED(xn, yn) || !TOOM3_BALANCED(xn, yn));
+    RUBY_ASSERT(xn + yn <= zn);
+    RUBY_ASSERT(xn <= yn);
+    RUBY_ASSERT(!KARATSUBA_BALANCED(xn, yn) || !TOOM3_BALANCED(xn, yn));
 
     BDIGITS_ZERO(zds, xn);
 
@@ -1745,9 +1771,9 @@ bary_mul_karatsuba(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const B
     const BDIGIT *xds0, *xds1, *yds0, *yds1;
     BDIGIT *zds0, *zds1, *zds2, *zds3;
 
-    assert(xn + yn <= zn);
-    assert(xn <= yn);
-    assert(yn < 2 * xn);
+    RUBY_ASSERT(xn + yn <= zn);
+    RUBY_ASSERT(xn <= yn);
+    RUBY_ASSERT(yn < 2 * xn);
 
     sq = xds == yds && xn == yn;
 
@@ -1762,7 +1788,7 @@ bary_mul_karatsuba(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const B
 
     n = yn / 2;
 
-    assert(n < xn);
+    RUBY_ASSERT(n < xn);
 
     if (wn < n) {
         /* This function itself needs only n BDIGITs for work area.
@@ -1883,7 +1909,7 @@ bary_mul_karatsuba(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const B
         for (x = 0, i = xn-1; 0 <= i; i--) { x <<= SIZEOF_BDIGIT*CHAR_BIT; x |= xds[i]; }
         for (y = 0, i = yn-1; 0 <= i; i--) { y <<= SIZEOF_BDIGIT*CHAR_BIT; y |= yds[i]; }
         for (z = 0, i = zn-1; 0 <= i; i--) { z <<= SIZEOF_BDIGIT*CHAR_BIT; z |= zds[i]; }
-        assert(z == x * y);
+        RUBY_ASSERT(z == x * y);
     }
     */
 
@@ -1951,11 +1977,11 @@ bary_mul_toom3(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGI
 
     int sq = xds == yds && xn == yn;
 
-    assert(xn <= yn);  /* assume y >= x */
-    assert(xn + yn <= zn);
+    RUBY_ASSERT(xn <= yn);  /* assume y >= x */
+    RUBY_ASSERT(xn + yn <= zn);
 
     n = (yn + 2) / 3;
-    assert(2*n < xn);
+    RUBY_ASSERT(2*n < xn);
 
     wnc = 0;
 
@@ -2142,19 +2168,19 @@ bary_mul_toom3(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGI
     /* z(1) : t1 <- u1 * v1 */
     bary_mul_toom3_start(t1ds, t1n, u1ds, u1n, v1ds, v1n, wds, wn);
     t1p = u1p == v1p;
-    assert(t1ds[t1n-1] == 0);
+    RUBY_ASSERT(t1ds[t1n-1] == 0);
     t1n--;
 
     /* z(-1) : t2 <- u2 * v2 */
     bary_mul_toom3_start(t2ds, t2n, u2ds, u2n, v2ds, v2n, wds, wn);
     t2p = u2p == v2p;
-    assert(t2ds[t2n-1] == 0);
+    RUBY_ASSERT(t2ds[t2n-1] == 0);
     t2n--;
 
     /* z(-2) : t3 <- u3 * v3 */
     bary_mul_toom3_start(t3ds, t3n, u3ds, u3n, v3ds, v3n, wds, wn);
     t3p = u3p == v3p;
-    assert(t3ds[t3n-1] == 0);
+    RUBY_ASSERT(t3ds[t3n-1] == 0);
     t3n--;
 
     /* z(inf) : t4 <- x2 * y2 */
@@ -2330,7 +2356,7 @@ bary_mul_gmp(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT
     mpz_t x, y, z;
     size_t count;
 
-    assert(xn + yn <= zn);
+    RUBY_ASSERT(xn + yn <= zn);
 
     mpz_init(x);
     mpz_init(y);
@@ -2365,7 +2391,7 @@ rb_big_mul_gmp(VALUE x, VALUE y)
 static void
 bary_short_mul(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
 {
-    assert(xn + yn <= zn);
+    RUBY_ASSERT(xn + yn <= zn);
 
     if (xn == 1 && yn == 1) {
         bary_mul_single(zds, zn, xds[0], yds[0]);
@@ -2401,7 +2427,7 @@ bary_mul_precheck(BDIGIT **zdsp, size_t *znp, const BDIGIT **xdsp, size_t *xnp,
     const BDIGIT *yds = *ydsp;
     size_t yn = *ynp;
 
-    assert(xn + yn <= zn);
+    RUBY_ASSERT(xn + yn <= zn);
 
     nlsz = 0;
 
@@ -2450,7 +2476,7 @@ bary_mul_precheck(BDIGIT **zdsp, size_t *znp, const BDIGIT **xdsp, size_t *xnp,
         tds = xds; xds = yds; yds = tds;
         tn = xn; xn = yn; yn = tn;
     }
-    assert(xn <= yn);
+    RUBY_ASSERT(xn <= yn);
 
     if (xn <= 1) {
         if (xn == 0) {
@@ -2633,8 +2659,8 @@ rb_big_stop(void *ptr)
 static BDIGIT
 bigdivrem_single1(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT x_higher_bdigit, BDIGIT y)
 {
-    assert(0 < xn);
-    assert(x_higher_bdigit < y);
+    RUBY_ASSERT(0 < xn);
+    RUBY_ASSERT(x_higher_bdigit < y);
     if (POW2_P(y)) {
         BDIGIT r;
         r = xds[0] & (y-1);
@@ -2666,9 +2692,9 @@ bigdivrem_restoring(BDIGIT *zds, size_t zn, BDIGIT *yds, size_t yn)
     struct big_div_struct bds;
     size_t ynzero;
 
-    assert(yn < zn);
-    assert(BDIGIT_MSB(yds[yn-1]));
-    assert(zds[zn-1] < yds[yn-1]);
+    RUBY_ASSERT(yn < zn);
+    RUBY_ASSERT(BDIGIT_MSB(yds[yn-1]));
+    RUBY_ASSERT(zds[zn-1] < yds[yn-1]);
 
     for (ynzero = 0; !yds[ynzero]; ynzero++);
 
@@ -2687,7 +2713,7 @@ bigdivrem_restoring(BDIGIT *zds, size_t zn, BDIGIT *yds, size_t yn)
     if (bds.zn > 10000 || bds.yn > 10000) {
       retry:
         bds.stop = Qfalse;
-        rb_nogvl(bigdivrem1, &bds, rb_big_stop, &bds, RB_NOGVL_UBF_ASYNC_SAFE);
+        rb_nogvl(bigdivrem1, &bds, rb_big_stop, &bds, RB_NOGVL_UBF_ASYNC_SAFE | RB_NOGVL_OFFLOAD_SAFE);
 
         if (bds.stop == Qtrue) {
             /* execute trap handler, but exception was not raised. */
@@ -2707,9 +2733,9 @@ bary_divmod_normal(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT
     size_t zn;
     VALUE tmpyz = 0;
 
-    assert(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
-    assert(qds ? (xn - yn + 1) <= qn : 1);
-    assert(rds ? yn <= rn : 1);
+    RUBY_ASSERT(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
+    RUBY_ASSERT(qds ? (xn - yn + 1) <= qn : 1);
+    RUBY_ASSERT(rds ? yn <= rn : 1);
 
     zn = xn + BIGDIVREM_EXTRA_WORDS;
 
@@ -2801,10 +2827,10 @@ bary_divmod_gmp(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xd
     mpz_t x, y, q, r;
     size_t count;
 
-    assert(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
-    assert(qds ? (xn - yn + 1) <= qn : 1);
-    assert(rds ? yn <= rn : 1);
-    assert(qds || rds);
+    RUBY_ASSERT(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
+    RUBY_ASSERT(qds ? (xn - yn + 1) <= qn : 1);
+    RUBY_ASSERT(rds ? yn <= rn : 1);
+    RUBY_ASSERT(qds || rds);
 
     mpz_init(x);
     mpz_init(y);
@@ -2890,8 +2916,8 @@ bary_divmod_branch(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT
 static void
 bary_divmod(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
 {
-    assert(xn <= qn);
-    assert(yn <= rn);
+    RUBY_ASSERT(xn <= qn);
+    RUBY_ASSERT(yn <= rn);
 
     BARY_TRUNC(yds, yn);
     if (yn == 0)
@@ -2932,11 +2958,6 @@ bary_divmod(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, s
     }
 }
 
-
-#ifndef BIGNUM_DEBUG
-# define BIGNUM_DEBUG (0+RUBY_DEBUG)
-#endif
-
 static int
 bigzero_p(VALUE x)
 {
@@ -2953,7 +2974,7 @@ int
 rb_cmpint(VALUE val, VALUE a, VALUE b)
 {
     if (NIL_P(val)) {
-        rb_cmperr(a, b);
+        rb_cmperr_reason(a, b, "comparator returned nil");
     }
     if (FIXNUM_P(val)) {
         long l = FIX2LONG(val);
@@ -2978,36 +2999,68 @@ rb_cmpint(VALUE val, VALUE a, VALUE b)
             ((l) << BIGNUM_EMBED_LEN_SHIFT)) : \
      (void)(RBIGNUM(b)->as.heap.len = (l)))
 
+static size_t
+big_embed_capa(VALUE big)
+{
+    size_t size = rb_gc_obj_slot_size(big) - offsetof(struct RBignum, as.ary);
+    RUBY_ASSERT(size % sizeof(BDIGIT) == 0);
+    size_t capa = size / sizeof(BDIGIT);
+    RUBY_ASSERT(capa <= BIGNUM_EMBED_LEN_MAX);
+    return capa;
+}
+
+static size_t
+big_embed_size(size_t capa)
+{
+    size_t size = offsetof(struct RBignum, as.ary) + (sizeof(BDIGIT) * capa);
+    if (size < sizeof(struct RBignum)) {
+        size = sizeof(struct RBignum);
+    }
+    return size;
+}
+
+static bool
+big_embeddable_p(size_t capa)
+{
+    if (capa > BIGNUM_EMBED_LEN_MAX) {
+        return false;
+    }
+    return rb_gc_size_allocatable_p(big_embed_size(capa));
+}
+
 static void
 rb_big_realloc(VALUE big, size_t len)
 {
     BDIGIT *ds;
+    size_t embed_capa = big_embed_capa(big);
+
     if (BIGNUM_EMBED_P(big)) {
-        if (BIGNUM_EMBED_LEN_MAX < len) {
+        if (embed_capa < len) {
             ds = ALLOC_N(BDIGIT, len);
-            MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, BIGNUM_EMBED_LEN_MAX);
+            MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, embed_capa);
             RBIGNUM(big)->as.heap.len = BIGNUM_LEN(big);
             RBIGNUM(big)->as.heap.digits = ds;
             FL_UNSET_RAW(big, BIGNUM_EMBED_FLAG);
         }
     }
     else {
-        if (len <= BIGNUM_EMBED_LEN_MAX) {
+        if (len <= embed_capa) {
             ds = RBIGNUM(big)->as.heap.digits;
+            size_t old_len = RBIGNUM(big)->as.heap.len;
             FL_SET_RAW(big, BIGNUM_EMBED_FLAG);
             BIGNUM_SET_LEN(big, len);
-            (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)RBIGNUM(big)->as.ary, sizeof(RBIGNUM(big)->as.ary));
+            (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)RBIGNUM(big)->as.ary, embed_capa * sizeof(BDIGIT));
             if (ds) {
                 MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len);
-                xfree(ds);
+                SIZED_FREE_N(ds, old_len);
             }
         }
         else {
             if (BIGNUM_LEN(big) == 0) {
                 RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
             }
-            else {
-                REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len);
+            else if (BIGNUM_LEN(big) != len) {
+                SIZED_REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len, BIGNUM_LEN(big));
             }
         }
     }
@@ -3023,15 +3076,21 @@ rb_big_resize(VALUE big, size_t len)
 static VALUE
 bignew_1(VALUE klass, size_t len, int sign)
 {
-    NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0));
-    VALUE bigv = (VALUE)big;
-    BIGNUM_SET_SIGN(bigv, sign);
-    if (len <= BIGNUM_EMBED_LEN_MAX) {
-        FL_SET_RAW(bigv, BIGNUM_EMBED_FLAG);
+    VALUE bigv;
+
+    if (big_embeddable_p(len)) {
+        size_t size = big_embed_size(len);
+        RUBY_ASSERT(rb_gc_size_allocatable_p(size));
+        NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | BIGNUM_EMBED_FLAG, size);
+        bigv = (VALUE)big;
+        BIGNUM_SET_SIGN(bigv, sign);
         BIGNUM_SET_LEN(bigv, len);
-        (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)big->as.ary, sizeof(big->as.ary));
+        (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)big->as.ary, len * sizeof(BDIGIT));
     }
     else {
+        NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM, sizeof(struct RBignum));
+        bigv = (VALUE)big;
+        BIGNUM_SET_SIGN(bigv, sign);
         big->as.heap.digits = ALLOC_N(BDIGIT, len);
         big->as.heap.len = len;
     }
@@ -3042,7 +3101,9 @@ bignew_1(VALUE klass, size_t len, int sign)
 VALUE
 rb_big_new(size_t len, int sign)
 {
-    return bignew(len, sign != 0);
+    VALUE obj = bignew(len, sign != 0);
+    memset(BIGNUM_DIGITS(obj), 0, len * sizeof(BDIGIT));
+    return obj;
 }
 
 VALUE
@@ -3422,15 +3483,13 @@ rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
 
     if (numbytes <= SIZE_MAX / CHAR_BIT) {
         numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
-#ifdef DEBUG_INTEGER_PACK
-        {
+        if (debug_integer_pack) {
             size_t numwords0, nlz_bits0;
             numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0);
-            assert(numwords0 == numwords);
-            assert(nlz_bits0 == nlz_bits);
+            RUBY_ASSERT(numwords0 == numwords);
+            RUBY_ASSERT(nlz_bits0 == nlz_bits);
             (void)numwords0;
         }
-#endif
     }
     else {
         numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
@@ -3843,7 +3902,7 @@ str2big_poweroftwo(
     if (numbits) {
         *dp++ = BIGLO(dd);
     }
-    assert((size_t)(dp - BDIGITS(z)) == num_bdigits);
+    RUBY_ASSERT((size_t)(dp - BDIGITS(z)) == num_bdigits);
 
     return z;
 }
@@ -3886,7 +3945,7 @@ str2big_normal(
             }
             break;
         }
-        assert(blen <= num_bdigits);
+        RUBY_ASSERT(blen <= num_bdigits);
     }
 
     return z;
@@ -3944,7 +4003,7 @@ str2big_karatsuba(
             current_base = 1;
         }
     }
-    assert(i == num_bdigits);
+    RUBY_ASSERT(i == num_bdigits);
     for (unit = 2; unit < num_bdigits; unit *= 2) {
         for (i = 0; i < num_bdigits; i += unit*2) {
             if (2*unit <= num_bdigits - i) {
@@ -4089,8 +4148,8 @@ rb_int_parse_cstr(const char *str, ssize_t len, char **endp, size_t *ndigits,
         len -= (n); \
     } while (0)
 #define ASSERT_LEN() do {\
-        assert(len != 0); \
-        if (len0 >= 0) assert(s + len0 == str + len); \
+        RUBY_ASSERT(len != 0); \
+        if (len0 >= 0) RUBY_ASSERT(s + len0 == str + len); \
     } while (0)
 
     if (!str) {
@@ -4464,7 +4523,7 @@ rb_str2big_gmp(VALUE arg, int base, int badcheck)
 
 #if HAVE_LONG_LONG
 
-static VALUE
+VALUE
 rb_ull2big(unsigned LONG_LONG n)
 {
     long i;
@@ -4486,7 +4545,7 @@ rb_ull2big(unsigned LONG_LONG n)
     return big;
 }
 
-static VALUE
+VALUE
 rb_ll2big(LONG_LONG n)
 {
     long neg = 0;
@@ -4524,7 +4583,7 @@ rb_ll2inum(LONG_LONG n)
 #endif  /* HAVE_LONG_LONG */
 
 #ifdef HAVE_INT128_T
-static VALUE
+VALUE
 rb_uint128t2big(uint128_t n)
 {
     long i;
@@ -4635,8 +4694,8 @@ big_shift2(VALUE x, int lshift_p, VALUE y)
     size_t shift_numdigits;
     int shift_numbits;
 
-    assert(POW2_P(CHAR_BIT));
-    assert(POW2_P(BITSPERDIG));
+    RUBY_ASSERT(POW2_P(CHAR_BIT));
+    RUBY_ASSERT(POW2_P(BITSPERDIG));
 
     if (BIGZEROP(x))
         return INT2FIX(0);
@@ -4723,7 +4782,7 @@ power_cache_get_power(int base, int power_level, size_t *numdigits_ret)
         rb_obj_hide(power);
         base36_power_cache[base - 2][power_level] = power;
         base36_numdigits_cache[base - 2][power_level] = numdigits;
-        rb_gc_register_mark_object(power);
+        rb_vm_register_global_object(power);
     }
     if (numdigits_ret)
         *numdigits_ret = base36_numdigits_cache[base - 2][power_level];
@@ -4759,7 +4818,7 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
     int beginning = !b2s->ptr;
     size_t len = 0;
 
-    assert(xn <= 2);
+    RUBY_ASSERT(xn <= 2);
     num = bary2bdigitdbl(xds, xn);
 
     if (beginning) {
@@ -4767,11 +4826,34 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
             return;
         p = buf;
         j = sizeof(buf);
-        do {
-            BDIGIT_DBL idx = num % b2s->base;
-            num /= b2s->base;
-            p[--j] = ruby_digitmap[idx];
-        } while (num);
+        if (b2s->base == 10) {
+            /* Emit two decimal digits per iteration from ruby_decimal_digit_pairs.
+             * See the comment on the table in bignum.c near ruby_digitmap. */
+            while (num >= 100) {
+                BDIGIT_DBL idx = (num % 100) * 2;
+                num /= 100;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            if (num >= 10) {
+                BDIGIT_DBL idx = num * 2;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            else {
+                /* num is 1..9 here (0 was handled above) */
+                p[--j] = (char)('0' + num);
+            }
+        }
+        else {
+            do {
+                BDIGIT_DBL idx = num % b2s->base;
+                num /= b2s->base;
+                p[--j] = ruby_digitmap[idx];
+            } while (num);
+        }
         len = sizeof(buf) - j;
         big2str_alloc(b2s, len + taillen);
         MEMCPY(b2s->ptr, buf + j, char, len);
@@ -4779,11 +4861,39 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
     else {
         p = b2s->ptr;
         j = b2s->hbase2_numdigits;
-        do {
-            BDIGIT_DBL idx = num % b2s->base;
-            num /= b2s->base;
-            p[--j] = ruby_digitmap[idx];
-        } while (j);
+        if (b2s->base == 10) {
+            /* Non-beginning chunks must emit EXACTLY hbase2_numdigits,
+             * zero-padded on the left.  Consume num in 2-digit groups,
+             * handle the odd trailing digit, then memset remaining
+             * positions with '0'. */
+            while (num >= 100) {
+                BDIGIT_DBL idx = (num % 100) * 2;
+                num /= 100;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            if (num >= 10) {
+                BDIGIT_DBL idx = num * 2;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            else if (num > 0) {
+                p[--j] = (char)('0' + num);
+            }
+            if (j > 0) {
+                memset(p, '0', j);
+                j = 0;
+            }
+        }
+        else {
+            do {
+                BDIGIT_DBL idx = num % b2s->base;
+                num /= b2s->base;
+                p[--j] = ruby_digitmap[idx];
+            } while (j);
+        }
         len = b2s->hbase2_numdigits;
     }
     b2s->ptr += len;
@@ -4887,7 +4997,7 @@ big2str_karatsuba(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t wn,
             /* bigdivrem_restoring will modify y.
              * So use temporary buffer.  */
             tds = xds + qn;
-            assert(qn + bn <= xn + wn);
+            RUBY_ASSERT(qn + bn <= xn + wn);
             bary_small_lshift(tds, bds, bn, shift);
             xds[xn] = bary_small_lshift(xds, xds, xn, shift);
         }
@@ -4905,7 +5015,7 @@ big2str_karatsuba(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t wn,
         }
 
         BARY_TRUNC(qds, qn);
-        assert(qn <= bn);
+        RUBY_ASSERT(qn <= bn);
         big2str_karatsuba(b2s, qds, qn, xn+wn - (rn+qn), lower_power_level, lower_numdigits+taillen);
         BARY_TRUNC(rds, rn);
         big2str_karatsuba(b2s, rds, rn, xn+wn - rn, lower_power_level, taillen);
@@ -4970,7 +5080,7 @@ big2str_generic(VALUE x, int base)
         invalid_radix(base);
 
     if (xn >= LONG_MAX/BITSPERDIG) {
-        rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
+        rb_raise(rb_eRangeError, "bignum too big to convert into 'string'");
     }
 
     power_level = 0;
@@ -4980,7 +5090,7 @@ big2str_generic(VALUE x, int base)
         power_level++;
         power = power_cache_get_power(base, power_level, NULL);
     }
-    assert(power_level != MAX_BASE36_POWER_TABLE_ENTRIES);
+    RUBY_ASSERT(power_level != MAX_BASE36_POWER_TABLE_ENTRIES);
 
     if ((size_t)BIGNUM_LEN(power) <= xn) {
         /*
@@ -5095,7 +5205,7 @@ rb_big2str1(VALUE x, int base)
         invalid_radix(base);
 
     if (xn >= LONG_MAX/BITSPERDIG) {
-        rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
+        rb_raise(rb_eRangeError, "bignum too big to convert into 'string'");
     }
 
     if (POW2_P(base)) {
@@ -5131,7 +5241,7 @@ big2ulong(VALUE x, const char *type)
     if (len == 0)
         return 0;
     if (BIGSIZE(x) > sizeof(long)) {
-        rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
+        rb_raise(rb_eRangeError, "bignum too big to convert into '%s'", type);
     }
     ds = BDIGITS(x);
 #if SIZEOF_LONG <= SIZEOF_BDIGIT
@@ -5174,7 +5284,7 @@ rb_big2long(VALUE x)
         if (num <= 1+(unsigned long)(-(LONG_MIN+1)))
             return -(long)(num-1)-1;
     }
-    rb_raise(rb_eRangeError, "bignum too big to convert into `long'");
+    rb_raise(rb_eRangeError, "bignum too big to convert into 'long'");
 }
 
 #if HAVE_LONG_LONG
@@ -5192,7 +5302,7 @@ big2ull(VALUE x, const char *type)
     if (len == 0)
         return 0;
     if (BIGSIZE(x) > SIZEOF_LONG_LONG)
-        rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
+        rb_raise(rb_eRangeError, "bignum too big to convert into '%s'", type);
 #if SIZEOF_LONG_LONG <= SIZEOF_BDIGIT
     num = (unsigned LONG_LONG)ds[0];
 #else
@@ -5233,7 +5343,7 @@ rb_big2ll(VALUE x)
         if (num <= 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
             return -(LONG_LONG)(num-1)-1;
     }
-    rb_raise(rb_eRangeError, "bignum too big to convert into `long long'");
+    rb_raise(rb_eRangeError, "bignum too big to convert into 'long long'");
 }
 
 #endif  /* HAVE_LONG_LONG */
@@ -5496,10 +5606,10 @@ big_op(VALUE x, VALUE y, enum big_op_t op)
     n = FIX2INT(rel);
 
     switch (op) {
-        case big_op_gt: return RBOOL(n >  0);
-        case big_op_ge: return RBOOL(n >= 0);
-        case big_op_lt: return RBOOL(n <  0);
-        case big_op_le: return RBOOL(n <= 0);
+      case big_op_gt: return RBOOL(n >  0);
+      case big_op_ge: return RBOOL(n >= 0);
+      case big_op_lt: return RBOOL(n <  0);
+      case big_op_le: return RBOOL(n <= 0);
     }
     return Qundef;
 }
@@ -5655,7 +5765,7 @@ bigsub_int(VALUE x, long y0)
     zds = BDIGITS(z);
 
 #if SIZEOF_BDIGIT >= SIZEOF_LONG
-    assert(xn == zn);
+    RUBY_ASSERT(xn == zn);
     num = (BDIGIT_DBL_SIGNED)xds[0] - y;
     if (xn == 1 && num < 0) {
         BIGNUM_NEGATE(z);
@@ -5718,7 +5828,7 @@ bigsub_int(VALUE x, long y0)
     goto finish;
 
   finish:
-    assert(num == 0 || num == -1);
+    RUBY_ASSERT(num == 0 || num == -1);
     if (num < 0) {
         get2comp(z);
         BIGNUM_NEGATE(z);
@@ -5903,6 +6013,8 @@ bigsq(VALUE x)
     BDIGIT *xds, *zds;
 
     xn = BIGNUM_LEN(x);
+    if (MUL_OVERFLOW_LONG_P(2, xn))
+        rb_raise(rb_eArgError, "square overflow");
     zn = 2 * xn;
 
     z = bignew(zn, 1);
@@ -5931,6 +6043,8 @@ bigmul0(VALUE x, VALUE y)
 
     xn = BIGNUM_LEN(x);
     yn = BIGNUM_LEN(y);
+    if (ADD_OVERFLOW_LONG_P(xn, yn))
+        rb_raise(rb_eArgError, "multiplication overflow");
     zn = xn + yn;
 
     z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
@@ -6276,8 +6390,7 @@ rb_big_pow(VALUE x, VALUE y)
         y = bignorm(y);
         if (FIXNUM_P(y))
             goto again;
-        rb_warn("in a**b, b may be too big");
-        d = rb_big2dbl(y);
+        rb_raise(rb_eArgError, "exponent is too large");
     }
     else if (FIXNUM_P(y)) {
         yy = FIX2LONG(y);
@@ -6293,13 +6406,17 @@ rb_big_pow(VALUE x, VALUE y)
             VALUE z = 0;
             SIGNED_VALUE mask;
             const size_t xbits = rb_absint_numwords(x, 1, NULL);
-            const size_t BIGLEN_LIMIT = 32*1024*1024;
+#if SIZEOF_SIZE_T == 4
+            const size_t BIGLEN_LIMIT = 1ULL << 31; // 2 GB
+#else // SIZEOF_SIZE_T == 8
+            const size_t BIGLEN_LIMIT = 1ULL << 34; // 16 GB
+#endif
 
             if (xbits == (size_t)-1 ||
                 (xbits > BIGLEN_LIMIT) ||
+                MUL_OVERFLOW_LONG_P(yy, xbits) ||
                 (xbits * yy > BIGLEN_LIMIT)) {
-                rb_warn("in a**b, b may be too big");
-                d = (double)yy;
+                rb_raise(rb_eArgError, "exponent is too large");
             }
             else {
                 for (mask = FIXNUM_MAX + 1; mask; mask >>= 1) {
@@ -6328,7 +6445,7 @@ bigand_int(VALUE x, long xn, BDIGIT hibitsx, long y)
     BDIGIT hibitsy;
 
     if (y == 0) return INT2FIX(0);
-    if (xn == 0) return hibitsx ? LONG2NUM(y) : 0;
+    if (xn == 0) return hibitsx ? LONG2NUM(y) : INT2FIX(0);
     hibitsy = 0 <= y ? 0 : BDIGMAX;
     xds = BDIGITS(x);
 #if SIZEOF_BDIGIT >= SIZEOF_LONG
@@ -6740,6 +6857,73 @@ rb_big_aref(VALUE x, VALUE y)
 }
 
 VALUE
+rb_big_aref2(VALUE x, VALUE beg, VALUE len)
+{
+    BDIGIT *xds, *vds;
+    VALUE v;
+    size_t copy_begin, xn, shift;
+    ssize_t begin, length, end;
+    bool negative_add_one;
+
+    beg = rb_to_int(beg);
+    len = rb_to_int(len);
+    length = NUM2SSIZET(len);
+    begin = NUM2SSIZET(beg);
+    end = NUM2SSIZET(rb_int_plus(beg, len));
+    shift = begin < 0 ? -begin : 0;
+    xn = BIGNUM_LEN(x);
+    xds = BDIGITS(x);
+
+    if (length < 0) return rb_big_rshift(x, beg);
+    if (length == 0 || end <= 0) return INT2FIX(0);
+    if (begin < 0) begin = 0;
+
+    if ((size_t)(end - 1) / BITSPERDIG >= xn) {
+        /* end > xn * BITSPERDIG */
+        end = xn * BITSPERDIG;
+    }
+
+    if ((size_t)begin / BITSPERDIG < xn) {
+        /* begin < xn * BITSPERDIG */
+        size_t shift_bits, copy_end;
+        copy_begin = begin / BITSPERDIG;
+        shift_bits = begin % BITSPERDIG;
+        copy_end = (end - 1) / BITSPERDIG + 1;
+        v = bignew(copy_end - copy_begin, 1);
+        vds = BDIGITS(v);
+        MEMCPY(vds, xds + copy_begin, BDIGIT, copy_end - copy_begin);
+        negative_add_one = (vds[0] & ((1 << shift_bits) - 1)) == 0;
+        v = bignorm(v);
+        if (shift_bits) v = rb_int_rshift(v, SIZET2NUM(shift_bits));
+    }
+    else {
+        /* Out of range */
+        v = INT2FIX(0);
+        negative_add_one = false;
+        copy_begin = begin = end = 0;
+    }
+
+    if (BIGNUM_NEGATIVE_P(x)) {
+        size_t mask_size = length - shift;
+        VALUE mask = rb_int_minus(rb_int_lshift(INT2FIX(1), SIZET2NUM(mask_size)), INT2FIX(1));
+        v = rb_int_xor(v, mask);
+        for (size_t i = 0; negative_add_one && i < copy_begin; i++) {
+            if (xds[i]) negative_add_one = false;
+        }
+        if (negative_add_one) v = rb_int_plus(v, INT2FIX(1));
+        v = rb_int_and(v, mask);
+    }
+    else {
+        size_t mask_size = (size_t)end - begin;
+        VALUE mask = rb_int_minus(rb_int_lshift(INT2FIX(1), SIZET2NUM(mask_size)), INT2FIX(1));
+        v = rb_int_and(v, mask);
+    }
+    RB_GC_GUARD(x);
+    if (shift) v = rb_int_lshift(v, SSIZET2NUM(shift));
+    return v;
+}
+
+VALUE
 rb_big_hash(VALUE x)
 {
     st_index_t hash;
@@ -6873,63 +7057,11 @@ BDIGIT rb_bdigit_dbl_isqrt(BDIGIT_DBL);
 # define BDIGIT_DBL_TO_DOUBLE(n) (double)(n)
 #endif
 
-static BDIGIT *
-estimate_initial_sqrt(VALUE *xp, const size_t xn, const BDIGIT *nds, size_t len)
-{
-    enum {dbl_per_bdig = roomof(DBL_MANT_DIG,BITSPERDIG)};
-    const int zbits = nlz(nds[len-1]);
-    VALUE x = *xp = bignew_1(0, xn, 1); /* division may release the GVL */
-    BDIGIT *xds = BDIGITS(x);
-    BDIGIT_DBL d = bary2bdigitdbl(nds+len-dbl_per_bdig, dbl_per_bdig);
-    BDIGIT lowbits = 1;
-    int rshift = (int)((BITSPERDIG*2-zbits+(len&BITSPERDIG&1) - DBL_MANT_DIG + 1) & ~1);
-    double f;
-
-    if (rshift > 0) {
-        lowbits = (BDIGIT)d & ~(~(BDIGIT)1U << rshift);
-        d >>= rshift;
-    }
-    else if (rshift < 0) {
-        d <<= -rshift;
-        d |= nds[len-dbl_per_bdig-1] >> (BITSPERDIG+rshift);
-    }
-    f = sqrt(BDIGIT_DBL_TO_DOUBLE(d));
-    d = (BDIGIT_DBL)ceil(f);
-    if (BDIGIT_DBL_TO_DOUBLE(d) == f) {
-        if (lowbits || (lowbits = !bary_zero_p(nds, len-dbl_per_bdig)))
-            ++d;
-    }
-    else {
-        lowbits = 1;
-    }
-    rshift /= 2;
-    rshift += (2-(len&1))*BITSPERDIG/2;
-    if (rshift >= 0) {
-        if (nlz((BDIGIT)d) + rshift >= BITSPERDIG) {
-            /* (d << rshift) does cause overflow.
-             * example: Integer.sqrt(0xffff_ffff_ffff_ffff ** 2)
-             */
-            d = ~(BDIGIT_DBL)0;
-        }
-        else {
-            d <<= rshift;
-        }
-    }
-    BDIGITS_ZERO(xds, xn-2);
-    bdigitdbl2bary(&xds[xn-2], 2, d);
-
-    if (!lowbits) return NULL; /* special case, exact result */
-    return xds;
-}
-
 VALUE
 rb_big_isqrt(VALUE n)
 {
     BDIGIT *nds = BDIGITS(n);
     size_t len = BIGNUM_LEN(n);
-    size_t xn = (len+1) / 2;
-    VALUE x;
-    BDIGIT *xds;
 
     if (len <= 2) {
         BDIGIT sq = rb_bdigit_dbl_isqrt(bary2bdigitdbl(nds, len));
@@ -6939,25 +7071,19 @@ rb_big_isqrt(VALUE n)
         return ULONG2NUM(sq);
 #endif
     }
-    else if ((xds = estimate_initial_sqrt(&x, xn, nds, len)) != 0) {
-        size_t tn = xn + BIGDIVREM_EXTRA_WORDS;
-        VALUE t = bignew_1(0, tn, 1);
-        BDIGIT *tds = BDIGITS(t);
-        tn = BIGNUM_LEN(t);
-
-        /* t = n/x */
-        while (bary_divmod_branch(tds, tn, NULL, 0, nds, len, xds, xn),
-               bary_cmp(tds, tn, xds, xn) < 0) {
-            int carry;
-            BARY_TRUNC(tds, tn);
-            /* x = (x+t)/2 */
-            carry = bary_add(xds, xn, xds, xn, tds, tn);
-            bary_small_rshift(xds, xds, xn, 1, carry);
-            tn = BIGNUM_LEN(t);
+    else {
+        size_t shift = FIX2LONG(rb_big_bit_length(n)) / 4;
+        VALUE n2 = rb_int_rshift(n, SIZET2NUM(2 * shift));
+        VALUE x = FIXNUM_P(n2) ? LONG2FIX(rb_ulong_isqrt(FIX2ULONG(n2))) : rb_big_isqrt(n2);
+        /* x = (x+n/x)/2 */
+        x = rb_int_plus(rb_int_lshift(x, SIZET2NUM(shift - 1)), rb_int_idiv(rb_int_rshift(n, SIZET2NUM(shift + 1)), x));
+        VALUE xx = rb_int_mul(x, x);
+        while (rb_int_gt(xx, n)) {
+            xx = rb_int_minus(xx, rb_int_minus(rb_int_plus(x, x), INT2FIX(1)));
+            x = rb_int_minus(x, INT2FIX(1));
         }
+        return x;
     }
-    RBASIC_SET_CLASS_RAW(x, rb_cInteger);
-    return x;
 }
 
 #if USE_GMP
@@ -6996,14 +7122,14 @@ int_pow_tmp3(VALUE x, VALUE y, VALUE m, int nega_flg)
     if (FIXNUM_P(y)) {
        y = rb_int2big(FIX2LONG(y));
     }
-    assert(RB_BIGNUM_TYPE_P(m));
+    RUBY_ASSERT(RB_BIGNUM_TYPE_P(m));
     xn = BIGNUM_LEN(x);
     yn = BIGNUM_LEN(y);
     mn = BIGNUM_LEN(m);
     zn = mn;
     z = bignew(zn, 1);
     bary_powm_gmp(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, BDIGITS(m), mn);
-    if (nega_flg & BIGNUM_POSITIVE_P(z)) {
+    if (nega_flg && BIGNUM_POSITIVE_P(z) && !BIGZEROP(z)) {
         z = rb_big_minus(z, m);
     }
     RB_GC_GUARD(x);
@@ -7031,7 +7157,7 @@ int_pow_tmp3(VALUE x, VALUE y, VALUE m, int nega_flg)
         x = rb_int_modulo(x, m);
     }
 
-    if (nega_flg && rb_int_positive_p(tmp)) {
+    if (nega_flg && rb_int_positive_p(tmp) && !rb_int_zero_p(tmp)) {
         tmp = rb_int_minus(tmp, m);
     }
     return tmp;
@@ -7143,6 +7269,11 @@ rb_int_powm(int const argc, VALUE * const argv, VALUE const num)
             rb_raise(rb_eTypeError, "Integer#pow() 2nd argument not allowed unless all arguments are integers");
         }
 
+        if (rb_int_zero_p(a) && !rb_int_zero_p(b)) {
+            /* shortcut; 0**x => 0 except for x == 0 */
+            return INT2FIX(0);
+        }
+
         if (rb_int_negative_p(m)) {
             m = rb_int_uminus(m);
             nega_flg = 1;