From 19f45f853c125ff744c53bd548f29f891c76c28b Mon Sep 17 00:00:00 2001 From: mame Date: Sun, 14 Dec 2008 03:59:02 +0000 Subject: * bignum.c (rb_big_mul): faster multiplication by Karatsuba method and twice faster square than normal multiplication. * random.c (rb_rand_internal): used by Bignum#*. * test/ruby/test_bignum.rb: add some tests for above. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20733 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- bignum.c | 395 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 327 insertions(+), 68 deletions(-) (limited to 'bignum.c') diff --git a/bignum.c b/bignum.c index 35db091454..bcac99309b 100644 --- a/bignum.c +++ b/bignum.c @@ -17,6 +17,7 @@ #ifdef HAVE_IEEEFP_H #include #endif +#include VALUE rb_cBignum; @@ -1380,12 +1381,36 @@ rb_big_neg(VALUE x) return bignorm(z); } +static void +bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn) +{ + BDIGIT_DBL_SIGNED num; + long i; + + for (i = 0, num = 0; i < yn; i++) { + num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + while (num && i < xn) { + num += xds[i]; + zds[i++] = BIGLO(num); + num = BIGDN(num); + } + while (i < xn) { + zds[i] = xds[i]; + i++; + } + assert(i <= zn); + while (i < zn) { + zds[i++] = 0; + } +} + static VALUE bigsub(VALUE x, VALUE y) { VALUE z = 0; - BDIGIT *zds; - BDIGIT_DBL_SIGNED num; long i = RBIGNUM_LEN(x); /* if x is larger than y, swap */ @@ -1406,32 +1431,52 @@ bigsub(VALUE x, VALUE y) } z = bignew(RBIGNUM_LEN(x), z==0); - zds = BDIGITS(z); + bigsub_core(BDIGITS(x), RBIGNUM_LEN(x), + BDIGITS(y), RBIGNUM_LEN(y), + BDIGITS(z), RBIGNUM_LEN(z)); - for (i = 0, num = 0; i < RBIGNUM_LEN(y); i++) { - num += (BDIGIT_DBL_SIGNED)BDIGITS(x)[i] - BDIGITS(y)[i]; - zds[i] = BIGLO(num); + return z; +} + +static void +bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn) +{ + BDIGIT_DBL num = 0; + long i; + + if (xn > yn) { + BDIGIT *tds; + tds = xds; xds = yds; yds = tds; + i = xn; xn = yn; yn = i; + } + + i = 0; + while (i < xn) { + num += (BDIGIT_DBL)xds[i] + yds[i]; + zds[i++] = BIGLO(num); num = BIGDN(num); } - while (num && i < RBIGNUM_LEN(x)) { - num += BDIGITS(x)[i]; + while (num && i < yn) { + num += yds[i]; zds[i++] = BIGLO(num); num = BIGDN(num); } - while (i < RBIGNUM_LEN(x)) { - zds[i] = BDIGITS(x)[i]; + while (i < yn) { + zds[i] = yds[i]; i++; } - - return z; + if (num) zds[i++] = (BDIGIT)num; + assert(i <= zn); + while (i < zn) { + zds[i++] = 0; + } } static VALUE bigadd(VALUE x, VALUE y, int sign) { VALUE z; - BDIGIT_DBL num; - long i, len; + long len; sign = (sign == RBIGNUM_SIGN(y)); if (RBIGNUM_SIGN(x) != sign) { @@ -1441,30 +1486,15 @@ bigadd(VALUE x, VALUE y, int sign) if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) { len = RBIGNUM_LEN(x) + 1; - z = x; x = y; y = z; } else { len = RBIGNUM_LEN(y) + 1; } z = bignew(len, sign); - len = RBIGNUM_LEN(x); - for (i = 0, num = 0; i < len; i++) { - num += (BDIGIT_DBL)BDIGITS(x)[i] + BDIGITS(y)[i]; - BDIGITS(z)[i] = BIGLO(num); - num = BIGDN(num); - } - len = RBIGNUM_LEN(y); - while (num && i < len) { - num += BDIGITS(y)[i]; - BDIGITS(z)[i++] = BIGLO(num); - num = BIGDN(num); - } - while (i < len) { - BDIGITS(z)[i] = BDIGITS(y)[i]; - i++; - } - BDIGITS(z)[i] = (BDIGIT)num; + bigadd_core(BDIGITS(x), RBIGNUM_LEN(x), + BDIGITS(y), RBIGNUM_LEN(y), + BDIGITS(z), RBIGNUM_LEN(z)); return z; } @@ -1519,24 +1549,20 @@ rb_big_minus(VALUE x, VALUE y) } } -static void -rb_big_stop(void *ptr) +static long +big_real_len(VALUE x) { - VALUE *stop = (VALUE*)ptr; - *stop = Qtrue; + long i = RBIGNUM_LEN(x); + while (--i && !BDIGITS(x)[i]); + return i + 1; } -struct big_mul_struct { - VALUE x, y, z, stop; -}; - static VALUE -bigmul1(void *ptr) +bigmul1_normal(VALUE x, VALUE y) { - struct big_mul_struct *bms = (struct big_mul_struct*)ptr; long i, j; BDIGIT_DBL n = 0; - VALUE x = bms->x, y = bms->y, z = bms->z; + VALUE z = bignew(RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); BDIGIT *zds; j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1; @@ -1544,7 +1570,6 @@ bigmul1(void *ptr) while (j--) zds[j] = 0; for (i = 0; i < RBIGNUM_LEN(x); i++) { BDIGIT_DBL dd; - if (bms->stop) return Qnil; dd = BDIGITS(x)[i]; if (dd == 0) continue; n = 0; @@ -1558,45 +1583,257 @@ bigmul1(void *ptr) zds[i + j] = n; } } + rb_thread_check_ints(); return z; } +static VALUE bigmul0(VALUE x, VALUE y); + +/* balancing multiplication by slicing larger argument */ static VALUE -rb_big_mul0(VALUE x, VALUE y) +bigmul1_balance(VALUE x, VALUE y) { - struct big_mul_struct bms; - volatile VALUE z; + VALUE z, t1, t2; + long i, xn, yn, r, n; - switch (TYPE(y)) { - case T_FIXNUM: - y = rb_int2big(FIX2LONG(y)); - break; + xn = RBIGNUM_LEN(x); + yn = RBIGNUM_LEN(y); + assert(2 * xn <= yn); - case T_BIGNUM: - break; + z = bignew(xn + yn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); + t1 = bignew(xn, 1); - case T_FLOAT: - return DBL2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y)); + for (i = 0; i < xn + yn; i++) BDIGITS(z)[i] = 0; - default: - return rb_num_coerce_bin(x, y, '*'); + n = 0; + while (yn > 0) { + r = xn > yn ? yn : xn; + MEMCPY(BDIGITS(t1), BDIGITS(y) + n, BDIGIT, r); + RBIGNUM_SET_LEN(t1, r); + t2 = bigmul0(x, t1); + bigadd_core(BDIGITS(z) + n, RBIGNUM_LEN(z) - n, + BDIGITS(t2), big_real_len(t2), + BDIGITS(z) + n, RBIGNUM_LEN(z) - n); + yn -= r; + n += r; } + rb_gc_force_recycle(t1); + + return z; +} + +/* split a bignum into high and low bignums */ +static void +big_split(VALUE v, long n, VALUE *ph, VALUE *pl) +{ + long hn, ln; + VALUE h, l; + + ln = RBIGNUM_LEN(v) > n ? n : RBIGNUM_LEN(v); + hn = RBIGNUM_LEN(v) - ln; + + while (--hn && !BDIGITS(v)[hn + ln]); + h = bignew(++hn, 1); + MEMCPY(BDIGITS(h), BDIGITS(v) + ln, BDIGIT, hn); + + while (--ln && !BDIGITS(v)[ln]); + l = bignew(++ln, 1); + MEMCPY(BDIGITS(l), BDIGITS(v), BDIGIT, ln); - bms.x = x; - bms.y = y; - bms.z = bignew(RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); - bms.stop = Qfalse; + *pl = l; + *ph = h; +} + +/* multiplication by karatsuba method */ +static VALUE +bigmul1_karatsuba(VALUE x, VALUE y) +{ + long i, n, xn, yn, t1n, t2n; + VALUE xh, xl, yh, yl, z, t1, t2, t3; + BDIGIT *zds; + + xn = RBIGNUM_LEN(x); + yn = RBIGNUM_LEN(y); + n = yn / 2; + big_split(x, n, &xh, &xl); + if (x == y) { + yh = xh; yl = xl; + } + else big_split(y, n, &yh, &yl); + + /* x = xh * b + xl + * y = yh * b + yl + * + * Karatsuba method: + * x * y = z2 * b^2 + z1 * b + z0 + * where + * z2 = xh * yh + * z0 = xl * yl + * z1 = (xh + xl) * (yh + yl) - x2 - x0 + * + * ref: http://en.wikipedia.org/wiki/Karatsuba_algorithm + */ + + /* allocate a result bignum */ + z = bignew(xn + yn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); + zds = BDIGITS(z); + + /* t1 <- xh * yh */ + t1 = bigmul0(xh, yh); + t1n = big_real_len(t1); + + /* copy t1 into high bytes of the result (z2) */ + MEMCPY(zds + 2 * n, BDIGITS(t1), BDIGIT, t1n); + for (i = 2 * n + t1n; i < xn + yn; i++) BDIGITS(z)[i] = 0; + + if (!BIGZEROP(xl) && !BIGZEROP(yl)) { + /* t2 <- xl * yl */ + t2 = bigmul0(xl, yl); + t2n = big_real_len(t2); - if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) { - z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop); + /* copy t2 into low bytes of the result (z0) */ + MEMCPY(zds, BDIGITS(t2), BDIGIT, t2n); + for (i = t2n; i < 2 * n; i++) BDIGITS(z)[i] = 0; + + /* subtract t2 from middle bytes of the result (z1) */ + i = xn + yn - n; + bigsub_core(zds + n, i, BDIGITS(t2), t2n, zds + n, i); + rb_gc_force_recycle(t2); } else { - z = bigmul1(&bms); + /* copy 0 into low bytes of the result (z0) */ + for (i = 0; i < 2 * n; i++) BDIGITS(z)[i] = 0; } + /* subtract t1 from middle bytes of the result (z1) */ + i = xn + yn - n; + bigsub_core(zds + n, i, BDIGITS(t1), t1n, zds + n, i); + rb_gc_force_recycle(t1); + + /* t1 <- xh + xl */ + t1 = bigadd(xh, xl, 1); + if (xh != yh) rb_gc_force_recycle(xh); + if (xl != yl) rb_gc_force_recycle(xl); + + /* t2 <- yh + yl */ + t2 = (x == y) ? t1 : bigadd(yh, yl, 1); + rb_gc_force_recycle(yh); + rb_gc_force_recycle(yl); + + /* t3 <- t1 * t2 */ + t3 = bigmul0(t1, t2); + rb_gc_force_recycle(t1); + if (t1 != t2) rb_gc_force_recycle(t2); + + /* add t3 to middle bytes of the result (z1) */ + bigadd_core(zds + n, i, BDIGITS(t3), big_real_len(t3), zds + n, i); + rb_gc_force_recycle(t3); + return z; } +/* efficient squaring (2 times faster than normal multiplication) + * ref: Handbook of Applied Cryptography, Algorithm 14.16 + * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + */ +static VALUE +bigsqr_fast(VALUE x) +{ + long len = RBIGNUM_LEN(x), i, j; + VALUE z = bignew(2 * len + 1, 1); + BDIGIT *xds = BDIGITS(x), *zds = BDIGITS(z); + BDIGIT_DBL c, v, w; + + for (i = 2 * len + 1; i--; ) zds[i] = 0; + for (i = 0; i < len; i++) { + v = (BDIGIT_DBL)xds[i]; + if (!v) continue; + c = (BDIGIT_DBL)zds[i + i] + v * v; + zds[i + i] = BIGLO(c); + c = BIGDN(c); + v *= 2; + for (j = i + 1; j < len; j++) { + w = (BDIGIT_DBL)xds[j]; + c += (BDIGIT_DBL)zds[i + j] + BIGLO(v) * w; + zds[i + j] = BIGLO(c); + c = BIGDN(c); + if (BIGDN(v)) c += w; + } + if (c) { + c += (BDIGIT_DBL)zds[i + len]; + zds[i + len] = BIGLO(c); + c = BIGDN(c); + } + if (c) zds[i + len + 1] += c; + } + return z; +} + +#define KARATSUBA_MUL_DIGITS 70 + + +/* determine whether a bignum is sparse or not by random sampling */ +static inline VALUE +big_sparse_p(VALUE x) +{ + long c = 0, n = RBIGNUM_LEN(x); + unsigned long rb_rand_internal(unsigned long i); + + if ( BDIGITS(x)[rb_rand_internal(n / 2) + n / 4]) c++; + if (c <= 1 && BDIGITS(x)[rb_rand_internal(n / 2) + n / 4]) c++; + if (c <= 1 && BDIGITS(x)[rb_rand_internal(n / 2) + n / 4]) c++; + + return (c <= 1) ? Qtrue : Qfalse; +} + +#if 0 +static void +dump_bignum(VALUE x) +{ + long i; + printf("0x0"); + for (i = RBIGNUM_LEN(x); i--; ) { + printf("_%08x", BDIGITS(x)[i]); + } + puts(""); +} +#endif + +static VALUE +bigmul0(VALUE x, VALUE y) +{ + long xn, yn; + + xn = RBIGNUM_LEN(x); + yn = RBIGNUM_LEN(y); + + /* make sure that y is longer than x */ + if (xn > yn) { + VALUE t; + long tn; + t = x; x = y; y = t; + tn = xn; xn = yn; yn = tn; + } + assert(xn <= yn); + + /* normal multiplication when x is small */ + if (xn < KARATSUBA_MUL_DIGITS) { + normal: + if (x == y) return bigsqr_fast(x); + return bigmul1_normal(x, y); + } + + /* normal multiplication when x or y is a sparse bignum */ + if (big_sparse_p(x)) goto normal; + if (big_sparse_p(y)) return bigmul1_normal(y, x); + + /* balance multiplication by slicing y when x is much smaller than y */ + if (2 * xn <= yn) return bigmul1_balance(x, y); + + /* multiplication by karatsuba method */ + return bigmul1_karatsuba(x, y); +} + /* * call-seq: * big * other => Numeric @@ -1607,7 +1844,22 @@ rb_big_mul0(VALUE x, VALUE y) VALUE rb_big_mul(VALUE x, VALUE y) { - return bignorm(rb_big_mul0(x, y)); + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + return DBL2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y)); + + default: + return rb_num_coerce_bin(x, y, '*'); + } + + return bignorm(bigmul0(x, y)); } struct big_div_struct { @@ -1661,6 +1913,13 @@ bigdivrem1(void *ptr) return Qnil; } +static void +rb_big_stop(void *ptr) +{ + VALUE *stop = (VALUE*)ptr; + *stop = Qtrue; +} + static VALUE bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp) { @@ -2037,7 +2296,7 @@ bigsqr(VALUE x) BDIGIT_DBL num; if (len < 4000 / BITSPERDIG) { - return bigtrunc(rb_big_mul0(x, x)); + return bigtrunc(bigmul0(x, x)); } a = bignew(len - k, 1); @@ -2054,7 +2313,7 @@ bigsqr(VALUE x) } MEMCPY(BDIGITS(z) + 2 * k, BDIGITS(a2), BDIGIT, RBIGNUM_LEN(a2)); RBIGNUM_SET_LEN(z, len); - a2 = bigtrunc(rb_big_mul0(a, b)); + a2 = bigtrunc(bigmul0(a, b)); len = RBIGNUM_LEN(a2); for (i = 0, num = 0; i < len; i++) { num += (BDIGIT_DBL)BDIGITS(z)[i + k] + ((BDIGIT_DBL)BDIGITS(a2)[i] << 1); @@ -2125,7 +2384,7 @@ rb_big_pow(VALUE x, VALUE y) for (mask = FIXNUM_MAX + 1; mask; mask >>= 1) { if (z) z = bigtrunc(bigsqr(z)); if (yy & mask) { - z = z ? bigtrunc(rb_big_mul0(z, x)) : x; + z = z ? bigtrunc(bigmul0(z, x)) : x; } } return bignorm(z); -- cgit v1.2.3