summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Hasiński <krzysztof.hasinski@gmail.com>2026-05-08 19:10:12 +0200
committerGitHub <noreply@github.com>2026-05-09 02:10:12 +0900
commit4a0072d5f29befde814ea0d9a83c711e1f049564 (patch)
tree08228a0a0cabc77049fc1754fa02ec4a03e82568
parent33744d25cfdd056552ab9a464ac250d56dfaaf2c (diff)
Speed up Integer#to_s with a two digit lookup table (#16719)
* numeric: emit two decimal digits per iteration in rb_fix2str Replace the digit-at-a-time loop in rb_fix2str with the standard itoa 2-digit lookup table for base 10. Each iteration now writes two digits using a single (u % 100, u / 100) pair, so the number of loop iterations is halved for multi-digit integers. The classic per-digit loop is kept for non-base-10 conversion. Benchmark (Apple M-series, 5M-10M ops, best of 3 runs): case base patch delta --------- ----- ----- ----- 1-digit (5) 64 ns/op 64 ns/op -0% 2-digit (42) 64 ns/op 65 ns/op +2% (noise) 3-digit (400) 66 ns/op 64 ns/op -3% 5-digit (12345) 69 ns/op 67 ns/op -3% 10-digit (1234567890) 77 ns/op 67 ns/op -13% 19-digit (2^62-1) 111 ns/op 75 ns/op -33% The crossover is at ~3 digits: below that the constant setup dominates and the benefit is within noise, above that the halved iteration count shows up linearly. Typical Rails payloads mix short IDs (1-5 digits) and longer values (timestamps, nanos, large counts), so the win is workload-dependent but strictly non-negative for real code. Correctness: 100k random fuzz across the full fixnum range plus targeted edges (0, ±1, ±99, ±100, 2^30-1, 2^62-1, etc.) all pass. make test-all shows 34694 tests, 7325860 assertions, 0 new failures (same pre-existing TestArgf#test_puts flake as on master) — test_integer.rb alone runs 38 tests / 421628 assertions of which Integer#to_s exercises the bulk, all pass. The 200-byte lookup table sits in .rodata and fits in a single cache line of its own (3 lines for the whole table). No change to public API, no change to bignum conversion, no change to non-base-10 conversion paths. * bignum: emit two decimal digits per iteration in big2str_2bdigits Extend the 2-digit lookup-table itoa optimisation from rb_fix2str to the inner conversion loop used by Bignum#to_s. big2str_2bdigits has two code paths — a leading-chunk path that emits variable-length digits, and a recursive-chunk path that emits a fixed-width zero- padded block — and both gain from the halved division count. The classic per-digit loop is preserved for non-base-10 conversion. Moves the ruby_decimal_digit_pairs table from a file-static in numeric.c to bignum.c next to ruby_digitmap, and exposes it through internal/bignum.h so both files share the same 200-byte .rodata instance. Benchmark (Apple M-series, best of 3 runs, measures bignum-only speedup against the preceding fixnum commit): case base patch delta --------- ----- ----- ----- big_20dig 10^19+... 146 ns/op 124 ns/op -15% big_40dig 10^39+... 174 ns/op 152 ns/op -13% big_100dig 10^99+42 236 ns/op 213 ns/op -10% big_500dig 10^499+7 1119 ns/op 1086 ns/op -3% big_1000dig 10^999 3490 ns/op 3459 ns/op -1% fix_19dig 2^62-1 76 ns/op 76 ns/op 0% (unchanged path) Wins concentrate in the 20-100 digit range where big2str_2bdigits is the dominant cost. Above ~500 digits the Karatsuba divmod recursion dominates and the digit-emission saving shrinks to the noise floor. The 20-100 range is what actual Ruby code exercises (financial high-precision sums, nanosecond timestamps, large counters); crypto-size (1000+ digit) bignums are rare in to_s paths. Correctness: 100k random fixnum fuzz unchanged, 500 random bignum fuzz up to 2^256 with cross-check against sprintf("%d"), bases 2/8/16/36 round-trip, plus edge cases (0, just-above-fixnum, ±2^100, 20-digit strings near the fixnum boundary). test/ruby/test_integer.rb stays at 38 tests / 421628 assertions / 0 failures, test_bignum.rb passes 74 / 607 / 0 failures, full make test-all reports 34694 tests / 0 new failures (same TestArgf#test_puts pre-existing flake as master). * benchmark: add int_to_s yaml for Integer#to_s Reproducible benchmark for the two preceding commits. Covers: - 1/2/3/5/10/19-digit positive fixnums (spans the break-even point and the two large-number wins at the top) - A negative fixnum (exercises the minus-sign prepend path) - 20/40/100-digit bignums (spans the big2str_2bdigits win range) - Two string-interpolation scenarios, so reviewers can see how much of the Integer#to_s speedup reaches real code that allocates the result string too Intended to be consumed by benchmark-driver against master vs int-to-s-twodigit for A/B comparison. Matches the numbers in the commit messages of 5bfb7e02a2 and c5df6de835. --------- Co-authored-by: tomoya ishida <tomoyapenguin@gmail.com>
-rw-r--r--benchmark/int_to_s.yml25
-rw-r--r--bignum.c86
-rw-r--r--internal/bignum.h1
-rw-r--r--numeric.c36
4 files changed, 135 insertions, 13 deletions
diff --git a/benchmark/int_to_s.yml b/benchmark/int_to_s.yml
new file mode 100644
index 0000000000..000dae9612
--- /dev/null
+++ b/benchmark/int_to_s.yml
@@ -0,0 +1,25 @@
+prelude: |
+ # frozen_string_literal: true
+ N1 = 5
+ N2 = 42
+ N3 = 400
+ N5 = 12345
+ N10 = 1_234_567_890
+ N19 = 4_611_686_018_427_387_903
+ NEG = -1_234_567_890
+ BIG20 = 10 ** 19 + 12_345_678_901_234_567
+ BIG40 = 10 ** 39 + 123_456_789_012_345
+ BIG100 = 10 ** 99 + 42
+benchmark:
+ fix_1digit: "N1.to_s"
+ fix_2digit: "N2.to_s"
+ fix_3digit: "N3.to_s"
+ fix_5digit: "N5.to_s"
+ fix_10digit: "N10.to_s"
+ fix_19digit: "N19.to_s"
+ fix_negative: "NEG.to_s"
+ big_20digit: "BIG20.to_s"
+ big_40digit: "BIG40.to_s"
+ big_100digit: "BIG100.to_s"
+ interp_id: '"id=#{N10}"'
+ interp_mixed: '"a=#{N2},b=#{N5},c=#{N10}"'
diff --git a/bignum.c b/bignum.c
index e4af035cac..28924b4eb9 100644
--- a/bignum.c
+++ b/bignum.c
@@ -64,6 +64,21 @@ static const bool debug_integer_pack = (
const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+/* Two-digit decimal lookup table. Offset 2*n holds the ASCII pair for
+ * n in the range 0..99. Used by both rb_fix2str in numeric.c and
+ * big2str_2bdigits below to emit two base-10 digits per iteration. */
+const char ruby_decimal_digit_pairs[201] =
+ "00010203040506070809"
+ "10111213141516171819"
+ "20212223242526272829"
+ "30313233343536373839"
+ "40414243444546474849"
+ "50515253545556575859"
+ "60616263646566676869"
+ "70717273747576777879"
+ "80818283848586878889"
+ "90919293949596979899";
+
#ifndef SIZEOF_BDIGIT_DBL
# if SIZEOF_INT*2 <= SIZEOF_LONG_LONG
# define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG
@@ -4811,11 +4826,34 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
return;
p = buf;
j = sizeof(buf);
- do {
- BDIGIT_DBL idx = num % b2s->base;
- num /= b2s->base;
- p[--j] = ruby_digitmap[idx];
- } while (num);
+ if (b2s->base == 10) {
+ /* Emit two decimal digits per iteration from ruby_decimal_digit_pairs.
+ * See the comment on the table in bignum.c near ruby_digitmap. */
+ while (num >= 100) {
+ BDIGIT_DBL idx = (num % 100) * 2;
+ num /= 100;
+ j -= 2;
+ p[j] = ruby_decimal_digit_pairs[idx];
+ p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ if (num >= 10) {
+ BDIGIT_DBL idx = num * 2;
+ j -= 2;
+ p[j] = ruby_decimal_digit_pairs[idx];
+ p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ else {
+ /* num is 1..9 here (0 was handled above) */
+ p[--j] = (char)('0' + num);
+ }
+ }
+ else {
+ do {
+ BDIGIT_DBL idx = num % b2s->base;
+ num /= b2s->base;
+ p[--j] = ruby_digitmap[idx];
+ } while (num);
+ }
len = sizeof(buf) - j;
big2str_alloc(b2s, len + taillen);
MEMCPY(b2s->ptr, buf + j, char, len);
@@ -4823,11 +4861,39 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
else {
p = b2s->ptr;
j = b2s->hbase2_numdigits;
- do {
- BDIGIT_DBL idx = num % b2s->base;
- num /= b2s->base;
- p[--j] = ruby_digitmap[idx];
- } while (j);
+ if (b2s->base == 10) {
+ /* Non-beginning chunks must emit EXACTLY hbase2_numdigits,
+ * zero-padded on the left. Consume num in 2-digit groups,
+ * handle the odd trailing digit, then memset remaining
+ * positions with '0'. */
+ while (num >= 100) {
+ BDIGIT_DBL idx = (num % 100) * 2;
+ num /= 100;
+ j -= 2;
+ p[j] = ruby_decimal_digit_pairs[idx];
+ p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ if (num >= 10) {
+ BDIGIT_DBL idx = num * 2;
+ j -= 2;
+ p[j] = ruby_decimal_digit_pairs[idx];
+ p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ else if (num > 0) {
+ p[--j] = (char)('0' + num);
+ }
+ if (j > 0) {
+ memset(p, '0', j);
+ j = 0;
+ }
+ }
+ else {
+ do {
+ BDIGIT_DBL idx = num % b2s->base;
+ num /= b2s->base;
+ p[--j] = ruby_digitmap[idx];
+ } while (j);
+ }
len = b2s->hbase2_numdigits;
}
b2s->ptr += len;
diff --git a/internal/bignum.h b/internal/bignum.h
index f11fbd3a4d..7389a17c74 100644
--- a/internal/bignum.h
+++ b/internal/bignum.h
@@ -107,6 +107,7 @@ struct RBignum {
/* bignum.c */
extern const char ruby_digitmap[];
+extern const char ruby_decimal_digit_pairs[];
double rb_big_fdiv_double(VALUE x, VALUE y);
VALUE rb_big_uminus(VALUE x);
VALUE rb_big_hash(VALUE);
diff --git a/numeric.c b/numeric.c
index 40b7bfc0f8..175bd7cfa0 100644
--- a/numeric.c
+++ b/numeric.c
@@ -4040,6 +4040,11 @@ rb_int_uminus(VALUE num)
}
}
+/* ruby_decimal_digit_pairs is defined in bignum.c and declared in
+ * internal/bignum.h. See there for the rationale of the 2-digit
+ * lookup-table itoa optimisation; both rb_fix2str here and big2str_2bdigits
+ * in bignum.c consume it. */
+
VALUE
rb_fix2str(VALUE x, int base)
{
@@ -4072,9 +4077,34 @@ rb_fix2str(VALUE x, int base)
else {
u = val;
}
- do {
- *--b = ruby_digitmap[(int)(u % base)];
- } while (u /= base);
+ if (base == 10) {
+ /* Emit two digits per iteration from a precomputed table. The
+ * compiler lowers `u % 100` and `u / 100` to a single multiply +
+ * shift, so each iteration costs roughly one multiply, one shift,
+ * and two stores. About 2x fewer iterations than the classic
+ * per-digit loop for multi-digit inputs. */
+ while (u >= 100) {
+ unsigned long idx = (u % 100) * 2;
+ u /= 100;
+ b -= 2;
+ b[0] = ruby_decimal_digit_pairs[idx];
+ b[1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ if (u >= 10) {
+ unsigned long idx = u * 2;
+ b -= 2;
+ b[0] = ruby_decimal_digit_pairs[idx];
+ b[1] = ruby_decimal_digit_pairs[idx + 1];
+ }
+ else {
+ *--b = (char)('0' + u);
+ }
+ }
+ else {
+ do {
+ *--b = ruby_digitmap[(int)(u % base)];
+ } while (u /= base);
+ }
if (neg) {
*--b = '-';
}