summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2025-11-01 11:06:32 +0100
committergit <svn-admin@ruby-lang.org>2025-11-01 10:22:55 +0000
commitbabf50e33bb0d9e1f3c37d11c1cfdc50c4f5bc7e (patch)
tree597b26775a7206fd268a598f7162392b142c961f
parentdb5708045037a159458de741b46e9c47fe430284 (diff)
[ruby/json] Use SWAR for parsing integers on little endian machines
Closes: https://github.com/ruby/json/pull/878 ``` == Parsing float parsing (2251051 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 23.000 i/100ms Calculating ------------------------------------- after 214.382 (± 0.5%) i/s (4.66 ms/i) - 1.081k in 5.042555s Comparison: before: 189.5 i/s after: 214.4 i/s - 1.13x faster ``` https://github.com/ruby/json/commit/6348ff0891 Co-Authored-By: Scott Myron <samyron@gmail.com>
-rw-r--r--ext/json/parser/parser.c52
1 files changed, 51 insertions, 1 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 3bd654dc3b..e591ca2c5a 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1039,11 +1039,61 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
return Qfalse;
}
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+// Additional References:
+// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
+static inline uint64_t decode_8digits_unrolled(uint64_t val) { // Converts eight ASCII digits packed in val (first digit in the lowest byte) into their decimal value; the bytes are not validated here, so the caller must ensure each one is '0'..'9'.
+ const uint64_t mask = 0x000000FF000000FF; // keeps byte 0 and byte 4 of the 64-bit word
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+ val -= 0x3030303030303030; // '0' is 0x30: turn every ASCII byte into its digit value (0..9)
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; bytes 0, 2, 4 and 6 now each hold a two-digit value (10 * digit + next digit)
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; // weight the four pairs by 1000000, 10000, 100 and 1; the total accumulates in the upper 32 bits, hence the final shift
+ return val;
+}
+
+static inline uint64_t decode_4digits_unrolled(uint32_t val) { // Converts four ASCII digits packed in val (first digit in the lowest byte) into their decimal value; the bytes are not validated here, so the caller must ensure each one is '0'..'9'.
+ const uint32_t mask = 0x000000FF; // keeps the lowest byte
+ const uint32_t mul1 = 100; // weight of the leading two-digit pair
+ val -= 0x30303030; // '0' is 0x30: turn every ASCII byte into its digit value (0..9)
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; bytes 0 and 2 now each hold a two-digit value (10 * digit + next digit)
+ val = ((val & mask) * mul1) + (((val >> 16) & mask)); // leading pair (byte 0) * 100 + trailing pair (byte 2)
+ return val;
+}
+#endif
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
const char *start = state->cursor;
- char next_char;
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ while (rest(state) >= 8) {
+ uint64_t next_8bytes;
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
+
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
+
+ if (match == 0x3333333333333333) { // 8 consecutive digits
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
+ state->cursor += 8;
+ continue;
+ }
+
+ if ((match & 0xFFFFFFFF) == 0x33333333) { // 4 consecutive digits
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
+ state->cursor += 4;
+ break;
+ }
+
+ break;
+ }
+#endif
+
+ char next_char;
while (rb_isdigit(next_char = peek(state))) {
*accumulator = *accumulator * 10 + (next_char - '0');
state->cursor++;