summaryrefslogtreecommitdiff
path: root/ext/json/parser/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser/parser.c')
-rw-r--r-- ext/json/parser/parser.c 785
1 files changed, 565 insertions, 220 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index d0482f6861..c0631728c3 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -243,7 +243,7 @@ static void rvalue_stack_mark(void *ptr)
long index;
if (stack && stack->ptr) {
for (index = 0; index < stack->head; index++) {
- rb_gc_mark(stack->ptr[index]);
+ rb_gc_mark_movable(stack->ptr[index]);
}
}
}
@@ -268,7 +268,22 @@ static void rvalue_stack_free(void *ptr)
static size_t rvalue_stack_memsize(const void *ptr)
{
const rvalue_stack *stack = (const rvalue_stack *)ptr;
- return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
+ size_t memsize = sizeof(VALUE) * stack->capa;
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ memsize += sizeof(rvalue_stack);
+#endif
+ return memsize;
+}
+
+static void rvalue_stack_compact(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ long index;
+ if (stack && stack->ptr) {
+ for (index = 0; index < stack->head; index++) {
+ stack->ptr[index] = rb_gc_location(stack->ptr[index]);
+ }
+ }
}
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
@@ -277,7 +292,10 @@ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
.dmark = rvalue_stack_mark,
.dfree = rvalue_stack_free,
.dsize = rvalue_stack_memsize,
+ .dcompact = rvalue_stack_compact,
},
+ // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
+ // because it churns a lot of values so triggering write barriers every time is very costly.
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
};
@@ -309,31 +327,60 @@ static void rvalue_stack_eagerly_release(VALUE handle)
}
}
-static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
-{
- int len = 1;
- if (ch <= 0x7F) {
- buf[0] = (char) ch;
- } else if (ch <= 0x07FF) {
- buf[0] = (char) ((ch >> 6) | 0xC0);
- buf[1] = (char) ((ch & 0x3F) | 0x80);
- len++;
- } else if (ch <= 0xFFFF) {
- buf[0] = (char) ((ch >> 12) | 0xE0);
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
- buf[2] = (char) ((ch & 0x3F) | 0x80);
- len += 2;
- } else if (ch <= 0x1fffff) {
- buf[0] =(char) ((ch >> 18) | 0xF0);
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
- buf[3] =(char) ((ch & 0x3F) | 0x80);
- len += 3;
- } else {
- buf[0] = '?';
- }
- return len;
-}
+/* frame stack */
+
+// Iterative (non-recursive) parsing keeps an explicit stack of the containers
+// currently being built, instead of relying on the C call stack. Each frame
+// only needs enough bookkeeping to close its container: which kind it is, the
+// rvalue_stack position where its children start (so we know how many to pop),
+// and the cursor at its opening brace (used to rewind for duplicate key
+// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
+// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
+// it's freed even if parsing raises.
+//
+// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
+// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
+// -- the element type and the absence of a mark function are the only real
+// differences. Keep the two in sync: a fix to the spill/release or
+// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
+// other.
+#define JSON_FRAME_STACK_INITIAL_CAPA 32
+
+enum json_frame_type {
+ JSON_FRAME_ROOT, // == JSON_PHASE_DONE
+ JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
+ JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
+};
+
+// Where a frame is within its container's grammar. This is the entirety of the
+// parser's "what to do next" state: json_parse_any dispatches on the top
+// frame's phase and holds no resume state in C locals, so a parse can stop at
+// any value boundary and be resumed purely from the (persistable) frame stack.
+//
+// The first three phases are deliberately equal to the corresponding json_frame_type
+// to simplify the transition of phase in json_value_completed.
+enum json_frame_phase {
+ JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
+ JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
+ JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
+ JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
+ JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
+ JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
+};
+
+typedef struct json_frame_struct {
+ enum json_frame_type type;
+ enum json_frame_phase phase;
+ long value_stack_head; // rvalue_stack->head when this container opened
+ const char *start_cursor; // object frames only (the '{'); NULL otherwise
+} json_frame;
+
+typedef struct json_frame_stack_struct {
+ enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
+ long capa;
+ long head;
+ json_frame *ptr;
+} json_frame_stack;
enum duplicate_key_action {
JSON_DEPRECATED = 0,
@@ -356,17 +403,148 @@ typedef struct JSON_ParserStruct {
} JSON_ParserConfig;
typedef struct JSON_ParserStateStruct {
- VALUE *stack_handle;
+ VALUE *value_stack_handle;
+ VALUE *frame_stack_handle;
const char *start;
const char *cursor;
const char *end;
- rvalue_stack *stack;
+ rvalue_stack *value_stack;
+ json_frame_stack *frames;
rvalue_cache name_cache;
int in_array;
int current_nesting;
unsigned int emitted_deprecations;
} JSON_ParserState;
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
+
+static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+ long required = stack->capa * 2;
+
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+ stack = json_frame_stack_spill(stack, handle, stack_ref);
+ } else {
+ JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
+ stack->capa = required;
+ }
+ return stack;
+}
+
+static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
+{
+ json_frame_stack *stack = state->frames;
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
+ stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames);
+ }
+
+ json_frame *frame_ptr = &stack->ptr[stack->head++];
+ *frame_ptr = frame;
+ return frame_ptr;
+}
+
+static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
+{
+ return &stack->ptr[stack->head - 1];
+}
+
+static inline void json_frame_stack_pop(json_frame_stack *stack)
+{
+ stack->head--;
+}
+
+static void json_frame_stack_free_buffer(json_frame_stack *stack)
+{
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
+ stack->ptr = NULL;
+}
+
+static void json_frame_stack_free(void *ptr)
+{
+ json_frame_stack *stack = (json_frame_stack *)ptr;
+ if (stack) {
+ json_frame_stack_free_buffer(stack);
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ JSON_SIZED_FREE(stack);
+#endif
+ }
+}
+
+static size_t json_frame_stack_memsize(const void *ptr)
+{
+ const json_frame_stack *stack = (const json_frame_stack *)ptr;
+
+ size_t memsize = sizeof(json_frame) * stack->capa;
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ memsize += sizeof(json_frame_stack);
+#endif
+ return memsize;
+}
+
+static const rb_data_type_t JSON_Parser_frame_stack_type = {
+ .wrap_struct_name = "JSON::Ext::Parser/frame_stack",
+ .function = {
+ .dmark = NULL,
+ .dfree = json_frame_stack_free,
+ .dsize = json_frame_stack_memsize,
+ },
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
+};
+
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+ json_frame_stack *stack;
+ *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+ *stack_ref = stack;
+ MEMCPY(stack, old_stack, json_frame_stack, 1);
+
+ stack->capa = old_stack->capa << 1;
+ stack->ptr = ALLOC_N(json_frame, stack->capa);
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+ MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head);
+ return stack;
+}
+
+static void json_frame_stack_eagerly_release(VALUE handle)
+{
+ if (handle) {
+ json_frame_stack *stack;
+ TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ json_frame_stack_free_buffer(stack);
+#else
+ json_frame_stack_free(stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+#endif
+ }
+}
+
+static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
+{
+ int len = 1;
+ if (ch <= 0x7F) {
+ buf[0] = (char) ch;
+ } else if (ch <= 0x07FF) {
+ buf[0] = (char) ((ch >> 6) | 0xC0);
+ buf[1] = (char) ((ch & 0x3F) | 0x80);
+ len++;
+ } else if (ch <= 0xFFFF) {
+ buf[0] = (char) ((ch >> 12) | 0xE0);
+ buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
+ len += 2;
+ } else if (ch <= 0x1fffff) {
+ buf[0] =(char) ((ch >> 18) | 0xF0);
+ buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
+ buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
+ buf[3] =(char) ((ch & 0x3F) | 0x80);
+ len += 3;
+ } else {
+ buf[0] = '?';
+ }
+ return len;
+}
+
static inline size_t rest(JSON_ParserState *state) {
return state->end - state->cursor;
}
@@ -890,8 +1068,8 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
{
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
- rvalue_stack_pop(state->stack, count);
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count));
+ rvalue_stack_pop(state->value_stack, count);
if (config->freeze) {
RB_OBJ_FREEZE(array);
@@ -941,32 +1119,39 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d
rb_exc_raise(parse_error_new(message, line, column));
}
+NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
+{
+ switch (config->on_duplicate_key) {
+ case JSON_IGNORE:
+ return;
+
+ case JSON_DEPRECATED:
+ // Only emit the first few deprecations to avoid spamming.
+ if (state->emitted_deprecations < 5) {
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
+ state->emitted_deprecations++;
+ }
+ return;
+
+ case JSON_RAISE:
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
+ return;
+ }
+ UNREACHABLE;
+}
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
{
size_t entries_count = count / 2;
VALUE object = rb_hash_new_capa(entries_count);
- const VALUE *pairs = rvalue_stack_peek(state->stack, count);
+ const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
rb_hash_bulk_insert(count, pairs, object);
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
- switch (config->on_duplicate_key) {
- case JSON_IGNORE:
- break;
- case JSON_DEPRECATED:
- // Only emit the first few deprecations to avoid spamming.
- if (state->emitted_deprecations < 5) {
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
- state->emitted_deprecations++;
- }
-
- break;
- case JSON_RAISE:
- raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
- break;
- }
+ json_on_duplicate_key(state, config, count, pairs);
}
- rvalue_stack_pop(state->stack, count);
+ rvalue_stack_pop(state->value_stack, count);
if (config->freeze) {
RB_OBJ_FREEZE(object);
@@ -980,7 +1165,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
if (RB_UNLIKELY(config->on_load_proc)) {
value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
}
- rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
+ rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack);
return value;
}
@@ -1053,7 +1238,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
case '"': {
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
state->cursor++;
- return json_push_value(state, config, string);
+ return string;
}
case '\\': {
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
@@ -1088,12 +1273,16 @@ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_Pars
raise_parse_error("unexpected end of input, expected closing \"", state);
}
+ VALUE string;
if (RB_LIKELY(*state->cursor == '"')) {
- VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
+ string = json_string_fastpath(state, config, start, state->cursor, is_name);
state->cursor++;
- return json_push_value(state, config, string);
}
- return json_parse_escaped_string(state, config, is_name, start);
+ else {
+ string = json_parse_escaped_string(state, config, is_name, start);
+ }
+
+ return string;
}
#if JSON_CPU_LITTLE_ENDIAN_64BITS
@@ -1242,215 +1431,339 @@ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_Par
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
- const char *start = state->cursor;
- state->cursor++;
- return json_parse_number(state, config, true, start);
+ return json_parse_number(state, config, true, state->cursor - 1);
+}
+
+// How many values (array elements, or interleaved object keys+values) have been
+// pushed onto the rvalue stack since this container opened. Used to size the
+// bulk decode on close, and to tell the first key/colon from later ones.
+static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
+{
+ return value_stack->head - frame->value_stack_head;
+}
+
+// A complete value now sits on top of the rvalue stack. Advance the frame that
+// was waiting for it: the root document is done, or the enclosing container
+// moves on to expecting a ',' or its closing bracket. The caller passes the
+// frame it already has in hand -- the one that was expecting the value -- which
+// after a container close is the freshly re-exposed parent.
+static inline void json_value_completed(json_frame *frame)
+{
+ JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
+ JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
+ JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);
+
+ frame->phase = (enum json_frame_phase) frame->type;
}
+ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
+{
+ // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
+ // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch.
+
+ size_t len = strlen(keyword);
+
+ // Note: memcmp with a small power-of-two length and a literal string compiles to an integer comparison.
+ // That's why we sometimes compare starting from the first byte and sometimes from the second.
+ if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
+ state->cursor += len;
+ return true;
+ }
+ return false;
+}
+
+// Parse an arbitrary JSON value iteratively. This is a state machine driven
+// entirely by the top frame's phase so it can stop at any value boundary and
+// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
+// bottom of the stack, so the stack is never empty mid-parse and the document
+// itself is just another frame whose value, once parsed, leaves its phase DONE.
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
{
- json_eat_whitespace(state);
+ json_frame *frame = json_frame_stack_peek(state->frames);
- switch (peek(state)) {
- case 'n':
- if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
- state->cursor += 4;
- return json_push_value(state, config, Qnil);
- }
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
- raise_parse_error("unexpected token %s", state);
- break;
- case 't':
- if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
- state->cursor += 4;
- return json_push_value(state, config, Qtrue);
- }
+ JSON_PHASE_DONE: {
+ // The root document value is parsed; it is the lone survivor on
+ // the rvalue stack.
+ return *rvalue_stack_peek(state->value_stack, 1);
+ }
- raise_parse_error("unexpected token %s", state);
- break;
- case 'f':
- // Note: memcmp with a small power of two compile to an integer comparison
- if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
- state->cursor += 5;
- return json_push_value(state, config, Qfalse);
- }
+ JSON_PHASE_VALUE: {
+ json_eat_whitespace(state);
- raise_parse_error("unexpected token %s", state);
- break;
- case 'N':
- // Note: memcmp with a small power of two compile to an integer comparison
- if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
- state->cursor += 3;
- return json_push_value(state, config, CNaN);
- }
+ VALUE value;
+ switch (peek(state)) {
+ case 'n':
+ if (json_match_keyword(state, "null", 0)) {
+ value = Qnil;
+ break;
+ }
+ raise_parse_error("unexpected token %s", state);
- raise_parse_error("unexpected token %s", state);
- break;
- case 'I':
- if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
- state->cursor += 8;
- return json_push_value(state, config, CInfinity);
- }
+ case 't':
+ if (json_match_keyword(state, "true", 0)) {
+ value = Qtrue;
+ break;
+ }
+ raise_parse_error("unexpected token %s", state);
- raise_parse_error("unexpected token %s", state);
- break;
- case '-': {
- // Note: memcmp with a small power of two compile to an integer comparison
- if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
- if (config->allow_nan) {
- state->cursor += 9;
- return json_push_value(state, config, CMinusInfinity);
+ case 'f':
+ if (json_match_keyword(state, "false", 1)) {
+ value = Qfalse;
+ break;
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 'N':
+ // Note: memcmp with a small power-of-two length compiles to an integer comparison
+ if (config->allow_nan && json_match_keyword(state, "NaN", 1)) {
+ value = CNaN;
+ break;
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 'I':
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CInfinity;
+ break;
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case '-': {
+ state->cursor++;
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CMinusInfinity;
} else {
- raise_parse_error("unexpected token %s", state);
+ value = json_parse_negative_number(state, config);
}
+ break;
}
- return json_push_value(state, config, json_parse_negative_number(state, config));
- break;
- }
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
- return json_push_value(state, config, json_parse_positive_number(state, config));
- break;
- case '"': {
- // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
- return json_parse_string(state, config, false);
- break;
- }
- case '[': {
- state->cursor++;
- json_eat_whitespace(state);
- long stack_head = state->stack->head;
- if (peek(state) == ']') {
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+ value = json_parse_positive_number(state, config);
+ break;
+
+ case '"':
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
+ value = json_parse_string(state, config, false);
+ break;
+
+ case '[': {
state->cursor++;
- return json_push_value(state, config, json_decode_array(state, config, 0));
- } else {
+ json_eat_whitespace(state);
+
+ if (peek(state) == ']') {
+ state->cursor++;
+ value = json_decode_array(state, config, 0);
+ break;
+ }
+
state->current_nesting++;
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
}
state->in_array++;
- json_parse_any(state, config);
+
+ // Phase stays VALUE: the next iteration reads the first element.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_ARRAY,
+ .phase = JSON_PHASE_VALUE,
+ .value_stack_head = state->value_stack->head,
+ });
+ goto JSON_PHASE_VALUE;
}
+ case '{': {
+ const char *object_start_cursor = state->cursor;
- while (true) {
+ state->cursor++;
json_eat_whitespace(state);
- const char next_char = peek(state);
-
- if (RB_LIKELY(next_char == ',')) {
+ if (peek(state) == '}') {
state->cursor++;
- if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
- if (peek(state) == ']') {
- continue;
- }
- }
- json_parse_any(state, config);
- continue;
+ value = json_decode_object(state, config, 0);
+ break;
}
- if (next_char == ']') {
- state->cursor++;
- long count = state->stack->head - stack_head;
- state->current_nesting--;
- state->in_array--;
- return json_push_value(state, config, json_decode_array(state, config, count));
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
}
- raise_parse_error("expected ',' or ']' after array value", state);
+ // Phase KEY: the next iteration reads the first key.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_OBJECT,
+ .phase = JSON_PHASE_OBJECT_KEY,
+ .value_stack_head = state->value_stack->head,
+ .start_cursor = object_start_cursor,
+ });
+ goto JSON_PHASE_OBJECT_KEY;
}
- break;
+
+ case 0:
+ raise_parse_error("unexpected end of input", state);
+
+ default:
+ raise_parse_error("unexpected character: %s", state);
}
- case '{': {
- const char *object_start_cursor = state->cursor;
- state->cursor++;
- json_eat_whitespace(state);
- long stack_head = state->stack->head;
+ json_push_value(state, config, value);
+ json_value_completed(frame);
- if (peek(state) == '}') {
- state->cursor++;
- return json_push_value(state, config, json_decode_object(state, config, 0));
- } else {
- state->current_nesting++;
- if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
- rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
- }
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- if (peek(state) != '"') {
- raise_parse_error("expected object key, got %s", state);
- }
- json_parse_string(state, config, true);
+ JSON_PHASE_OBJECT_KEY: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
- if (peek(state) != ':') {
- raise_parse_error("expected ':' after object key", state);
- }
- state->cursor++;
+ json_eat_whitespace(state);
- json_parse_any(state, config);
+ if (RB_LIKELY(peek(state) == '"')) {
+ json_push_value(state, config, json_parse_string(state, config, true));
+ frame->phase = JSON_PHASE_OBJECT_COLON;
+ goto JSON_PHASE_OBJECT_COLON;
+ } else {
+ // The message differs for the first key vs. a key after a
+ // ',': the first is the only one reached with nothing pushed
+ // for this object yet.
+ if (json_frame_entry_count(frame, state->value_stack) == 0) {
+ raise_parse_error("expected object key, got %s", state);
+ } else {
+ raise_parse_error("expected object key, got: %s", state);
}
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- while (true) {
- json_eat_whitespace(state);
+ JSON_PHASE_OBJECT_COLON: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- const char next_char = peek(state);
- if (next_char == '}') {
- state->cursor++;
- state->current_nesting--;
- size_t count = state->stack->head - stack_head;
+ json_eat_whitespace(state);
- // Temporary rewind cursor in case an error is raised
- const char *final_cursor = state->cursor;
- state->cursor = object_start_cursor;
- VALUE object = json_decode_object(state, config, count);
- state->cursor = final_cursor;
+ if (RB_LIKELY(peek(state) == ':')) {
+ state->cursor++;
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
+ } else {
+ // First colon (only the first pair's key is pushed, nothing
+ // else) vs. a later one.
+ if (json_frame_entry_count(frame, state->value_stack) == 1) {
+ raise_parse_error("expected ':' after object key", state);
+ } else {
+ raise_parse_error("expected ':' after object key, got: %s", state);
+ }
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- return json_push_value(state, config, object);
- }
+ JSON_PHASE_ARRAY_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
- if (next_char == ',') {
- state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state);
- if (config->allow_trailing_comma) {
- if (peek(state) == '}') {
- continue;
- }
- }
+ const char next_char = peek(state);
- if (RB_UNLIKELY(peek(state) != '"')) {
- raise_parse_error("expected object key, got: %s", state);
- }
- json_parse_string(state, config, true);
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == ']') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_ARRAY_COMMA;
+ }
+ }
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
+ } else if (next_char == ']') {
+ state->cursor++;
+ long count = json_frame_entry_count(frame, state->value_stack);
+ state->current_nesting--;
+ state->in_array--;
+ json_frame_stack_pop(state->frames);
+ json_push_value(state, config, json_decode_array(state, config, count));
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ } else {
+ raise_parse_error("expected ',' or ']' after array value", state);
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- json_eat_whitespace(state);
- if (RB_UNLIKELY(peek(state) != ':')) {
- raise_parse_error("expected ':' after object key, got: %s", state);
- }
- state->cursor++;
+ JSON_PHASE_OBJECT_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_parse_any(state, config);
+ json_eat_whitespace(state);
+ const char next_char = peek(state);
+
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
- continue;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == '}') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_OBJECT_COMMA;
}
+ }
- raise_parse_error("expected ',' or '}' after object value, got: %s", state);
+ frame->phase = JSON_PHASE_OBJECT_KEY;
+ goto JSON_PHASE_OBJECT_KEY;
+ } else if (next_char == '}') {
+ state->cursor++;
+ state->current_nesting--;
+ size_t count = json_frame_entry_count(frame, state->value_stack);
+
+ // Temporary rewind cursor in case an error is raised
+ const char *final_cursor = state->cursor;
+ state->cursor = frame->start_cursor;
+ VALUE object = json_decode_object(state, config, count);
+ state->cursor = final_cursor;
+
+ json_push_value(state, config, object);
+ json_frame_stack_pop(state->frames);
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
- break;
+ } else {
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
}
-
- case 0:
- raise_parse_error("unexpected end of input", state);
- break;
-
- default:
- raise_parse_error("unexpected character: %s", state);
- break;
+ UNREACHABLE_RETURN(Qundef);
}
- raise_parse_error("unreachable: %s", state);
- return Qundef;
+ UNREACHABLE_RETURN(Qundef);
}
static void json_ensure_eof(JSON_ParserState *state)
@@ -1616,24 +1929,42 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
}
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
- rvalue_stack stack = {
+ rvalue_stack value_stack = {
.type = RVALUE_STACK_STACK_ALLOCATED,
.ptr = rvalue_stack_buffer,
.capa = RVALUE_STACK_INITIAL_CAPA,
};
+ // Seed the frame stack with the root frame, establishing the invariant that
+ // json_parse_any always has a top frame to dispatch on (so the stack is never
+ // empty mid-parse).
+ json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
+ frame_stack_buffer[0] = (json_frame){
+ .type = JSON_FRAME_ROOT,
+ .phase = JSON_PHASE_VALUE,
+ };
+ json_frame_stack frames = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = frame_stack_buffer,
+ .capa = JSON_FRAME_STACK_INITIAL_CAPA,
+ .head = 1,
+ };
+
long len;
const char *start;
RSTRING_GETMEM(Vsource, start, len);
- VALUE stack_handle = 0;
+ VALUE value_stack_handle = 0;
+ VALUE frame_stack_handle = 0;
JSON_ParserState _state = {
.start = start,
.cursor = start,
.end = start + len,
- .stack = &stack,
- .stack_handle = &stack_handle,
+ .value_stack = &value_stack,
+ .value_stack_handle = &value_stack_handle,
+ .frames = &frames,
+ .frame_stack_handle = &frame_stack_handle,
};
JSON_ParserState *state = &_state;
@@ -1641,8 +1972,10 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
// This may be skipped in case of exception, but
// it won't cause a leak.
- rvalue_stack_eagerly_release(stack_handle);
- RB_GC_GUARD(stack_handle);
+ rvalue_stack_eagerly_release(value_stack_handle);
+ json_frame_stack_eagerly_release(frame_stack_handle);
+ RB_GC_GUARD(value_stack_handle);
+ RB_GC_GUARD(frame_stack_handle);
RB_GC_GUARD(Vsource);
json_ensure_eof(state);
@@ -1674,21 +2007,33 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
static void JSON_ParserConfig_mark(void *ptr)
{
JSON_ParserConfig *config = ptr;
- rb_gc_mark(config->on_load_proc);
- rb_gc_mark(config->decimal_class);
+ rb_gc_mark_movable(config->on_load_proc);
+ rb_gc_mark_movable(config->decimal_class);
}
static size_t JSON_ParserConfig_memsize(const void *ptr)
{
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ return 0;
+#else
return sizeof(JSON_ParserConfig);
+#endif
+}
+
+static void JSON_ParserConfig_compact(void *ptr)
+{
+ JSON_ParserConfig *config = ptr;
+ config->on_load_proc = rb_gc_location(config->on_load_proc);
+ config->decimal_class = rb_gc_location(config->decimal_class);
}
static const rb_data_type_t JSON_ParserConfig_type = {
.wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
.function = {
- JSON_ParserConfig_mark,
- RUBY_DEFAULT_FREE,
- JSON_ParserConfig_memsize,
+ .dmark = JSON_ParserConfig_mark,
+ .dfree = RUBY_DEFAULT_FREE,
+ .dsize = JSON_ParserConfig_memsize,
+ .dcompact = JSON_ParserConfig_compact,
},
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
};