diff options
Diffstat (limited to 'ext/json/parser/parser.c')
| -rw-r--r-- | ext/json/parser/parser.c | 461 |
1 files changed, 248 insertions, 213 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 308b47c373..c0631728c3 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -294,6 +294,8 @@ static const rb_data_type_t JSON_Parser_rvalue_stack_type = { .dsize = rvalue_stack_memsize, .dcompact = rvalue_stack_compact, }, + // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED + // because it churns a lot of values so triggering write barriers every time is very costly. .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, }; @@ -1454,7 +1456,7 @@ static inline void json_value_completed(json_frame *frame) frame->phase = (enum json_frame_phase) frame->type; } -static inline bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset) +ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset) { // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch. @@ -1477,258 +1479,291 @@ static inline bool json_match_keyword(JSON_ParserState *state, const char *keywo // itself is just another frame whose value, once parsed, leaves its phase DONE. static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) { - while (true) { - json_frame *frame = json_frame_stack_peek(state->frames); - - switch (frame->phase) { - case JSON_PHASE_DONE: { - // The root document value is parsed; it is the lone survivor on - // the rvalue stack. 
- return *rvalue_stack_peek(state->value_stack, 1); - } - - case JSON_PHASE_VALUE: { - JSON_PHASE_VALUE: - json_eat_whitespace(state); - - VALUE value; - switch (peek(state)) { - case 'n': - if (json_match_keyword(state, "null", 0)) { - value = Qnil; - break; - } - - raise_parse_error("unexpected token %s", state); - case 't': - if (json_match_keyword(state, "true", 0)) { - value = Qtrue; - break; - } - - raise_parse_error("unexpected token %s", state); - case 'f': - if (json_match_keyword(state, "false", 1)) { - value = Qfalse; - break; - } - - raise_parse_error("unexpected token %s", state); - case 'N': - // Note: memcmp with a small power of two compile to an integer comparison - if (config->allow_nan && json_match_keyword(state, "NaN", 1)) { - value = CNaN; - break; - } - - raise_parse_error("unexpected token %s", state); - case 'I': - if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { - value = CInfinity; - break; - } + json_frame *frame = json_frame_stack_peek(state->frames); - raise_parse_error("unexpected token %s", state); - case '-': { - state->cursor++; - if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { - value = CMinusInfinity; - } else { - value = json_parse_negative_number(state, config); - } - break; - } - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - value = json_parse_positive_number(state, config); - break; - case '"': - // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} - value = json_parse_string(state, config, false); - break; - case '[': { - state->cursor++; - json_eat_whitespace(state); - - if (peek(state) == ']') { - state->cursor++; - value = json_decode_array(state, config, 0); - break; - } + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case 
JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY; + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + UNREACHABLE_RETURN(Qundef); - state->current_nesting++; - if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { - rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); - } - state->in_array++; - - // Phase stays VALUE: the next iteration reads the first element. - frame = json_frame_stack_push(state, (json_frame){ - .type = JSON_FRAME_ARRAY, - .phase = JSON_PHASE_VALUE, - .value_stack_head = state->value_stack->head, - }); - goto JSON_PHASE_VALUE; - break; - } - case '{': { - const char *object_start_cursor = state->cursor; + JSON_PHASE_DONE: { + // The root document value is parsed; it is the lone survivor on + // the rvalue stack. + return *rvalue_stack_peek(state->value_stack, 1); + } - state->cursor++; - json_eat_whitespace(state); + JSON_PHASE_VALUE: { + json_eat_whitespace(state); - if (peek(state) == '}') { - state->cursor++; - value = json_decode_object(state, config, 0); - break; - } + VALUE value; + switch (peek(state)) { + case 'n': + if (json_match_keyword(state, "null", 0)) { + value = Qnil; + break; + } + raise_parse_error("unexpected token %s", state); - state->current_nesting++; - if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { - rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); - } + case 't': + if (json_match_keyword(state, "true", 0)) { + value = Qtrue; + break; + } + raise_parse_error("unexpected token %s", state); - // Phase KEY: the next iteration reads the first key. 
- frame = json_frame_stack_push(state, (json_frame){ - .type = JSON_FRAME_OBJECT, - .phase = JSON_PHASE_OBJECT_KEY, - .value_stack_head = state->value_stack->head, - .start_cursor = object_start_cursor, - }); - goto JSON_PHASE_OBJECT_KEY; - break; - } + case 'f': + if (json_match_keyword(state, "false", 1)) { + value = Qfalse; + break; + } + raise_parse_error("unexpected token %s", state); - case 0: - raise_parse_error("unexpected end of input", state); + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && json_match_keyword(state, "NaN", 1)) { + value = CNaN; + break; + } + raise_parse_error("unexpected token %s", state); - default: - raise_parse_error("unexpected character: %s", state); + case 'I': + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CInfinity; + break; } + raise_parse_error("unexpected token %s", state); - json_push_value(state, config, value); - json_value_completed(frame); + case '-': { + state->cursor++; + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CMinusInfinity; + } else { + value = json_parse_negative_number(state, config); + } break; } - case JSON_PHASE_OBJECT_KEY: { - JSON_PHASE_OBJECT_KEY: - JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + value = json_parse_positive_number(state, config); + break; + case '"': + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + value = json_parse_string(state, config, false); + break; + + case '[': { + state->cursor++; json_eat_whitespace(state); - if (RB_LIKELY(peek(state) == '"')) { - json_push_value(state, config, json_parse_string(state, config, true)); - frame->phase = JSON_PHASE_OBJECT_COLON; - goto JSON_PHASE_OBJECT_COLON; - } else { - // The message differs for the first key vs. 
a key after a - // ',': the first is the only one reached with nothing pushed - // for this object yet. - if (json_frame_entry_count(frame, state->value_stack) == 0) { - raise_parse_error("expected object key, got %s", state); - } else { - raise_parse_error("expected object key, got: %s", state); - } + if (peek(state) == ']') { + state->cursor++; + value = json_decode_array(state, config, 0); + break; } - break; - } - case JSON_PHASE_OBJECT_COLON: { - JSON_PHASE_OBJECT_COLON: - JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + + // Phase stays VALUE: the next iteration reads the first element. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_ARRAY, + .phase = JSON_PHASE_VALUE, + .value_stack_head = state->value_stack->head, + }); + goto JSON_PHASE_VALUE; + } + case '{': { + const char *object_start_cursor = state->cursor; + state->cursor++; json_eat_whitespace(state); - if (RB_LIKELY(peek(state) == ':')) { + if (peek(state) == '}') { state->cursor++; - frame->phase = JSON_PHASE_VALUE; - goto JSON_PHASE_VALUE; - } else { - // First colon (only the first pair's key is pushed, nothing - // else) vs. a later one. - if (json_frame_entry_count(frame, state->value_stack) == 1) { - raise_parse_error("expected ':' after object key", state); - } else { - raise_parse_error("expected ':' after object key, got: %s", state); - } + value = json_decode_object(state, config, 0); + break; } - break; + + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + + // Phase KEY: the next iteration reads the first key. 
+ frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_OBJECT, + .phase = JSON_PHASE_OBJECT_KEY, + .value_stack_head = state->value_stack->head, + .start_cursor = object_start_cursor, + }); + goto JSON_PHASE_OBJECT_KEY; } - case JSON_PHASE_ARRAY_COMMA: { - JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); + case 0: + raise_parse_error("unexpected end of input", state); - json_eat_whitespace(state); + default: + raise_parse_error("unexpected character: %s", state); + } - const char next_char = peek(state); + json_push_value(state, config, value); + json_value_completed(frame); - if (RB_LIKELY(next_char == ',')) { - state->cursor++; - if (config->allow_trailing_comma) { - json_eat_whitespace(state); - if (peek(state) == ']') { - // Trailing comma: stay in COMMA to close on the next iteration. - break; - } - } - frame->phase = JSON_PHASE_VALUE; - goto JSON_PHASE_VALUE; - } else if (next_char == ']') { - state->cursor++; - long count = json_frame_entry_count(frame, state->value_stack); - state->current_nesting--; - state->in_array--; - json_frame_stack_pop(state->frames); - json_push_value(state, config, json_decode_array(state, config, count)); - json_value_completed(json_frame_stack_peek(state->frames)); - } else { - raise_parse_error("expected ',' or ']' after array value", state); - } - break; + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + UNREACHABLE_RETURN(Qundef); + } + + JSON_PHASE_OBJECT_KEY: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == '"')) { + json_push_value(state, config, json_parse_string(state, config, true)); + frame->phase = JSON_PHASE_OBJECT_COLON; + 
goto JSON_PHASE_OBJECT_COLON; + } else { + // The message differs for the first key vs. a key after a + // ',': the first is the only one reached with nothing pushed + // for this object yet. + if (json_frame_entry_count(frame, state->value_stack) == 0) { + raise_parse_error("expected object key, got %s", state); + } else { + raise_parse_error("expected object key, got: %s", state); + } + } + UNREACHABLE_RETURN(Qundef); + } + + JSON_PHASE_OBJECT_COLON: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == ':')) { + state->cursor++; + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else { + // First colon (only the first pair's key is pushed, nothing + // else) vs. a later one. + if (json_frame_entry_count(frame, state->value_stack) == 1) { + raise_parse_error("expected ':' after object key", state); + } else { + raise_parse_error("expected ':' after object key, got: %s", state); } + } + UNREACHABLE_RETURN(Qundef); + } + + JSON_PHASE_ARRAY_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); - case JSON_PHASE_OBJECT_COMMA: { - JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + json_eat_whitespace(state); + + const char next_char = peek(state); + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { json_eat_whitespace(state); - const char next_char = peek(state); + if (peek(state) == ']') { + // Trailing comma: stay in COMMA to close on the next iteration. 
+ goto JSON_PHASE_ARRAY_COMMA; + } + } + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else if (next_char == ']') { + state->cursor++; + long count = json_frame_entry_count(frame, state->value_stack); + state->current_nesting--; + state->in_array--; + json_frame_stack_pop(state->frames); + json_push_value(state, config, json_decode_array(state, config, count)); + frame = json_frame_stack_peek(state->frames); + json_value_completed(frame); + + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + } else { + raise_parse_error("expected ',' or ']' after array value", state); + } + UNREACHABLE_RETURN(Qundef); + } - if (RB_LIKELY(next_char == ',')) { - state->cursor++; - json_eat_whitespace(state); + JSON_PHASE_OBJECT_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - if (config->allow_trailing_comma) { - if (peek(state) == '}') { - // Trailing comma: stay in COMMA to close on the next iteration. 
- break; - } - } + json_eat_whitespace(state); + const char next_char = peek(state); - frame->phase = JSON_PHASE_OBJECT_KEY; - goto JSON_PHASE_OBJECT_KEY; + if (RB_LIKELY(next_char == ',')) { + state->cursor++; - break; - } else if (next_char == '}') { - state->cursor++; - state->current_nesting--; - size_t count = json_frame_entry_count(frame, state->value_stack); - - // Temporary rewind cursor in case an error is raised - const char *final_cursor = state->cursor; - state->cursor = frame->start_cursor; - VALUE object = json_decode_object(state, config, count); - state->cursor = final_cursor; - - json_frame_stack_pop(state->frames); - json_push_value(state, config, object); - json_value_completed(json_frame_stack_peek(state->frames)); - break; - } else { - raise_parse_error("expected ',' or '}' after object value, got: %s", state); + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == '}') { + // Trailing comma: stay in COMMA to close on the next iteration. 
+ goto JSON_PHASE_OBJECT_COMMA; } } + + frame->phase = JSON_PHASE_OBJECT_KEY; + goto JSON_PHASE_OBJECT_KEY; + } else if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = json_frame_entry_count(frame, state->value_stack); + + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = frame->start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + json_push_value(state, config, object); + json_frame_stack_pop(state->frames); + frame = json_frame_stack_peek(state->frames); + json_value_completed(frame); + + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + } else { + raise_parse_error("expected ',' or '}' after object value, got: %s", state); } + UNREACHABLE_RETURN(Qundef); } + + UNREACHABLE_RETURN(Qundef); } static void json_ensure_eof(JSON_ParserState *state) |
