summaryrefslogtreecommitdiff
path: root/ext/json/parser
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser')
-rw-r--r--ext/json/parser/parser.c81
1 files changed, 48 insertions, 33 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index c0631728c3..dc76ca2cda 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -7,9 +7,9 @@ static VALUE CNaN, CInfinity, CMinusInfinity;
static ID i_new, i_try_convert, i_uminus, i_encode;
-static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
- sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
- sym_allow_duplicate_key;
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
+ sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
+ sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
static int binary_encindex;
static int utf8_encindex;
@@ -382,7 +382,7 @@ typedef struct json_frame_stack_struct {
json_frame *ptr;
} json_frame_stack;
-enum duplicate_key_action {
+enum deprecatable_action {
JSON_DEPRECATED = 0,
JSON_IGNORE,
JSON_RAISE,
@@ -392,7 +392,8 @@ typedef struct JSON_ParserStruct {
VALUE on_load_proc;
VALUE decimal_class;
ID decimal_method_id;
- enum duplicate_key_action on_duplicate_key;
+ enum deprecatable_action on_duplicate_key;
+ enum deprecatable_action on_comment;
int max_nesting;
bool allow_nan;
bool allow_trailing_comma;
@@ -488,7 +489,7 @@ static const rb_data_type_t JSON_Parser_frame_stack_type = {
.dfree = json_frame_stack_free,
.dsize = json_frame_stack_memsize,
},
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE,
};
static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
@@ -590,6 +591,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
*column_out = column;
}
+static const unsigned int MAX_DEPRECATIONS = 5;
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
{
long line, column;
@@ -707,9 +710,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
static const rb_data_type_t JSON_ParserConfig_type;
+const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
NOINLINE(static) void
-json_eat_comments(JSON_ParserState *state)
+json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
{
+ if (config->on_comment == JSON_RAISE) {
+ raise_parse_error("unexpected token %s", state);
+ }
+
const char *start = state->cursor;
state->cursor++;
@@ -744,10 +752,15 @@ json_eat_comments(JSON_ParserState *state)
raise_parse_error_at("unexpected token %s", state, start);
break;
}
+
+ if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
+ state->emitted_deprecations++;
+ emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
+ }
}
ALWAYS_INLINE(static) void
-json_eat_whitespace(JSON_ParserState *state)
+json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config)
{
while (true) {
switch (peek(state)) {
@@ -778,7 +791,7 @@ json_eat_whitespace(JSON_ParserState *state)
state->cursor++;
break;
case '/':
- json_eat_comments(state);
+ json_eat_comments(state, config);
break;
default:
@@ -1127,9 +1140,9 @@ NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_Parser
case JSON_DEPRECATED:
// Only emit the first few deprecations to avoid spamming.
- if (state->emitted_deprecations < 5) {
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
+ if (state->emitted_deprecations < MAX_DEPRECATIONS) {
state->emitted_deprecations++;
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
}
return;
@@ -1489,7 +1502,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
JSON_PHASE_DONE: {
// The root document value is parsed; it is the lone survivor on
@@ -1498,7 +1511,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
JSON_PHASE_VALUE: {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
VALUE value;
switch (peek(state)) {
@@ -1559,7 +1572,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case '[': {
state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == ']') {
state->cursor++;
@@ -1585,7 +1598,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
const char *object_start_cursor = state->cursor;
state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == '}') {
state->cursor++;
@@ -1623,16 +1636,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
- case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(Qundef);
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
JSON_PHASE_OBJECT_KEY: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (RB_LIKELY(peek(state) == '"')) {
json_push_value(state, config, json_parse_string(state, config, true));
@@ -1648,13 +1661,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
raise_parse_error("expected object key, got: %s", state);
}
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
JSON_PHASE_OBJECT_COLON: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (RB_LIKELY(peek(state) == ':')) {
state->cursor++;
@@ -1669,20 +1682,20 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
raise_parse_error("expected ':' after object key, got: %s", state);
}
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
JSON_PHASE_ARRAY_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
const char next_char = peek(state);
if (RB_LIKELY(next_char == ',')) {
state->cursor++;
if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == ']') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_ARRAY_COMMA;
@@ -1705,26 +1718,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
- case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(Qundef);
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
} else {
raise_parse_error("expected ',' or ']' after array value", state);
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
JSON_PHASE_OBJECT_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
const char next_char = peek(state);
if (RB_LIKELY(next_char == ',')) {
state->cursor++;
if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == '}') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_OBJECT_COMMA;
@@ -1754,21 +1767,21 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
- case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(Qundef);
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
} else {
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
- UNREACHABLE_RETURN(Qundef);
+ JSON_UNREACHABLE_RETURN(Qundef);
}
-static void json_ensure_eof(JSON_ParserState *state)
+static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
{
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (!eos(state)) {
raise_parse_error("unexpected token at end of stream %s", state);
}
@@ -1825,6 +1838,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
@@ -1977,7 +1991,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
RB_GC_GUARD(value_stack_handle);
RB_GC_GUARD(frame_stack_handle);
RB_GC_GUARD(Vsource);
- json_ensure_eof(state);
+ json_ensure_eof(state, config);
return result;
}
@@ -2079,6 +2093,7 @@ void Init_parser(void)
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));