summary | refs | log | tree | commit | diff
path: root/ext/json/parser/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser/parser.c')
-rw-r--r-- ext/json/parser/parser.c 58
1 files changed, 42 insertions, 16 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index a740bd42ed..503bed1fd4 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -385,6 +385,13 @@ static inline char peek(JSON_ParserState *state)
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
{
+ JSON_ASSERT(state->cursor <= state->end);
+
+ // Redundant but helpful for hardening
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
const char *cursor = state->cursor;
long column = 0;
long line = 1;
@@ -1022,6 +1029,13 @@ ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
}
state->cursor++;
}
+
+ // If the string ended with an unterminated escape sequence, we might
+ // have gone past the end.
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
return false;
}
@@ -1202,7 +1216,11 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
raise_parse_error_at("invalid number: %s", state, start);
}
- exponent = negative_exponent ? -abs_exponent : abs_exponent;
+ if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
+ exponent = negative_exponent ? INT64_MIN : INT64_MAX;
+ } else {
+ exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
+ }
}
if (integer) {
@@ -1457,18 +1475,21 @@ static void json_ensure_eof(JSON_ParserState *state)
static VALUE convert_encoding(VALUE source)
{
- int encindex = RB_ENCODING_GET(source);
+ StringValue(source);
+ int encindex = RB_ENCODING_GET(source);
- if (RB_LIKELY(encindex == utf8_encindex)) {
- return source;
- }
+ if (RB_LIKELY(encindex == utf8_encindex)) {
+ return source;
+ }
- if (encindex == binary_encindex) {
- // For historical reason, we silently reinterpret binary strings as UTF-8
- return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
- }
+ if (encindex == binary_encindex) {
+ // For historical reason, we silently reinterpret binary strings as UTF-8
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+ }
- return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+ source = rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+ StringValue(source);
+ return source;
}
struct parser_config_init_args {
@@ -1583,10 +1604,16 @@ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
return self;
}
-static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
+static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
+ VALUE Vsource = convert_encoding(src);
+
+ // Ensure the string isn't mutated under us.
+ // The classic API to use is `rb_str_locktmp`, but then we'd
+ // need to use `rb_protect` to make sure we always unlock.
+ if (Vsource == src) {
+ Vsource = rb_str_new_frozen(Vsource);
+ }
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
rvalue_stack stack = {
@@ -1597,6 +1624,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
long len;
const char *start;
+
RSTRING_GETMEM(Vsource, start, len);
VALUE stack_handle = 0;
@@ -1615,6 +1643,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
// it won't cause a leak.
rvalue_stack_eagerly_release(stack_handle);
RB_GC_GUARD(stack_handle);
+ RB_GC_GUARD(Vsource);
json_ensure_eof(state);
return result;
@@ -1635,9 +1664,6 @@ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
-
JSON_ParserConfig _config = {0};
JSON_ParserConfig *config = &_config;
parser_config_init(config, opts, false);