summary | refs | log | tree | commit | diff
path: root/ext/json
diff options
context:
space:
mode:
author: Jean Boussier <jean.boussier@gmail.com> 2020-11-18 11:59:27 +0100
committer: Hiroshi SHIBATA <hsbt@ruby-lang.org> 2021-05-17 19:51:51 +0900
commit: 2de594ca98d95e62f7fcf000f21e174ac3f6fcaf (patch)
tree: 8719739e48ce50c28bfe93c3ce9bf366eb2f2f87 /ext/json
parent: 1d2b4ccaf28596efee65c59dc69ea489a4237079 (diff)
[flori/json] Deduplicate strings inside json_string_unescape
[ci 2] https://github.com/flori/json/commit/1982070cb8
Diffstat (limited to 'ext/json')
-rw-r--r-- ext/json/parser/extconf.rb 1
-rw-r--r-- ext/json/parser/parser.c 84
-rw-r--r-- ext/json/parser/parser.h 2
-rw-r--r-- ext/json/parser/parser.rl 70
4 files changed, 85 insertions, 72 deletions
diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index f832b56a61..feb586e1b4 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -2,6 +2,7 @@
require 'mkmf'
have_func("rb_enc_raise", "ruby.h")
+have_func("rb_enc_interned_str", "ruby.h")
# checking if String#-@ (str_uminus) dedupes... '
begin
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 241ec0d266..a15d20a43b 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -2354,7 +2354,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
}
static const size_t MAX_STACK_BUFFER_SIZE = 128;
-static VALUE json_string_unescape(char *string, char *stringEnd)
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
{
VALUE result = Qnil;
size_t bufferSize = stringEnd - string;
@@ -2363,10 +2363,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
char buf[4];
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
- buffer = xmalloc(bufferSize);
- bufferStart = buffer;
+ bufferStart = buffer = ALLOC_N(char, bufferSize);
} else {
- bufferStart = buffer = alloca(bufferSize);
+ bufferStart = buffer = ALLOCA_N(char, bufferSize);
}
while (pe < stringEnd) {
@@ -2453,15 +2452,42 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
buffer += pe - p;
}
- #ifdef HAVE_RUBY_ENCODING_H
- result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
- #else
- result = rb_str_new(bufferStart, buffer - bufferStart);
- #endif
+ # ifdef HAVE_RB_ENC_INTERNED_STR
+ if (intern) {
+ result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
+ } else {
+ result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+ }
+ if (bufferSize > MAX_STACK_BUFFER_SIZE) {
+ free(bufferStart);
+ }
+ # else
+ result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
free(bufferStart);
}
+
+ if (intern) {
+ # if STR_UMINUS_DEDUPE_FROZEN
+ // Starting from MRI 2.8 it is preferable to freeze the string
+ // before deduplication so that it can be interned directly
+ // otherwise it would be duplicated first which is wasteful.
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+ # elif STR_UMINUS_DEDUPE
+ // MRI 2.5 and older do not deduplicate strings that are already
+ // frozen.
+ result = rb_funcall(result, i_uminus, 0);
+ # else
+ result = rb_str_freeze(result);
+ # endif
+ }
+ # endif
+
+ if (symbolize) {
+ result = rb_str_intern(result);
+ }
+
return result;
}
@@ -2490,7 +2516,7 @@ static const char _JSON_string_nfa_pop_trans[] = {
};
-#line 586 "parser.rl"
+#line 612 "parser.rl"
static int
@@ -2515,7 +2541,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
cs = (int)JSON_string_start;
}
- #line 606 "parser.rl"
+ #line 632 "parser.rl"
json->memo = p;
@@ -2576,9 +2602,9 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
ctr2:
{
- #line 573 "parser.rl"
+ #line 599 "parser.rl"
- *result = json_string_unescape(json->memo + 1, p);
+ *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
{p = p - 1; }
{p+= 1; cs = 8; goto _out;}
@@ -2588,7 +2614,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
{
- #line 583 "parser.rl"
+ #line 609 "parser.rl"
{p = p - 1; } {p+= 1; cs = 8; goto _out;} }
goto st8;
@@ -2703,7 +2729,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
_out: {}
}
- #line 608 "parser.rl"
+ #line 634 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) {
@@ -2717,26 +2743,6 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- } else if (RB_TYPE_P(*result, T_STRING)) {
- # if STR_UMINUS_DEDUPE_FROZEN
- if (json->freeze) {
- // Starting from MRI 2.8 it is preferable to freeze the string
- // before deduplication so that it can be interned directly
- // otherwise it would be duplicated first which is wasteful.
- *result = rb_funcall(rb_str_freeze(*result), i_uminus, 0);
- }
- # elif STR_UMINUS_DEDUPE
- if (json->freeze) {
- // MRI 2.5 and older do not deduplicate strings that are already
- // frozen.
- *result = rb_funcall(*result, i_uminus, 0);
- }
- # else
- rb_str_resize(*result, RSTRING_LEN(*result));
- # endif
- }
if (cs >= JSON_string_first_final) {
return p + 1;
} else {
@@ -2936,7 +2942,7 @@ static const char _JSON_nfa_pop_trans[] = {
};
-#line 829 "parser.rl"
+#line 835 "parser.rl"
/*
@@ -2957,7 +2963,7 @@ static VALUE cParser_parse(VALUE self)
cs = (int)JSON_start;
}
- #line 845 "parser.rl"
+ #line 851 "parser.rl"
p = json->source;
pe = p + json->len;
@@ -3050,7 +3056,7 @@ static VALUE cParser_parse(VALUE self)
goto _out;
ctr2:
{
- #line 821 "parser.rl"
+ #line 827 "parser.rl"
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { {p = p - 1; } {p+= 1; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -3204,7 +3210,7 @@ static VALUE cParser_parse(VALUE self)
_out: {}
}
- #line 848 "parser.rl"
+ #line 854 "parser.rl"
if (cs >= JSON_first_final && p == pe) {
diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h
index d2b074de86..0992bbf6e5 100644
--- a/ext/json/parser/parser.h
+++ b/ext/json/parser/parser.h
@@ -63,7 +63,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static VALUE json_string_unescape(char *string, char *stringEnd);
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize);
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
static VALUE convert_encoding(VALUE source);
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 230d725236..f7be1a5acc 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -453,7 +453,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
}
static const size_t MAX_STACK_BUFFER_SIZE = 128;
-static VALUE json_string_unescape(char *string, char *stringEnd)
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
{
VALUE result = Qnil;
size_t bufferSize = stringEnd - string;
@@ -462,10 +462,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
char buf[4];
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
- buffer = xmalloc(bufferSize);
- bufferStart = buffer;
+ bufferStart = buffer = ALLOC_N(char, bufferSize);
} else {
- bufferStart = buffer = alloca(bufferSize);
+ bufferStart = buffer = ALLOCA_N(char, bufferSize);
}
while (pe < stringEnd) {
@@ -552,15 +551,42 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
buffer += pe - p;
}
- #ifdef HAVE_RUBY_ENCODING_H
- result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
- #else
- result = rb_str_new(bufferStart, buffer - bufferStart);
- #endif
+# ifdef HAVE_RB_ENC_INTERNED_STR
+ if (intern) {
+ result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
+ } else {
+ result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+ }
+ if (bufferSize > MAX_STACK_BUFFER_SIZE) {
+ free(bufferStart);
+ }
+# else
+ result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
- free(bufferStart);
+ if (bufferSize > MAX_STACK_BUFFER_SIZE) {
+ free(bufferStart);
+ }
+
+ if (intern) {
+ # if STR_UMINUS_DEDUPE_FROZEN
+ // Starting from MRI 2.8 it is preferable to freeze the string
+ // before deduplication so that it can be interned directly
+ // otherwise it would be duplicated first which is wasteful.
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+ # elif STR_UMINUS_DEDUPE
+ // MRI 2.5 and older do not deduplicate strings that are already
+ // frozen.
+ result = rb_funcall(result, i_uminus, 0);
+ # else
+ result = rb_str_freeze(result);
+ # endif
+ }
+# endif
+
+ if (symbolize) {
+ result = rb_str_intern(result);
}
+
return result;
}
@@ -571,7 +597,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd)
write data;
action parse_string {
- *result = json_string_unescape(json->memo + 1, p);
+ *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
fhold;
fbreak;
@@ -617,26 +643,6 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- } else if (RB_TYPE_P(*result, T_STRING)) {
-# if STR_UMINUS_DEDUPE_FROZEN
- if (json->freeze) {
- // Starting from MRI 2.8 it is preferable to freeze the string
- // before deduplication so that it can be interned directly
- // otherwise it would be duplicated first which is wasteful.
- *result = rb_funcall(rb_str_freeze(*result), i_uminus, 0);
- }
-# elif STR_UMINUS_DEDUPE
- if (json->freeze) {
- // MRI 2.5 and older do not deduplicate strings that are already
- // frozen.
- *result = rb_funcall(*result, i_uminus, 0);
- }
-# else
- rb_str_resize(*result, RSTRING_LEN(*result));
-# endif
- }
if (cs >= JSON_string_first_final) {
return p + 1;
} else {