summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-08-22 18:18:50 +0000
committer nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-08-22 18:18:50 +0000
commit 6f122a4f10512bb60795faa0021e498800a7f1b8 (patch)
tree 733a74d4471c4c67eb60a83573016d9873ba7da0
parent 0b8b3a8296e81db90cc29c2ac663b0e8e810c89b (diff)
merge revision(s) 55547,55551,55552,55555,55557,55559,55575,55691,55568: [Backport #12536]
* string.c: Fix memory corruptions when using UTF-16/32 strings. [Bug #12536] [ruby-dev:49699] * string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length, the same as largest value of rb_enc_mbminlen(enc) among encodings. * string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate +TERM_LEN_MAX bytes instead of +1. This change may increase memory usage. * string.c (rb_str_new_with_class): Use TERM_LEN of the "obj". * string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware of termlen. * string.c (str_shared_replace): Copy +termlen bytes instead of +1. * string.c (rb_str_times): termlen should not be included in capa. * string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX, termlen should be counted with it because embedded strings are also processed by TERM_FILL. * string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto. * string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto. * string.c (rb_str_subseq, str_substr): When RSTRING_EMBED_LEN_MAX is used, TERM_LEN(str) should be considered with it because embedded strings are also processed by TERM_FILL. Additional fix for [Bug #12536] [ruby-dev:49699]. Additional fix for [Bug #12536] [ruby-dev:49699]. * string.c (rb_usascii_str_new, rb_utf8_str_new): Specify termlen which is apparently 1 for the encodings. * string.c (str_new0_cstr): New static function to create a String object from a C string with specifying termlen. * string.c (rb_usascii_str_new_cstr, rb_utf8_str_new_cstr): Specify termlen by using new str_new0_cstr(). * string.c (str_new_static): Specify termlen from the given encoding when creating a new String object is needed. * string.c (rb_tainted_str_new_with_enc): New function to create a tainted String object with the given encoding. This means that the termlen is correctly specified. Currently static function. The function name might be renamed to rb_tainted_enc_str_new or rb_enc_tainted_str_new. 
* string.c (rb_external_str_new_with_enc): Use encoding by using the above rb_tainted_str_new_with_enc(). * string.c (str_fill_term): When termlen increases, re-allocation of memory for termlen should always be needed. In this fix, if possible, decrease capa instead of realloc. [Bug #12536] [ruby-dev:49699] * string.c: Partially reverts r55547 and r55555. ChangeLog about the reverted changes are also deleted in this file. [Bug #12536] [ruby-dev:49699] [ruby-dev:49702] * string.c (rb_str_change_terminator_length): New function to change termlen and resize heap for the terminator. This is split from rb_str_fill_terminator (str_fill_term) because filling terminator and changing terminator length are different things. [Bug #12536] * internal.h: declaration for rb_str_change_terminator_length. * string.c (str_fill_term): Simplify only to zero-fill the terminator. For non-shared strings, it assumes that (capa + termlen) bytes of heap is allocated. This partially reverts r55557. * encoding.c (rb_enc_associate_index): rb_str_change_terminator_length is used, and it should be called whenever the termlen is changed. * string.c (str_capacity): New static function to return capacity of a string with the given termlen, because the termlen may sometimes be different from TERM_LEN(str) especially during changing termlen or filling terminator with specific termlen. * string.c (rb_str_capacity): Use str_capacity. * string.c (str_buf_cat): Fix capa size for embed string. Fix bug in r55547. [Bug #12536] * string.c: Specify termlen as far as possible. the termlen is correctly specified. Currently static function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_3@55988 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 87
-rw-r--r-- encoding.c 4
-rw-r--r-- internal.h 1
-rw-r--r-- string.c 100
-rw-r--r-- version.h 6
5 files changed, 170 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index c9113c3cbd..5620c4ba43 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,90 @@
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c (str_buf_cat): Fix capa size for embed string.
+ Fix bug in r55547. [Bug #12536]
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c (rb_str_change_terminator_length): New function to change
+ termlen and resize heap for the terminator. This is split from
+ rb_str_fill_terminator (str_fill_term) because filling terminator
+ and changing terminator length are different things. [Bug #12536]
+
+ * internal.h: declaration for rb_str_change_terminator_length.
+
+ * string.c (str_fill_term): Simplify only to zero-fill the terminator.
+ For non-shared strings, it assumes that (capa + termlen) bytes of
+ heap is allocated. This partially reverts r55557.
+
+ * encoding.c (rb_enc_associate_index): rb_str_change_terminator_length
+ is used, and it should be called whenever the termlen is changed.
+
+ * string.c (str_capacity): New static function to return capacity
+ of a string with the given termlen, because the termlen may
+ sometimes be different from TERM_LEN(str) especially during
+ changing termlen or filling terminator with specific termlen.
+
+ * string.c (rb_str_capacity): Use str_capacity.
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c: Partially reverts r55547 and r55555.
+ ChangeLog about the reverted changes are also deleted in this file.
+ [Bug #12536] [ruby-dev:49699] [ruby-dev:49702]
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c (str_fill_term): When termlen increases, re-allocation
+ of memory for termlen should always be needed.
+ In this fix, if possible, decrease capa instead of realloc.
+ [Bug #12536] [ruby-dev:49699]
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c: Specify termlen as far as possible.
+ Additional fix for [Bug #12536] [ruby-dev:49699].
+
+ * string.c (str_new_static): Specify termlen from the given encoding
+ when creating a new String object is needed.
+
+ * string.c (rb_tainted_str_new_with_enc): New function to create a
+ tainted String object with the given encoding. This means that
+ the termlen is correctly specified. Currently static function.
+ The function name might be renamed to rb_tainted_enc_str_new
+ or rb_enc_tainted_str_new.
+
+ * string.c (rb_external_str_new_with_enc): Use encoding by using the
+ above rb_tainted_str_new_with_enc().
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c (rb_str_subseq, str_substr): When RSTRING_EMBED_LEN_MAX
+ is used, TERM_LEN(str) should be considered with it because
+ embedded strings are also processed by TERM_FILL.
+ Additional fix for [Bug #12536] [ruby-dev:49699].
+
+Tue Aug 23 03:14:22 2016 Naohisa Goto <ngotogenome@gmail.com>
+
+ * string.c: Fix memory corruptions when using UTF-16/32 strings.
+ [Bug #12536] [ruby-dev:49699]
+
+ * string.c (rb_str_new_with_class): Use TERM_LEN of the "obj".
+
+ * string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware
+ of termlen.
+
+ * string.c (str_shared_replace): Copy +termlen bytes instead of +1.
+
+ * string.c (rb_str_times): termlen should not be included in capa.
+
+ * string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX,
+ termlen should be counted with it because embedded strings are
+ also processed by TERM_FILL.
+
+ * string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto.
+
+ * string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto.
+
Thu Aug 18 23:43:33 2016 Eric Wong <e@80x24.org>
* ext/openssl/ossl_ssl.c (ossl_ssl_write_internal):
diff --git a/encoding.c b/encoding.c
index f5f7555eca..474dd26b8d 100644
--- a/encoding.c
+++ b/encoding.c
@@ -843,8 +843,8 @@ rb_enc_associate_index(VALUE obj, int idx)
}
termlen = rb_enc_mbminlen(enc);
oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
- if (oldtermlen < termlen && RB_TYPE_P(obj, T_STRING)) {
- rb_str_fill_terminator(obj, termlen);
+ if (oldtermlen != termlen && RB_TYPE_P(obj, T_STRING)) {
+ rb_str_change_terminator_length(obj, oldtermlen, termlen);
}
enc_set_index(obj, idx);
return obj;
diff --git a/internal.h b/internal.h
index ac8834dbce..bf238059b9 100644
--- a/internal.h
+++ b/internal.h
@@ -1124,6 +1124,7 @@ VALUE rb_id_quote_unprintable(ID);
#define QUOTE(str) rb_str_quote_unprintable(str)
#define QUOTE_ID(id) rb_id_quote_unprintable(id)
void rb_str_fill_terminator(VALUE str, const int termlen);
+void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen);
VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg);
#ifdef RUBY_ENCODING_H
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
diff --git a/string.c b/string.c
index c9fa4849e5..c399f3eafd 100644
--- a/string.c
+++ b/string.c
@@ -119,7 +119,7 @@ VALUE rb_cSymbol;
} while (0)
#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
if (STR_EMBED_P(str)) {\
- if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
+ if ((capacity) > (RSTRING_EMBED_LEN_MAX + 1 - (termlen))) {\
char *const tmp = ALLOC_N(char, (capacity)+termlen);\
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
@@ -601,11 +601,11 @@ str_mod_check(VALUE s, const char *p, long len)
}
}
-size_t
-rb_str_capacity(VALUE str)
+static size_t
+str_capacity(VALUE str, const int termlen)
{
if (STR_EMBED_P(str)) {
- return RSTRING_EMBED_LEN_MAX;
+ return (RSTRING_EMBED_LEN_MAX + 1 - termlen);
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
@@ -615,6 +615,12 @@ rb_str_capacity(VALUE str)
}
}
+size_t
+rb_str_capacity(VALUE str)
+{
+ return str_capacity(str, TERM_LEN(str));
+}
+
static inline void
must_not_null(const char *ptr)
{
@@ -649,7 +655,7 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc(klass);
- if (len > RSTRING_EMBED_LEN_MAX) {
+ if (len > (RSTRING_EMBED_LEN_MAX + 1 - termlen)) {
RSTRING(str)->as.heap.aux.capa = len;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
STR_SET_NOEMBED(str);
@@ -748,7 +754,8 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
}
if (!ptr) {
- str = str_new(klass, ptr, len);
+ rb_encoding *enc = rb_enc_get_from_index(encindex);
+ str = str_new0(klass, ptr, len, rb_enc_mbminlen(enc));
}
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
@@ -796,6 +803,15 @@ rb_tainted_str_new(const char *ptr, long len)
return str;
}
+static VALUE
+rb_tainted_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
+{
+ VALUE str = rb_enc_str_new(ptr, len, enc);
+
+ OBJ_TAINT(str);
+ return str;
+}
+
VALUE
rb_tainted_str_new_cstr(const char *ptr)
{
@@ -928,7 +944,7 @@ rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
{
VALUE str;
- str = rb_tainted_str_new(ptr, len);
+ str = rb_tainted_str_new_with_enc(ptr, len, eenc);
return rb_external_str_with_enc(str, eenc);
}
@@ -1114,7 +1130,7 @@ str_new_frozen(VALUE klass, VALUE orig)
VALUE
rb_str_new_with_class(VALUE obj, const char *ptr, long len)
{
- return str_new(rb_obj_class(obj), ptr, len);
+ return str_new0(rb_obj_class(obj), ptr, len, TERM_LEN(obj));
}
static VALUE
@@ -1206,16 +1222,18 @@ str_shared_replace(VALUE str, VALUE str2)
{
rb_encoding *enc;
int cr;
+ int termlen;
ASSUME(str2 != str);
enc = STR_ENC_GET(str2);
cr = ENC_CODERANGE(str2);
str_discard(str);
OBJ_INFECT(str, str2);
+ termlen = rb_enc_mbminlen(enc);
- if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
+ if (RSTRING_LEN(str2) <= (RSTRING_EMBED_LEN_MAX + 1 - termlen)) {
STR_SET_EMBED(str);
- memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
+ memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
@@ -1629,16 +1647,18 @@ rb_str_plus(VALUE str1, VALUE str2)
rb_encoding *enc;
char *ptr1, *ptr2, *ptr3;
long len1, len2;
+ int termlen;
StringValue(str2);
enc = rb_enc_check_str(str1, str2);
RSTRING_GETMEM(str1, ptr1, len1);
RSTRING_GETMEM(str2, ptr2, len2);
- str3 = rb_str_new(0, len1+len2);
+ termlen = rb_enc_mbminlen(enc);
+ str3 = str_new0(rb_cString, 0, len1+len2, termlen);
ptr3 = RSTRING_PTR(str3);
memcpy(ptr3, ptr1, len1);
memcpy(ptr3+len1, ptr2, len2);
- TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc));
+ TERM_FILL(&ptr3[len1+len2], termlen);
FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2));
ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
@@ -1686,7 +1706,7 @@ rb_str_times(VALUE str, VALUE times)
len *= RSTRING_LEN(str);
termlen = TERM_LEN(str);
- str2 = rb_str_new_with_class(str, 0, (len + termlen - 1));
+ str2 = str_new0(rb_obj_class(str), 0, len, termlen);
ptr2 = RSTRING_PTR(str2);
if (len) {
n = RSTRING_LEN(str);
@@ -1906,9 +1926,13 @@ str_null_char(const char *s, long len, const int minlen, rb_encoding *enc)
static char *
str_fill_term(VALUE str, char *s, long len, int termlen)
{
- long capa = rb_str_capacity(str) + 1;
+ long capa = str_capacity(str, termlen);
+
+ /* This function assumes that (capa + termlen) bytes of memory
+ * is allocated, like many other functions in this file.
+ */
- if (capa < len + termlen) {
+ if (capa < len) {
rb_check_lockedtmp(str);
str_make_independent_expand(str, len, 0L, termlen);
}
@@ -1922,6 +1946,34 @@ str_fill_term(VALUE str, char *s, long len, int termlen)
return s;
}
+void
+rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
+{
+ long capa = str_capacity(str, oldtermlen);
+ long len = RSTRING_LEN(str);
+
+ if (capa < len + termlen - oldtermlen) {
+ rb_check_lockedtmp(str);
+ str_make_independent_expand(str, len, 0L, termlen);
+ }
+ else if (str_dependent_p(str)) {
+ if (termlen > oldtermlen)
+ str_make_independent_expand(str, len, 0L, termlen);
+ }
+ else {
+ if (!STR_EMBED_P(str)) {
+ /* modify capa instead of realloc */
+ assert(!FL_TEST((str), STR_SHARED));
+ RSTRING(str)->as.heap.aux.capa = capa - (termlen - oldtermlen);
+ }
+ if (termlen > oldtermlen) {
+ TERM_FILL(RSTRING_PTR(str) + len, termlen);
+ }
+ }
+
+ return;
+}
+
char *
rb_string_value_cstr(volatile VALUE *ptr)
{
@@ -2119,7 +2171,7 @@ rb_str_subseq(VALUE str, long beg, long len)
{
VALUE str2;
- if (RSTRING_EMBED_LEN_MAX < len && SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) {
+ if ((RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) < len && SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) {
long olen;
str2 = rb_str_new_shared(rb_str_new_frozen(str));
RSTRING(str2)->as.heap.ptr += beg;
@@ -2229,7 +2281,7 @@ rb_str_substr(VALUE str, long beg, long len)
char *p = rb_str_subpos(str, beg, &len);
if (!p) return Qnil;
- if (len > RSTRING_EMBED_LEN_MAX && SHARABLE_SUBSTRING_P(p, len, RSTRING_END(str))) {
+ if (len > (RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) && SHARABLE_SUBSTRING_P(p, len, RSTRING_END(str))) {
long ofs = p - RSTRING_PTR(str);
str2 = rb_str_new_frozen(str);
str2 = str_new_shared(rb_obj_class(str2), str2);
@@ -2407,7 +2459,7 @@ str_buf_cat(VALUE str, const char *ptr, long len)
rb_str_modify(str);
if (len == 0) return 0;
if (STR_EMBED_P(str)) {
- capa = RSTRING_EMBED_LEN_MAX;
+ capa = RSTRING_EMBED_LEN_MAX + 1 - termlen;
sptr = RSTRING(str)->as.ary;
olen = RSTRING_EMBED_LEN(str);
}
@@ -4029,7 +4081,7 @@ rb_str_drop_bytes(VALUE str, long len)
str_modifiable(str);
if (len > olen) len = olen;
nlen = olen - len;
- if (nlen <= RSTRING_EMBED_LEN_MAX) {
+ if (nlen <= (RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str))) {
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
@@ -4880,7 +4932,7 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
enc = STR_ENC_GET(str);
head = RSTRING_PTR(str);
ptr = &head[pos];
- if (len > RSTRING_EMBED_LEN_MAX) {
+ if (len > (RSTRING_EMBED_LEN_MAX + 1 - rb_enc_mbminlen(enc))) {
cr = ENC_CODERANGE(str);
switch (cr) {
case ENC_CODERANGE_7BIT:
@@ -4933,7 +4985,7 @@ str_byte_substr(VALUE str, long beg, long len)
else
p = s + beg;
- if (len > RSTRING_EMBED_LEN_MAX && SHARABLE_SUBSTRING_P(beg, len, n)) {
+ if (len > (RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) && SHARABLE_SUBSTRING_P(beg, len, n)) {
str2 = rb_str_new_frozen(str);
str2 = str_new_shared(rb_obj_class(str2), str2);
RSTRING(str2)->as.heap.ptr += beg;
@@ -8220,9 +8272,11 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
VALUE pad;
int singlebyte = 1, cr;
+ int termlen;
rb_scan_args(argc, argv, "11", &w, &pad);
enc = STR_ENC_GET(str);
+ termlen = rb_enc_mbminlen(enc);
width = NUM2LONG(w);
if (argc == 2) {
StringValue(pad);
@@ -8252,7 +8306,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
rb_raise(rb_eArgError, "argument too big");
}
len += size;
- res = rb_str_new_with_class(str, 0, len);
+ res = str_new0(rb_obj_class(str), 0, len, termlen);
p = RSTRING_PTR(res);
if (flen <= 1) {
memset(p, *f, llen);
@@ -8286,7 +8340,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
p += rlen2;
}
}
- TERM_FILL(p, rb_enc_mbminlen(enc));
+ TERM_FILL(p, termlen);
STR_SET_LEN(res, p-RSTRING_PTR(res));
OBJ_INFECT_RAW(res, str);
if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad);
diff --git a/version.h b/version.h
index 6bc65438a8..6941dd3adc 100644
--- a/version.h
+++ b/version.h
@@ -1,10 +1,10 @@
#define RUBY_VERSION "2.3.2"
-#define RUBY_RELEASE_DATE "2016-08-21"
-#define RUBY_PATCHLEVEL 174
+#define RUBY_RELEASE_DATE "2016-08-23"
+#define RUBY_PATCHLEVEL 175
#define RUBY_RELEASE_YEAR 2016
#define RUBY_RELEASE_MONTH 8
-#define RUBY_RELEASE_DAY 21
+#define RUBY_RELEASE_DAY 23
#include "ruby/version.h"