diff options
author | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-25 08:13:18 +0000 |
---|---|---|
committer | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-25 08:13:18 +0000 |
commit | 2703953ca6763176932dee45674f59f76db22262 (patch) | |
tree | 018bc3e1bfdb4dd95d5dcdecca7c4bfe0c0a14ba /string.c | |
parent | 4ef48a781ad8519f8dfe4871336d25a84e62bcd2 (diff) |
merge revision(s) 49096,51353,53168,53169: [Backport #11834]
test_m17n.rb: split test_scrub
* test/ruby/test_m17n.rb (TestM17N#test_scrub): split into some
tests.
* include/ruby/ruby.h: add raw FL macros, which assume always the
argument object is not a special constant.
* internal.h (STR_EMBED_P, STR_SHARED_P): valid only for T_STRING.
* string.c: deal with taint flags directly across String instances.
* transcode.c (rb_econv_substr_append, econv_primitive_convert):
the result should be infected by the original string.
* string.c (rb_str_scrub): the result should be infected by the
original string.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@54260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 59 |
1 files changed, 30 insertions, 29 deletions
@@ -1018,7 +1018,7 @@ rb_obj_as_string(VALUE obj) str = rb_funcall(obj, id_to_s, 0); if (!RB_TYPE_P(str, T_STRING)) return rb_any_to_s(obj); - if (OBJ_TAINTED(obj)) OBJ_TAINT(str); + OBJ_INFECT(str, obj); return str; } @@ -1362,8 +1362,7 @@ rb_str_plus(VALUE str1, VALUE str2) RSTRING_PTR(str2), RSTRING_LEN(str2)); RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0'; - if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) - OBJ_TAINT(str3); + FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2)); ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc), ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2))); return str3; @@ -3992,7 +3991,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) if (NIL_P(hash)) { StringValue(repl); } - if (OBJ_TAINTED(repl)) tainted = 1; + tainted = OBJ_TAINTED_RAW(repl); } pat = get_pat(argv[0], 1); @@ -4037,7 +4036,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) } rb_str_modify(str); rb_enc_associate(str, enc); - if (OBJ_TAINTED(repl)) tainted = 1; + tainted |= OBJ_TAINTED_RAW(repl); if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) { int cr2 = ENC_CODERANGE(repl); if (cr2 == ENC_CODERANGE_BROKEN || @@ -4061,7 +4060,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) STR_SET_LEN(str, len); RSTRING_PTR(str)[len] = '\0'; ENC_CODERANGE_SET(str, cr); - if (tainted) OBJ_TAINT(str); + FL_SET_RAW(str, tainted); return str; } @@ -4143,7 +4142,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) if (NIL_P(hash)) { StringValue(repl); } - if (OBJ_TAINTED(repl)) tainted = 1; + tainted = OBJ_TAINTED_RAW(repl); break; default: rb_check_arity(argc, 1, 2); @@ -4190,7 +4189,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) val = rb_reg_regsub(repl, str, regs, pat); } - if (OBJ_TAINTED(val)) tainted = 1; + tainted |= OBJ_TAINTED_RAW(val); len = beg0 - offset; /* copy pre-match substr */ if (len) { @@ -4224,11 +4223,11 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) } else { RBASIC_SET_CLASS(dest, rb_obj_class(str)); - OBJ_INFECT(dest, str); + tainted |= OBJ_TAINTED_RAW(str); str = dest; } - if (tainted) OBJ_TAINT(str); + FL_SET_RAW(str, tainted); return str; } @@ -4457,7 +4456,7 @@ str_byte_substr(VALUE str, long beg, long len) } } - OBJ_INFECT(str2, str); + OBJ_INFECT_RAW(str2, str); return str2; } @@ -4582,7 +4581,7 @@ rb_str_reverse(VALUE str) } } STR_SET_LEN(rev, RSTRING_LEN(str)); - OBJ_INFECT(rev, str); + OBJ_INFECT_RAW(rev, str); str_enc_copy(rev, str); ENC_CODERANGE_SET(rev, cr); @@ -4877,7 +4876,7 @@ rb_str_inspect(VALUE str) if (p > prev) str_buf_cat(result, prev, p - prev); str_buf_cat2(result, "\""); - OBJ_INFECT(result, str); + OBJ_INFECT_RAW(result, str); return result; } @@ -5017,7 +5016,7 @@ rb_str_dump(VALUE str) snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name); enc = rb_ascii8bit_encoding(); } - OBJ_INFECT(result, str); + OBJ_INFECT_RAW(result, str); /* result from dump is ASCII */ rb_enc_associate(result, enc); ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); @@ -7433,8 +7432,7 @@ rb_str_crypt(VALUE str, VALUE salt) rb_sys_fail("crypt"); } result = rb_str_new2(res); - OBJ_INFECT(result, str); - OBJ_INFECT(result, salt); + FL_SET_RAW(result, OBJ_TAINTED_RAW(str) | OBJ_TAINTED_RAW(salt)); return result; } @@ -7636,8 +7634,8 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag) } *p = '\0'; STR_SET_LEN(res, p-RSTRING_PTR(res)); - OBJ_INFECT(res, str); - if (!NIL_P(pad)) OBJ_INFECT(res, pad); + OBJ_INFECT_RAW(res, str); + if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad); rb_enc_associate(res, enc); if (argc == 2) cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad)); @@ -7905,7 +7903,7 @@ rb_str_b(VALUE str) { VALUE str2 = str_alloc(rb_cString); str_replace_shared_without_enc(str2, str); - OBJ_INFECT(str2, str); + OBJ_INFECT_RAW(str2, str); ENC_CODERANGE_CLEAR(str2); return str2; } @@ -8035,6 +8033,10 @@ rb_str_scrub(VALUE str, VALUE repl) int cr = ENC_CODERANGE(str); rb_encoding *enc; int encidx; + VALUE buf = Qnil; + const char *rep; + long replen; + int tainted = 0; if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) return Qnil; @@ -8042,6 +8044,7 @@ rb_str_scrub(VALUE str, VALUE repl) enc = STR_ENC_GET(str); if (!NIL_P(repl)) { repl = str_compat_and_valid(repl, enc); + tainted = OBJ_TAINTED_RAW(repl); } if (rb_enc_dummy_p(enc)) { @@ -8058,10 +8061,7 @@ rb_str_scrub(VALUE str, VALUE repl) const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; - const char *rep; - long replen; int rep7bit_p; - VALUE buf = Qnil; if (rb_block_given_p()) { rep = NULL; replen = 0; @@ -8127,6 +8127,7 @@ rb_str_scrub(VALUE str, VALUE repl) else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; @@ -8161,22 +8162,18 @@ rb_str_scrub(VALUE str, VALUE repl) else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } } - ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); - return buf; } else { /* ASCII incompatible */ const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; - VALUE buf = Qnil; - const char *rep; - long replen; long mbminlen = rb_enc_mbminlen(enc); if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); @@ -8231,6 +8228,7 @@ rb_str_scrub(VALUE str, VALUE repl) else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } p += clen; @@ -8257,12 +8255,15 @@ rb_str_scrub(VALUE str, VALUE repl) else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } } - ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID); - return buf; + cr = ENC_CODERANGE_VALID; } + FL_SET_RAW(buf, tainted|OBJ_TAINTED_RAW(str)); + ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); + return buf; } /* |