summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 19
-rw-r--r-- include/ruby/ruby.h 18
-rw-r--r-- internal.h 4
-rw-r--r-- string.c 60
-rw-r--r-- test/ruby/test_econv.rb 1
-rw-r--r-- test/ruby/test_m17n.rb 34
-rw-r--r-- transcode.c 5
-rw-r--r-- version.h 2
8 files changed, 101 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index 7f80445288..2220e96e33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * string.c (rb_str_scrub): the result should be infected by the
+ original string.
+
+Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * transcode.c (rb_econv_substr_append, econv_primitive_convert):
+ the result should be infected by the original string.
+
+Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * include/ruby/ruby.h: add raw FL macros, which assume always the
+ argument object is not a special constant.
+
+ * internal.h (STR_EMBED_P, STR_SHARED_P): valid only for T_STRING.
+
+ * string.c: deal with taint flags directly across String instances.
+
Mon Feb 15 01:20:08 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (regexp): set_yylval_num sets u1, should use nd_tag
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index cd03125990..f178299323 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -1129,20 +1129,28 @@ struct RStruct {
#define FL_ABLE(x) (!SPECIAL_CONST_P(x) && BUILTIN_TYPE(x) != T_NODE)
#define FL_TEST_RAW(x,f) (RBASIC(x)->flags&(f))
#define FL_TEST(x,f) (FL_ABLE(x)?FL_TEST_RAW((x),(f)):0)
+#define FL_ANY_RAW(x,f) FL_TEST_RAW((x),(f))
#define FL_ANY(x,f) FL_TEST((x),(f))
+#define FL_ALL_RAW(x,f) (FL_TEST_RAW((x),(f)) == (f))
#define FL_ALL(x,f) (FL_TEST((x),(f)) == (f))
-#define FL_SET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags |= (f);} while (0)
-#define FL_UNSET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags &= ~(f);} while (0)
-#define FL_REVERSE(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags ^= (f);} while (0)
+#define FL_SET_RAW(x,f) (RBASIC(x)->flags |= (f))
+#define FL_SET(x,f) (FL_ABLE(x) ? FL_SET_RAW(x, f) : 0)
+#define FL_UNSET_RAW(x,f) (RBASIC(x)->flags &= ~(f))
+#define FL_UNSET(x,f) (FL_ABLE(x) ? FL_UNSET_RAW(x, f) : 0)
+#define FL_REVERSE_RAW(x,f) (RBASIC(x)->flags ^= (f))
+#define FL_REVERSE(x,f) (FL_ABLE(x) ? FL_REVERSE_RAW(x, f) : 0)
#define OBJ_TAINTABLE(x) (FL_ABLE(x) && BUILTIN_TYPE(x) != T_BIGNUM && BUILTIN_TYPE(x) != T_FLOAT)
+#define OBJ_TAINTED_RAW(x) FL_TEST_RAW(x, FL_TAINT)
#define OBJ_TAINTED(x) (!!FL_TEST((x), FL_TAINT))
-#define OBJ_TAINT(x) (OBJ_TAINTABLE(x) ? (RBASIC(x)->flags |= FL_TAINT) : 0)
+#define OBJ_TAINT_RAW(x) FL_SET_RAW(x, FL_TAINT)
+#define OBJ_TAINT(x) (OBJ_TAINTABLE(x) ? OBJ_TAINT_RAW(x) : 0)
#define OBJ_UNTRUSTED(x) OBJ_TAINTED(x)
#define OBJ_UNTRUST(x) OBJ_TAINT(x)
+#define OBJ_INFECT_RAW(x,s) FL_SET_RAW(x, OBJ_TAINTED_RAW(s))
#define OBJ_INFECT(x,s) ( \
(OBJ_TAINTABLE(x) && FL_ABLE(s)) ? \
- RBASIC(x)->flags |= RBASIC(s)->flags & FL_TAINT : 0)
+ OBJ_INFECT_RAW(x, s) : 0)
#define OBJ_FROZEN(x) (FL_ABLE(x) ? !!(RBASIC(x)->flags&FL_FREEZE) : 1)
#define OBJ_FREEZE_RAW(x) (RBASIC(x)->flags |= FL_FREEZE)
diff --git a/internal.h b/internal.h
index 64b9843c92..d512514b3a 100644
--- a/internal.h
+++ b/internal.h
@@ -993,8 +993,8 @@ VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
#endif
#define STR_NOEMBED FL_USER1
#define STR_SHARED FL_USER2 /* = ELTS_SHARED */
-#define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
-#define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
+#define STR_EMBED_P(str) (!FL_TEST_RAW((str), STR_NOEMBED))
+#define STR_SHARED_P(s) FL_ALL_RAW((s), STR_NOEMBED|ELTS_SHARED)
#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
size_t rb_str_memsize(VALUE);
diff --git a/string.c b/string.c
index 53fc9471b0..efae3d607f 100644
--- a/string.c
+++ b/string.c
@@ -1130,7 +1130,7 @@ rb_obj_as_string(VALUE obj)
str = rb_funcall(obj, id_to_s, 0);
if (!RB_TYPE_P(str, T_STRING))
return rb_any_to_s(obj);
- if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
+ OBJ_INFECT(str, obj);
return str;
}
@@ -1476,8 +1476,7 @@ rb_str_plus(VALUE str1, VALUE str2)
memcpy(ptr3+len1, ptr2, len2);
TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc));
- if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
- OBJ_TAINT(str3);
+ FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2));
ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
RB_GC_GUARD(str1);
@@ -4115,7 +4114,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
if (NIL_P(hash)) {
StringValue(repl);
}
- if (OBJ_TAINTED(repl)) tainted = 1;
+ tainted = OBJ_TAINTED_RAW(repl);
}
pat = get_pat_quoted(argv[0], 1);
@@ -4175,7 +4174,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
}
rb_str_modify(str);
rb_enc_associate(str, enc);
- if (OBJ_TAINTED(repl)) tainted = 1;
+ tainted |= OBJ_TAINTED_RAW(repl);
if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
int cr2 = ENC_CODERANGE(repl);
if (cr2 == ENC_CODERANGE_BROKEN ||
@@ -4199,7 +4198,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
ENC_CODERANGE_SET(str, cr);
- if (tainted) OBJ_TAINT(str);
+ FL_SET_RAW(str, tainted);
return str;
}
@@ -4285,7 +4284,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
else {
mode = MAP;
}
- if (OBJ_TAINTED(repl)) tainted = 1;
+ tainted = OBJ_TAINTED_RAW(repl);
break;
default:
rb_check_arity(argc, 1, 2);
@@ -4348,8 +4347,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
val = repl;
}
-
- if (OBJ_TAINTED(val)) tainted = 1;
+ tainted |= OBJ_TAINTED_RAW(val);
len = beg0 - offset; /* copy pre-match substr */
if (len) {
@@ -4383,11 +4381,11 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
}
else {
RBASIC_SET_CLASS(dest, rb_obj_class(str));
- OBJ_INFECT(dest, str);
+ tainted |= OBJ_TAINTED_RAW(str);
str = dest;
}
- if (tainted) OBJ_TAINT(str);
+ FL_SET_RAW(str, tainted);
return str;
}
@@ -4616,7 +4614,7 @@ str_byte_substr(VALUE str, long beg, long len)
}
}
- OBJ_INFECT(str2, str);
+ OBJ_INFECT_RAW(str2, str);
return str2;
}
@@ -4741,7 +4739,7 @@ rb_str_reverse(VALUE str)
}
}
STR_SET_LEN(rev, RSTRING_LEN(str));
- OBJ_INFECT(rev, str);
+ OBJ_INFECT_RAW(rev, str);
str_enc_copy(rev, str);
ENC_CODERANGE_SET(rev, cr);
@@ -5036,7 +5034,7 @@ rb_str_inspect(VALUE str)
if (p > prev) str_buf_cat(result, prev, p - prev);
str_buf_cat2(result, "\"");
- OBJ_INFECT(result, str);
+ OBJ_INFECT_RAW(result, str);
return result;
}
@@ -5176,7 +5174,7 @@ rb_str_dump(VALUE str)
snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
enc = rb_ascii8bit_encoding();
}
- OBJ_INFECT(result, str);
+ OBJ_INFECT_RAW(result, str);
/* result from dump is ASCII */
rb_enc_associate(result, enc);
ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
@@ -7687,8 +7685,7 @@ rb_str_crypt(VALUE str, VALUE salt)
rb_sys_fail("crypt");
}
result = rb_str_new_cstr(res);
- OBJ_INFECT(result, str);
- OBJ_INFECT(result, salt);
+ FL_SET_RAW(result, OBJ_TAINTED_RAW(str) | OBJ_TAINTED_RAW(salt));
return result;
}
@@ -7861,8 +7858,8 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
}
TERM_FILL(p, rb_enc_mbminlen(enc));
STR_SET_LEN(res, p-RSTRING_PTR(res));
- OBJ_INFECT(res, str);
- if (!NIL_P(pad)) OBJ_INFECT(res, pad);
+ OBJ_INFECT_RAW(res, str);
+ if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad);
rb_enc_associate(res, enc);
if (argc == 2)
cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
@@ -8120,7 +8117,7 @@ rb_str_b(VALUE str)
{
VALUE str2 = str_alloc(rb_cString);
str_replace_shared_without_enc(str2, str);
- OBJ_INFECT(str2, str);
+ OBJ_INFECT_RAW(str2, str);
ENC_CODERANGE_CLEAR(str2);
return str2;
}
@@ -8250,6 +8247,10 @@ rb_str_scrub(VALUE str, VALUE repl)
int cr = ENC_CODERANGE(str);
rb_encoding *enc;
int encidx;
+ VALUE buf = Qnil;
+ const char *rep;
+ long replen;
+ int tainted = 0;
if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
return Qnil;
@@ -8257,6 +8258,7 @@ rb_str_scrub(VALUE str, VALUE repl)
enc = STR_ENC_GET(str);
if (!NIL_P(repl)) {
repl = str_compat_and_valid(repl, enc);
+ tainted = OBJ_TAINTED_RAW(repl);
}
if (rb_enc_dummy_p(enc)) {
@@ -8273,10 +8275,7 @@ rb_str_scrub(VALUE str, VALUE repl)
const char *p = RSTRING_PTR(str);
const char *e = RSTRING_END(str);
const char *p1 = p;
- const char *rep;
- long replen;
int rep7bit_p;
- VALUE buf = Qnil;
if (rb_block_given_p()) {
rep = NULL;
replen = 0;
@@ -8342,6 +8341,7 @@ rb_str_scrub(VALUE str, VALUE repl)
else {
repl = rb_yield(rb_enc_str_new(p, clen, enc));
repl = str_compat_and_valid(repl, enc);
+ tainted |= OBJ_TAINTED_RAW(repl);
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
cr = ENC_CODERANGE_VALID;
@@ -8376,22 +8376,18 @@ rb_str_scrub(VALUE str, VALUE repl)
else {
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
repl = str_compat_and_valid(repl, enc);
+ tainted |= OBJ_TAINTED_RAW(repl);
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
cr = ENC_CODERANGE_VALID;
}
}
- ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
- return buf;
}
else {
/* ASCII incompatible */
const char *p = RSTRING_PTR(str);
const char *e = RSTRING_END(str);
const char *p1 = p;
- VALUE buf = Qnil;
- const char *rep;
- long replen;
long mbminlen = rb_enc_mbminlen(enc);
if (!NIL_P(repl)) {
rep = RSTRING_PTR(repl);
@@ -8446,6 +8442,7 @@ rb_str_scrub(VALUE str, VALUE repl)
else {
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
repl = str_compat_and_valid(repl, enc);
+ tainted |= OBJ_TAINTED_RAW(repl);
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
}
p += clen;
@@ -8472,12 +8469,15 @@ rb_str_scrub(VALUE str, VALUE repl)
else {
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
repl = str_compat_and_valid(repl, enc);
+ tainted |= OBJ_TAINTED_RAW(repl);
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
}
}
- ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
- return buf;
+ cr = ENC_CODERANGE_VALID;
}
+ FL_SET_RAW(buf, tainted|OBJ_TAINTED_RAW(str));
+ ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
+ return buf;
}
/*
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index f423ebfc9e..4c8f4baeeb 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -683,6 +683,7 @@ class TestEncodingConverter < Test::Unit::TestCase
ec = Encoding::Converter.new("utf-8", "euc-jp")
assert_raise(Encoding::InvalidByteSequenceError) { ec.convert("a\x80") }
assert_raise(Encoding::UndefinedConversionError) { ec.convert("\ufffd") }
+ assert_predicate(ec.convert("abc".taint), :tainted?)
ret = ec.primitive_convert(nil, "", nil, nil)
assert_equal(:finished, ret)
assert_raise(ArgumentError) { ec.convert("a") }
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index a028e91222..f5c82dcde9 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1529,20 +1529,32 @@ class TestM17N < Test::Unit::TestCase
assert_equal(a("\xE3\x81\x82"), s.b)
assert_equal(Encoding::ASCII_8BIT, s.b.encoding)
s.taint
- assert_equal(true, s.b.tainted?)
+ assert_predicate(s.b, :tainted?)
s = "abc".b
- assert_equal(true, s.b.ascii_only?)
+ assert_predicate(s.b, :ascii_only?)
end
- def test_scrub
+ def test_scrub_valid_string
+ str = "foo"
+ assert_equal(str, str.scrub)
+ assert_not_same(str, str.scrub)
+ assert_predicate(str.dup.taint.scrub, :tainted?)
str = "\u3042\u3044"
+ assert_equal(str, str.scrub)
assert_not_same(str, str.scrub)
+ assert_predicate(str.dup.taint.scrub, :tainted?)
str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
+ assert_equal(str, str.scrub)
assert_not_same(str, str.scrub)
assert_nothing_raised(ArgumentError) {str.scrub(nil)}
+ assert_predicate(str.dup.taint.scrub, :tainted?)
+ end
+ def test_scrub_replace_default
assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
+ assert_predicate(u("\x80\x80\x80").taint.scrub, :tainted?)
+ assert_predicate(u("\xF4\x80\x80A").taint.scrub, :tainted?)
# examples in Unicode 6.1.0 D93b
assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41",
@@ -1553,14 +1565,28 @@ class TestM17N < Test::Unit::TestCase
u("\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
assert_equal("abcdefghijklmnopqrstuvwxyz\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064",
u("abcdefghijklmnopqrstuvwxyz\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
+ end
+ def test_scrub_replace_argument
+ assert_equal("foo", u("foo").scrub("\u3013"))
+ assert_predicate(u("foo").taint.scrub("\u3013"), :tainted?)
+ assert_not_predicate(u("foo").scrub("\u3013".taint), :tainted?)
+ assert_equal("\u3042\u3044", u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013"))
+ assert_predicate(u("\xE3\x81\x82\xE3\x81\x84").taint.scrub("\u3013"), :tainted?)
+ assert_not_predicate(u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013".taint), :tainted?)
assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013"))
+ assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub("\u3013"), :tainted?)
+ assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub("\u3013".taint), :tainted?)
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) }
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) }
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) }
assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub(e("\xA2\xAE")))
+ end
+ def test_scrub_replace_block
assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'})
+ assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub{|x|'<'+x.unpack('H*')[0]+'>'}, :tainted?)
+ assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub{|x|('<'+x.unpack('H*')[0]+'>').taint}, :tainted?)
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} }
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} }
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} }
@@ -1568,7 +1594,9 @@ class TestM17N < Test::Unit::TestCase
assert_equal(u("\x81"), u("a\x81").scrub {|c| break c})
assert_raise(ArgumentError) {u("a\x81").scrub {|c| c}}
+ end
+ def test_scrub_widechar
assert_equal("\uFFFD\u3042".encode("UTF-16BE"),
"\xD8\x00\x30\x42".force_encoding(Encoding::UTF_16BE).
scrub)
diff --git a/transcode.c b/transcode.c
index d5add7b6e7..cba357b884 100644
--- a/transcode.c
+++ b/transcode.c
@@ -1854,6 +1854,7 @@ rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst,
src = rb_str_new_frozen(src);
dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags);
RB_GC_GUARD(src);
+ OBJ_INFECT_RAW(dst, src);
return dst;
}
@@ -3768,8 +3769,10 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
res = rb_econv_convert(ec, &ip, is, &op, os, flags);
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
- if (!NIL_P(input))
+ if (!NIL_P(input)) {
+ OBJ_INFECT_RAW(output, input);
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
+ }
if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) {
if (LONG_MAX / 2 < output_bytesize)
diff --git a/version.h b/version.h
index 25b35add5c..9f79785e8a 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.2.5"
#define RUBY_RELEASE_DATE "2016-02-15"
-#define RUBY_PATCHLEVEL 238
+#define RUBY_PATCHLEVEL 239
#define RUBY_RELEASE_YEAR 2016
#define RUBY_RELEASE_MONTH 2