diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
commit | 394d5dfa9ba625c99a1e6a411f81b628bfebd60a (patch) | |
tree | 22a788ad3df769fa9c3b4f5f81efde5b1cad87a4 /transcode.c | |
parent | 57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff) |
* string.c (str_scrub): add the Ruby method String#scrub, which verifies and
fixes invalid byte sequences.
* string.c (str_compat_and_valid): check that the given string is compatible
and valid with the given encoding.
* transcode.c (str_transcode0): If invalid: :replace is specified for
String#encode, replace invalid byte sequences even if the destination
encoding equals the source encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- | transcode.c | 17 |
1 files changed, 13 insertions, 4 deletions
diff --git a/transcode.c b/transcode.c index de12c04f0f..b03241fcfc 100644 --- a/transcode.c +++ b/transcode.c @@ -2652,6 +2652,8 @@ str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2, return dencidx; } +VALUE rb_str_scrub(int argc, VALUE *argv, VALUE str); + static int str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) { @@ -2686,6 +2688,17 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) ECONV_XML_ATTR_CONTENT_DECORATOR| ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) { if (senc && senc == denc) { + if (ecflags & ECONV_INVALID_MASK) { + if (!NIL_P(ecopts)) { + VALUE rep = rb_hash_aref(ecopts, sym_replace); + dest = rb_str_scrub(1, &rep, str); + } + else { + dest = rb_str_scrub(0, NULL, str); + } + *self = dest; + return dencidx; + } return NIL_P(arg2) ? -1 : dencidx; } if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) { @@ -2815,10 +2828,6 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx); * in the source encoding. The last form by default does not raise * exceptions but uses replacement strings. * - * Please note that conversion from an encoding +enc+ to the - * same encoding +enc+ is a no-op, i.e. the receiver is returned without - * any changes, and no exceptions are raised, even if there are invalid bytes. - * * The +options+ Hash gives details for conversion and can have the following * keys: * |