summaryrefslogtreecommitdiff
path: root/transcode.c
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-04-19 17:50:38 (GMT)
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-04-19 17:50:38 (GMT)
commit394d5dfa9ba625c99a1e6a411f81b628bfebd60a (patch)
tree22a788ad3df769fa9c3b4f5f81efde5b1cad87a4 /transcode.c
parent57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff)
* string.c (str_scrub): add Ruby method String#scrub, which verifies and
fixes invalid byte sequences.
* string.c (str_compat_and_valid): check that the given string is compatible with, and valid in, the given encoding.
* transcode.c (str_transcode0): if invalid: :replace is specified for String#encode, replace invalid byte sequences even if the destination encoding equals the source encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r--transcode.c17
1 files changed, 13 insertions, 4 deletions
diff --git a/transcode.c b/transcode.c
index de12c04..b03241f 100644
--- a/transcode.c
+++ b/transcode.c
@@ -2652,6 +2652,8 @@ str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2,
return dencidx;
}
+VALUE rb_str_scrub(int argc, VALUE *argv, VALUE str);
+
static int
str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
{
@@ -2686,6 +2688,17 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
ECONV_XML_ATTR_CONTENT_DECORATOR|
ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
if (senc && senc == denc) {
+ if (ecflags & ECONV_INVALID_MASK) {
+ if (!NIL_P(ecopts)) {
+ VALUE rep = rb_hash_aref(ecopts, sym_replace);
+ dest = rb_str_scrub(1, &rep, str);
+ }
+ else {
+ dest = rb_str_scrub(0, NULL, str);
+ }
+ *self = dest;
+ return dencidx;
+ }
return NIL_P(arg2) ? -1 : dencidx;
}
if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
@@ -2815,10 +2828,6 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);
* in the source encoding. The last form by default does not raise
* exceptions but uses replacement strings.
*
- * Please note that conversion from an encoding +enc+ to the
- * same encoding +enc+ is a no-op, i.e. the receiver is returned without
- * any changes, and no exceptions are raised, even if there are invalid bytes.
- *
* The +options+ Hash gives details for conversion and can have the following
* keys:
*