summary refs log tree commit diff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-05-18 11:16:56 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-05-18 11:16:56 +0000
commit 05c0a22854312d528965ef5866cf5d840b42bdad (patch)
tree 78b8218aab42e94c9f516c92df605e4bac8b9935
parent bfec5ad41f808b37d5bc248fdcbc595c367be9e6 (diff)
* string.c (str_scrub_bang): add String#scrub!. [Feature #8414]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40810 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 4
-rw-r--r-- NEWS 2
-rw-r--r-- string.c 23
-rw-r--r-- test/ruby/test_m17n.rb 17
4 files changed, 45 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index c8e188c675..329a694e88 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Sat May 18 20:15:28 2013 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (str_scrub_bang): add String#scrub!. [Feature #8414]
+
Sat May 18 16:59:52 2013 Tanaka Akira <akr@fsij.org>
* ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE.
diff --git a/NEWS b/NEWS
index 436973b8e2..ce69d328fb 100644
--- a/NEWS
+++ b/NEWS
@@ -32,7 +32,7 @@ with all sufficient information, see the ChangeLog file.
* String
* New methods:
- * added String#scrub to verify and fix invalid byte sequence.
+ * String#scrub and String#scrub! verify and fix invalid byte sequence.
* extended methods:
* If invalid: :replace is specified for String#encode, replace
invalid byte sequence even if the destination encoding equals to
diff --git a/string.c b/string.c
index 59ed1b2dc4..b8f1ab4f9d 100644
--- a/string.c
+++ b/string.c
@@ -8014,6 +8014,28 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
}
}
+/*
+ * call-seq:
+ * str.scrub! -> str
+ * str.scrub!(repl) -> str
+ * str.scrub!{|bytes|} -> str
+ *
+ * If the string contains invalid byte sequences, replaces the invalid bytes in place with
+ * the given replacement; otherwise leaves the contents unchanged. Always returns self.
+ * If a block is given, replaces invalid bytes with the value returned by the block.
+ *
+ * "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
+ * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
+ * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+ */
+VALUE
+rb_str_scrub_bang(int argc, VALUE *argv, VALUE str)
+{
+ VALUE new = rb_str_scrub(argc, argv, str);
+ rb_str_replace(str, new);
+ return str;
+}
+
/**********************************************************************
* Document-class: Symbol
*
@@ -8500,6 +8522,7 @@ Init_String(void)
rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
+ rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1);
rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 9d92fbc6a9..98f79b3f76 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1491,6 +1491,11 @@ class TestM17N < Test::Unit::TestCase
end
def test_scrub
+ str = "\u3042\u3044"
+ assert_not_same(str, str.scrub)
+ str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
+ assert_not_same(str, str.scrub)
+
assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
@@ -1529,4 +1534,16 @@ class TestM17N < Test::Unit::TestCase
"\xff".force_encoding(Encoding::UTF_32LE).
scrub)
end
+
+ def test_scrub_bang
+ str = "\u3042\u3044"
+ assert_same(str, str.scrub!)
+ str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
+ assert_same(str, str.scrub!)
+
+ str = u("\x80\x80\x80")
+ str.scrub!
+ assert_same(str, str.scrub!)
+ assert_equal("\uFFFD\uFFFD\uFFFD", str)
+ end
end