From 05c0a22854312d528965ef5866cf5d840b42bdad Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 18 May 2013 11:16:56 +0000 Subject: * string.c (str_scrub_bang): add String#scrub!. [Feature #8414] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40810 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 ++++ NEWS | 2 +- string.c | 23 +++++++++++++++++++++++ test/ruby/test_m17n.rb | 17 +++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c8e188c675..329a694e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Sat May 18 20:15:28 2013 NARUSE, Yui + + * string.c (str_scrub_bang): add String#scrub!. [Feature #8414] + Sat May 18 16:59:52 2013 Tanaka Akira * ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE. diff --git a/NEWS b/NEWS index 436973b8e2..ce69d328fb 100644 --- a/NEWS +++ b/NEWS @@ -32,7 +32,7 @@ with all sufficient information, see the ChangeLog file. * String * New methods: - * added String#scrub to verify and fix invalid byte sequence. + * String#scrub and String#scrub! verify and fix invalid byte sequence. * extended methods: * If invalid: :replace is specified for String#encode, replace invalid byte sequence even if the destination encoding equals to diff --git a/string.c b/string.c index 59ed1b2dc4..b8f1ab4f9d 100644 --- a/string.c +++ b/string.c @@ -8014,6 +8014,28 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str) } } +/* + * call-seq: + * str.scrub! -> str + * str.scrub!(repl) -> str + * str.scrub!{|bytes|} -> str + * + * If the string is invalid byte sequence then replace invalid bytes with given replacement + * character, else returns self. + * If block is given, replace invalid bytes with returned value of the block. + * + * "abc\u3042\x81".scrub! 
#=> "abc\u3042\uFFFD" + * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*" + * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>" + */ +VALUE +rb_str_scrub_bang(int argc, VALUE *argv, VALUE str) +{ + VALUE new = rb_str_scrub(argc, argv, str); + rb_str_replace(str, new); + return str; +} + /********************************************************************** * Document-class: Symbol * @@ -8500,6 +8522,7 @@ Init_String(void) rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2); rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1); rb_define_method(rb_cString, "scrub", rb_str_scrub, -1); + rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1); rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 9d92fbc6a9..98f79b3f76 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1491,6 +1491,11 @@ class TestM17N < Test::Unit::TestCase end def test_scrub + str = "\u3042\u3044" + assert_not_same(str, str.scrub) + str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding + assert_not_same(str, str.scrub) + assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) @@ -1529,4 +1534,16 @@ class TestM17N < Test::Unit::TestCase "\xff".force_encoding(Encoding::UTF_32LE). scrub) end + + def test_scrub_bang + str = "\u3042\u3044" + assert_same(str, str.scrub!) + str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding + assert_same(str, str.scrub!) + + str = u("\x80\x80\x80") + str.scrub! + assert_same(str, str.scrub!) + assert_equal("\uFFFD\uFFFD\uFFFD", str) + end end -- cgit v1.2.3