summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- test/ruby/test_transcode.rb 11
-rw-r--r-- transcode.c 8
3 files changed, 22 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 178aa41b04..cf4e4ec34f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Thu Jul 31 19:54:57 2008 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * transcode.c (get_replacement_character): use U+FFFD as replacement
+ character when converting to Unicode.
+
+ * test/ruby/test_transcode.rb (test_unicode_public_review_issue_121):
+ rename from test_public_review_issue_121.
+
+ * test/ruby/test_transcode.rb (test_unicode_public_review_issue_121):
+ enable option2.
+
Thu Jul 31 17:00:10 2008 NARUSE, Yui <naruse@ruby-lang.org>
* transcode.c (get_replacement_character): fix: invalid byte sequence
@@ -11,9 +22,9 @@ Thu Jul 31 16:37:03 2008 NARUSE, Yui <naruse@ruby-lang.org>
Thu Jul 31 15:11:11 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* test/ruby/test_transcode.rb: added test_shift_jis
- (contributed by Yoshihiro Kambayashi) and
- test_public_review_issue_121
- (see http://www.unicode.org/review/pr-121.html)
+ (contributed by Yoshihiro Kambayashi) and
+ test_public_review_issue_121
+ (see http://www.unicode.org/review/pr-121.html)
Thu Jul 31 13:18:30 2008 Yusuke Endoh <mame@tsg.ne.jp>
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 9ec2b4367a..6aa599b1b9 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -312,16 +312,13 @@ class TestTranscode < Test::Unit::TestCase
# check_both_ways("\u9299", "\x1b$(Dd!\x1b(B", "iso-2022-jp-1") # JIS X 0212 区68 点01 銙
end
- def test_public_review_issue_121 # see http://www.unicode.org/review/pr-121.html
+ def test_unicode_public_review_issue_121 # see http://www.unicode.org/review/pr-121.html
# assert_equal("\x00\x61\x00?\x00\x62".force_encoding('UTF-16BE'),
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 1
- assert_equal("\x00\x61\x00?\x00?\x00?\x00\x62".force_encoding('UTF-16BE'),
+ assert_equal("\x00\x61\xFF\xFD\xFF\xFD\xFF\xFD\x00\x62".force_encoding('UTF-16BE'),
"\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 2
- # The next test doesn't work because of a bug in the implementation
- # but we currently don't plan to fix that bug because we'll rewrite
- # this stuff a bit anyway.
- # assert_equal("\x61\x00?\x00?\x00?\x00\x62\x00".force_encoding('UTF-16LE'),
- # "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16LE', 'UTF-8', invalid: :replace)) # option 2
+ assert_equal("\x61\x00\xFD\xFF\xFD\xFF\xFD\xFF\x62\x00".force_encoding('UTF-16LE'),
+ "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16LE', 'UTF-8', invalid: :replace)) # option 2
# assert_equal("\x00\x61\x00?\x00?\x00?\x00?\x00?\x00?\x00\x62".force_encoding('UTF-16BE'),
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 3
end
diff --git a/transcode.c b/transcode.c
index 4d9f00f8c2..55f4b2faf3 100644
--- a/transcode.c
+++ b/transcode.c
@@ -137,16 +137,16 @@ get_replacement_character(rb_encoding *enc)
return "?";
}
else if (utf16be_encoding == enc) {
- return "\x00?";
+ return "\xFF\xFD";
}
else if (utf16le_encoding == enc) {
- return "?\x00";
+ return "\xFD\xFF";
}
else if (utf32be_encoding == enc) {
- return "\x00\x00\x00?";
+ return "\x00\x00\xFF\xFD";
}
else if (utf32le_encoding == enc) {
- return "?\x00\x00\x00";
+ return "\xFD\xFF\x00\x00";
}
else {
return "?";