summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- enc/trans/utf_16_32.trans 30
-rw-r--r-- test/ruby/test_transcode.rb 6
3 files changed, 35 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 2f778111b3..c0b870e469 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Wed Nov 24 05:40:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * enc/trans/utf_16_32.trans: add a convert from UTF-8 to UTF-16.
+
Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
* enc/trans/utf_16_32.trans: raise error on unpaired upper
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 2d7005993e..01caffe02c 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -289,7 +289,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
}
break;
case BE:
- if (s[0] < 0xD8 && 0xDF < s[0]) {
+ if (s[0] < 0xD8 || 0xDF < s[0]) {
return (VALUE)FUNso;
}
else if (s[0] <= 0xDB) {
@@ -297,7 +297,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
}
break;
case LE:
- if (s[1] < 0xD8 && 0xDF < s[1]) {
+ if (s[1] < 0xD8 || 0xDF < s[1]) {
return (VALUE)FUNso;
}
else if (s[1] <= 0xDB) {
@@ -321,6 +321,19 @@ fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char
return 0;
}
+static ssize_t
+fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+ unsigned char *sp = statep;
+ if (*sp == 0) {
+ *o++ = 0xFE;
+ *o++ = 0xFF;
+ *sp = 1;
+ return 2 + fun_so_to_utf_16be(statep, s, l, o, osize);
+ }
+ return fun_so_to_utf_16be(statep, s, l, o, osize);
+}
+
static const rb_transcoder
rb_from_UTF_16BE = {
"UTF-16BE", "UTF-8", from_UTF_16BE,
@@ -429,6 +442,18 @@ rb_from_UTF_16 = {
NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
};
+static const rb_transcoder
+rb_to_UTF_16 = {
+ "UTF-8", "UTF-16", from_UTF_8,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 4, /* max_input */
+ 4, /* max_output */
+ asciicompat_encoder, /* asciicompat_type */
+ 1, state_init, NULL, /* state_size, state_init, state_fini */
+ NULL, NULL, NULL, fun_so_to_utf_16
+};
+
void
Init_utf_16_32(void)
{
@@ -441,4 +466,5 @@ Init_utf_16_32(void)
rb_register_transcoder(&rb_from_UTF_32LE);
rb_register_transcoder(&rb_to_UTF_32LE);
rb_register_transcoder(&rb_from_UTF_16);
+ rb_register_transcoder(&rb_to_UTF_16);
}
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index bf4ca2bf6d..cca7aabac9 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -1022,9 +1022,9 @@ class TestTranscode < Test::Unit::TestCase
def test_utf_16_bom
expected = "\u{3042}\u{3044}\u{20bb7}"
assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
- assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16"))
- assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16"))
- assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16"))
+ check_both_ways(expected, %w/feff30423044d842dfb7/.pack("H*"), "UTF-16")
+ assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")}
+ assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")}
end
def check_utf_32_both_ways(utf8, raw)