summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 12
-rw-r--r-- enc/trans/single_byte.trans 11
-rw-r--r-- enc/trans/windows-1252-tbl.rb 125
-rw-r--r-- test/ruby/test_transcode.rb 27
-rw-r--r-- tool/transcode-tblgen.rb 1
5 files changed, 176 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 5a5a6231bc..6d02b07cd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Tue Oct 14 13:30:30 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * enc/trans/single_byte.trans: added windows-1252
+
+ * enc/trans/windows-1252-tbl.rb: new file
+ (contributed by Yoshihiro Kambayashi)
+
+ * tool/transcode-tblgen.rb: listed windows-1252 as '1byte'
+
+ * test/ruby/test_transcode.rb: added test_windows_1252
+ (contributed by Yoshihiro Kambayashi)
+
Tue Oct 14 12:22:32 2008 Kazuhiro NISHIYAMA <zn@mbf.nifty.com>
* test/ruby/test_variable.rb (TestVariable#test_variable): add
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index 57254bf3d5..c33a850ad8 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -19,6 +19,7 @@
require 'iso-8859-13-tbl'
require 'iso-8859-14-tbl'
require 'iso-8859-15-tbl'
+ require 'windows-1252-tbl'
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
@@ -35,6 +36,15 @@
code
end
+ def transcode_tblgen_windows(name, tbl_to_ucs)
+ name_ident = name.tr('-','_')
+ code = ''
+ code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs])
+ code << "\n"
+ code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
+ code
+ end
+
transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
@@ -49,6 +59,7 @@
transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
+ transcode_tblgen_windows("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL)
%>
<%= transcode_generated_code %>
diff --git a/enc/trans/windows-1252-tbl.rb b/enc/trans/windows-1252-tbl.rb
new file mode 100644
index 0000000000..cefc72dff2
--- /dev/null
+++ b/enc/trans/windows-1252-tbl.rb
@@ -0,0 +1,125 @@
+WINDOWS_1252_TO_UCS_TBL = [ # pairs of [windows-1252 byte as a hex string, Unicode code point]
+ ["A0",0xA0], # A0-FF: the byte value equals the code point
+ ["A1",0xA1],
+ ["A2",0xA2],
+ ["A3",0xA3],
+ ["A4",0xA4],
+ ["A5",0xA5],
+ ["A6",0xA6],
+ ["A7",0xA7],
+ ["A8",0xA8],
+ ["A9",0xA9],
+ ["AA",0xAA],
+ ["AB",0xAB],
+ ["AC",0xAC],
+ ["AD",0xAD],
+ ["AE",0xAE],
+ ["AF",0xAF],
+ ["B0",0xB0],
+ ["B1",0xB1],
+ ["B2",0xB2],
+ ["B3",0xB3],
+ ["B4",0xB4],
+ ["B5",0xB5],
+ ["B6",0xB6],
+ ["B7",0xB7],
+ ["B8",0xB8],
+ ["B9",0xB9],
+ ["BA",0xBA],
+ ["BB",0xBB],
+ ["BC",0xBC],
+ ["BD",0xBD],
+ ["BE",0xBE],
+ ["BF",0xBF],
+ ["C0",0xC0],
+ ["C1",0xC1],
+ ["C2",0xC2],
+ ["C3",0xC3],
+ ["C4",0xC4],
+ ["C5",0xC5],
+ ["C6",0xC6],
+ ["C7",0xC7],
+ ["C8",0xC8],
+ ["C9",0xC9],
+ ["CA",0xCA],
+ ["CB",0xCB],
+ ["CC",0xCC],
+ ["CD",0xCD],
+ ["CE",0xCE],
+ ["CF",0xCF],
+ ["D0",0xD0],
+ ["D1",0xD1],
+ ["D2",0xD2],
+ ["D3",0xD3],
+ ["D4",0xD4],
+ ["D5",0xD5],
+ ["D6",0xD6],
+ ["D7",0xD7],
+ ["D8",0xD8],
+ ["D9",0xD9],
+ ["DA",0xDA],
+ ["DB",0xDB],
+ ["DC",0xDC],
+ ["DD",0xDD],
+ ["DE",0xDE],
+ ["DF",0xDF],
+ ["E0",0xE0],
+ ["E1",0xE1],
+ ["E2",0xE2],
+ ["E3",0xE3],
+ ["E4",0xE4],
+ ["E5",0xE5],
+ ["E6",0xE6],
+ ["E7",0xE7],
+ ["E8",0xE8],
+ ["E9",0xE9],
+ ["EA",0xEA],
+ ["EB",0xEB],
+ ["EC",0xEC],
+ ["ED",0xED],
+ ["EE",0xEE],
+ ["EF",0xEF],
+ ["F0",0xF0],
+ ["F1",0xF1],
+ ["F2",0xF2],
+ ["F3",0xF3],
+ ["F4",0xF4],
+ ["F5",0xF5],
+ ["F6",0xF6],
+ ["F7",0xF7],
+ ["F8",0xF8],
+ ["F9",0xF9],
+ ["FA",0xFA],
+ ["FB",0xFB],
+ ["FC",0xFC],
+ ["FD",0xFD],
+ ["FE",0xFE],
+ ["FF",0xFF], # last of the A0-FF run; entries below cover 80-9F (none for 81, 8D, 8F, 90, 9D)
+ ["8C",0x152], # LATIN CAPITAL LIGATURE OE
+ ["9C",0x153], # LATIN SMALL LIGATURE OE
+ ["8A",0x160], # LATIN CAPITAL LETTER S WITH CARON
+ ["9A",0x161], # LATIN SMALL LETTER S WITH CARON
+ ["9F",0x178], # LATIN CAPITAL LETTER Y WITH DIAERESIS
+ ["8E",0x17D], # LATIN CAPITAL LETTER Z WITH CARON
+ ["9E",0x17E], # LATIN SMALL LETTER Z WITH CARON
+ ["83",0x192], # LATIN SMALL LETTER F WITH HOOK
+ ["88",0x2C6], # MODIFIER LETTER CIRCUMFLEX ACCENT
+ ["98",0x2DC], # SMALL TILDE
+ ["96",0x2013], # EN DASH
+ ["97",0x2014], # EM DASH
+ ["91",0x2018], # LEFT SINGLE QUOTATION MARK
+ ["92",0x2019], # RIGHT SINGLE QUOTATION MARK
+ ["82",0x201A], # SINGLE LOW-9 QUOTATION MARK
+ ["93",0x201C], # LEFT DOUBLE QUOTATION MARK
+ ["94",0x201D], # RIGHT DOUBLE QUOTATION MARK
+ ["84",0x201E], # DOUBLE LOW-9 QUOTATION MARK
+ ["86",0x2020], # DAGGER
+ ["87",0x2021], # DOUBLE DAGGER
+ ["95",0x2022], # BULLET
+ ["85",0x2026], # HORIZONTAL ELLIPSIS
+ ["89",0x2030], # PER MILLE SIGN
+ ["8B",0x2039], # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ ["9B",0x203A], # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ ["80",0x20AC], # EURO SIGN
+ ["99",0x2122], # TRADE MARK SIGN
+] \ No newline at end of file
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 2838f3a14e..3ee1ff380b 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -133,6 +133,33 @@ class TestTranscode < Test::Unit::TestCase
end
end
+ def test_windows_1252 # round-trips for mapped bytes; bytes with no entry in WINDOWS_1252_TO_UCS_TBL must raise
+ check_both_ways("\u20AC", "\x80", 'windows-1252') # €
+ assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1252') } # 0x81: no table entry
+ check_both_ways("\u201A", "\x82", 'windows-1252') # ‚
+ check_both_ways("\u0152", "\x8C", 'windows-1252') # Œ
+ assert_raise(Encoding::UndefinedConversionError) { "\x8D".encode("utf-8", 'windows-1252') } # 0x8D: no table entry
+ check_both_ways("\u017D", "\x8E", 'windows-1252') # Ž
+ assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1252') } # 0x8F: no table entry
+ assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1252') } # 0x90: no table entry
+ check_both_ways("\u2018", "\x91", 'windows-1252') # ‘
+ check_both_ways("\u0153", "\x9C", 'windows-1252') # œ
+ assert_raise(Encoding::UndefinedConversionError) { "\x9D".encode("utf-8", 'windows-1252') } # 0x9D: no table entry
+ check_both_ways("\u017E", "\x9E", 'windows-1252') # ž
+ check_both_ways("\u00A0", "\xA0", 'windows-1252') # non-breaking space
+ check_both_ways("\u00AF", "\xAF", 'windows-1252') # ¯
+ check_both_ways("\u00B0", "\xB0", 'windows-1252') # °
+ check_both_ways("\u00BF", "\xBF", 'windows-1252') # ¿
+ check_both_ways("\u00C0", "\xC0", 'windows-1252') # À
+ check_both_ways("\u00CF", "\xCF", 'windows-1252') # Ï
+ check_both_ways("\u00D0", "\xD0", 'windows-1252') # Ð
+ check_both_ways("\u00DF", "\xDF", 'windows-1252') # ß
+ check_both_ways("\u00E0", "\xE0", 'windows-1252') # à
+ check_both_ways("\u00EF", "\xEF", 'windows-1252') # ï
+ check_both_ways("\u00F0", "\xF0", 'windows-1252') # ð
+ check_both_ways("\u00FF", "\xFF", 'windows-1252') # ÿ
+ end
+
def check_utf_16_both_ways(utf8, raw)
copy = raw.dup
0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] }
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index 50b490810f..b785e07399 100644
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -742,6 +742,7 @@ ValidEncoding = {
'ISO-8859-13' => '1byte',
'ISO-8859-14' => '1byte',
'ISO-8859-15' => '1byte',
+ 'WINDOWS-1252' => '1byte',
'Windows-31J' => 'Shift_JIS',
'eucJP-ms' => 'EUC-JP'
}.each {|k, v|