From 83304b75c17b5126c8e8448f2cc08350146157d0 Mon Sep 17 00:00:00 2001 From: duerst Date: Mon, 14 Dec 2015 13:11:31 +0000 Subject: * enc/ebcdic.h: new dummy encoding EBCDIC-US * enc/trans/ebcdic.trans: transcodings between EBCDIC-US and iso-8859-1 [with code from Andrea Ribuoli] * test/ruby/test_transcode.rb: tests for above * tool/transcode-tblgen.rb: additional argument for method transcode_tblgen git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53112 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 9 ++ enc/ebcdic.h | 10 ++ enc/trans/ebcdic.trans | 278 ++++++++++++++++++++++++++++++++++++++++++++ test/ruby/test_transcode.rb | 7 ++ tool/transcode-tblgen.rb | 5 +- 5 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 enc/ebcdic.h create mode 100644 enc/trans/ebcdic.trans diff --git a/ChangeLog b/ChangeLog index eac47333ce..f6ec8fb9e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Mon Dec 14 22:11:11 2015 Martin Duerst + + * enc/ebcdic.h: new dummy encoding EBCDIC-US + * enc/trans/ebcdic.trans: transcodings between EBCDIC-US + and iso-8859-1 [with code from Andrea Ribuoli] + * test/ruby/test_transcode.rb: tests for above + * tool/transcode-tblgen.rb: additional argument for + method transcode_tblgen + Mon Dec 14 17:04:14 2015 SHIBATA Hiroshi * ext/socket/lib/socket.rb: use safe navigation operator. diff --git a/enc/ebcdic.h b/enc/ebcdic.h new file mode 100644 index 0000000000..ca984315ee --- /dev/null +++ b/enc/ebcdic.h @@ -0,0 +1,10 @@ +#include "regenc.h" +/* dummy for unsupported, non-ascii-based encoding */ +ENC_DUMMY("EBCDIC-US"); + +/* we start with just defining a single EBCDIC encoding, + * hopefully the most widely used one. 
+ * + * See http://www.iana.org/assignments/character-sets/character-sets.xhtml + * http://tools.ietf.org/html/rfc1345 + */ diff --git a/enc/trans/ebcdic.trans b/enc/trans/ebcdic.trans new file mode 100644 index 0000000000..e4756e3e47 --- /dev/null +++ b/enc/trans/ebcdic.trans @@ -0,0 +1,278 @@ +#include "transcode_data.h" + +<% + us_ebcdic_map = [ + ["00", "00"], + ["01", "01"], + ["02", "02"], + ["03", "03"], + ["37", "04"], + ["2D", "05"], + ["2E", "06"], + ["2F", "07"], + ["16", "08"], + ["05", "09"], + ["25", "0A"], + ["0B", "0B"], + ["0C", "0C"], + ["0D", "0D"], + ["0E", "0E"], + ["0F", "0F"], + ["10", "10"], + ["11", "11"], + ["12", "12"], + ["13", "13"], + ["3C", "14"], + ["3D", "15"], + ["32", "16"], + ["26", "17"], + ["18", "18"], + ["19", "19"], + ["3F", "1A"], + ["27", "1B"], + ["1C", "1C"], + ["1D", "1D"], + ["1E", "1E"], + ["1F", "1F"], + ["40", "20"], + ["5A", "21"], + ["7F", "22"], + ["7B", "23"], + ["5B", "24"], + ["6C", "25"], + ["50", "26"], + ["7D", "27"], + ["4D", "28"], + ["5D", "29"], + ["5C", "2A"], + ["4E", "2B"], + ["6B", "2C"], + ["60", "2D"], + ["4B", "2E"], + ["61", "2F"], + ["F0", "30"], + ["F1", "31"], + ["F2", "32"], + ["F3", "33"], + ["F4", "34"], + ["F5", "35"], + ["F6", "36"], + ["F7", "37"], + ["F8", "38"], + ["F9", "39"], + ["7A", "3A"], + ["5E", "3B"], + ["4C", "3C"], + ["7E", "3D"], + ["6E", "3E"], + ["6F", "3F"], + ["7C", "40"], + ["C1", "41"], + ["C2", "42"], + ["C3", "43"], + ["C4", "44"], + ["C5", "45"], + ["C6", "46"], + ["C7", "47"], + ["C8", "48"], + ["C9", "49"], + ["D1", "4A"], + ["D2", "4B"], + ["D3", "4C"], + ["D4", "4D"], + ["D5", "4E"], + ["D6", "4F"], + ["D7", "50"], + ["D8", "51"], + ["D9", "52"], + ["E2", "53"], + ["E3", "54"], + ["E4", "55"], + ["E5", "56"], + ["E6", "57"], + ["E7", "58"], + ["E8", "59"], + ["E9", "5A"], + ["BA", "5B"], + ["E0", "5C"], + ["BB", "5D"], + ["B0", "5E"], + ["6D", "5F"], + ["79", "60"], + ["81", "61"], + ["82", "62"], + ["83", "63"], + ["84", "64"], + ["85", "65"], + ["86", "66"], 
+ ["87", "67"], + ["88", "68"], + ["89", "69"], + ["91", "6A"], + ["92", "6B"], + ["93", "6C"], + ["94", "6D"], + ["95", "6E"], + ["96", "6F"], + ["97", "70"], + ["98", "71"], + ["99", "72"], + ["A2", "73"], + ["A3", "74"], + ["A4", "75"], + ["A5", "76"], + ["A6", "77"], + ["A7", "78"], + ["A8", "79"], + ["A9", "7A"], + ["C0", "7B"], + ["4F", "7C"], + ["D0", "7D"], + ["A1", "7E"], + ["07", "7F"], + ["20", "80"], + ["21", "81"], + ["22", "82"], + ["23", "83"], + ["24", "84"], + ["15", "85"], + ["06", "86"], + ["17", "87"], + ["28", "88"], + ["29", "89"], + ["2A", "8A"], + ["2B", "8B"], + ["2C", "8C"], + ["09", "8D"], + ["0A", "8E"], + ["1B", "8F"], + ["30", "90"], + ["31", "91"], + ["1A", "92"], + ["33", "93"], + ["34", "94"], + ["35", "95"], + ["36", "96"], + ["08", "97"], + ["38", "98"], + ["39", "99"], + ["3A", "9A"], + ["3B", "9B"], + ["04", "9C"], + ["14", "9D"], + ["3E", "9E"], + ["FF", "9F"], + ["41", "A0"], + ["AA", "A1"], + ["4A", "A2"], + ["B1", "A3"], + ["9F", "A4"], + ["B2", "A5"], + ["6A", "A6"], + ["B5", "A7"], + ["BD", "A8"], + ["B4", "A9"], + ["9A", "AA"], + ["8A", "AB"], + ["5F", "AC"], + ["CA", "AD"], + ["AF", "AE"], + ["BC", "AF"], + ["90", "B0"], + ["8F", "B1"], + ["EA", "B2"], + ["FA", "B3"], + ["BE", "B4"], + ["A0", "B5"], + ["B6", "B6"], + ["B3", "B7"], + ["9D", "B8"], + ["DA", "B9"], + ["9B", "BA"], + ["8B", "BB"], + ["B7", "BC"], + ["B8", "BD"], + ["B9", "BE"], + ["AB", "BF"], + ["64", "C0"], + ["65", "C1"], + ["62", "C2"], + ["66", "C3"], + ["63", "C4"], + ["67", "C5"], + ["9E", "C6"], + ["68", "C7"], + ["74", "C8"], + ["71", "C9"], + ["72", "CA"], + ["73", "CB"], + ["78", "CC"], + ["75", "CD"], + ["76", "CE"], + ["77", "CF"], + ["AC", "D0"], + ["69", "D1"], + ["ED", "D2"], + ["EE", "D3"], + ["EB", "D4"], + ["EF", "D5"], + ["EC", "D6"], + ["BF", "D7"], + ["80", "D8"], + ["FD", "D9"], + ["FE", "DA"], + ["FB", "DB"], + ["FC", "DC"], + ["AD", "DD"], + ["AE", "DE"], + ["59", "DF"], + ["44", "E0"], + ["45", "E1"], + ["42", "E2"], + ["46", "E3"], 
+ ["43", "E4"], + ["47", "E5"], + ["9C", "E6"], + ["48", "E7"], + ["54", "E8"], + ["51", "E9"], + ["52", "EA"], + ["53", "EB"], + ["58", "EC"], + ["55", "ED"], + ["56", "EE"], + ["57", "EF"], + ["8C", "F0"], + ["49", "F1"], + ["CD", "F2"], + ["CE", "F3"], + ["CB", "F4"], + ["CF", "F5"], + ["CC", "F6"], + ["E1", "F7"], + ["70", "F8"], + ["DD", "F9"], + ["DE", "FA"], + ["DB", "FB"], + ["DC", "FC"], + ["8D", "FD"], + ["8E", "FE"], + ["DF", "FF"] +] + +def to_nomap (map) + map.collect do |from, to| + from == to ? [from, :nomap] : [from, to] + end +end + +transcode_tblgen "EBCDIC-US", "ISO-8859-1", to_nomap(us_ebcdic_map), '{00-ff}', 'asciicompat_decoder' +transcode_tblgen "ISO-8859-1", "EBCDIC-US", to_nomap(us_ebcdic_map.map {|a,b| [b,a] }), '{00-ff}', 'asciicompat_encoder' +%> + +<%= transcode_generated_code %> + +TRANS_INIT(ebcdic) +{ +<%= transcode_register_code %> +} diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 4bade11a51..5162c7d5d9 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2018,6 +2018,13 @@ class TestTranscode < Test::Unit::TestCase def test_Big5_UAO check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗 end + + def test_EBCDIC + check_both_ways("abcdeABCDE", "\x81\x82\x83\x84\x85\xC1\xC2\xC3\xC4\xC5", 'EBCDIC-US') + check_both_ways("aijrszAIJRSZ09", "\x81\x89\x91\x99\xA2\xA9\xC1\xC9\xD1\xD9\xE2\xE9\xF0\xF9", 'EBCDIC-US') + check_both_ways("Matz", "\xD4\x81\xA3\xA9", 'EBCDIC-US') # Matz + check_both_ways("D\u00FCrst", "\xC4\xDC\x99\xA2\xA3", 'EBCDIC-US') # Dürst + end def test_nothing_changed a = "James".force_encoding("US-ASCII") diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index c87363876a..17c5ebf3d4 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -842,7 +842,8 @@ def transcode_tbl_only(from, to, map, valid_encoding=UnspecifiedValidEncoding) return map, tree_name, real_tree_name, max_input end -def transcode_tblgen(from, to, map, 
valid_encoding=UnspecifiedValidEncoding) +def transcode_tblgen(from, to, map, valid_encoding=UnspecifiedValidEncoding, + ascii_compatibility='asciicompat_converter') map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding) transcoder_name = "rb_#{tree_name}" TRANSCODERS << transcoder_name @@ -856,7 +857,7 @@ static const rb_transcoder #{input_unit_length}, /* input_unit_length */ #{max_input}, /* max_input */ #{max_output}, /* max_output */ - asciicompat_converter, /* asciicompat_type */ + #{ascii_compatibility}, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL, NULL, NULL, NULL -- cgit v1.2.3