summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2015-12-14 13:11:31 +0000
committerduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2015-12-14 13:11:31 +0000
commit83304b75c17b5126c8e8448f2cc08350146157d0 (patch)
treeecfc6fb7163b13c09a14af1c0b462eae79f125c9
parent059c9c1cf371e049c7481c78b76e9620da52757f (diff)
* enc/ebcdic.h: new dummy encoding EBCDIC-US
* enc/trans/ebcdic.trans: transcodings between EBCDIC-US and iso-8859-1 [with code from Andrea Ribuoli] * test/ruby/test_transcode.rb: tests for above * tool/transcode-tblgen.rb: additional argument for method transcode_tblgen git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53112 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog9
-rw-r--r--enc/ebcdic.h10
-rw-r--r--enc/trans/ebcdic.trans278
-rw-r--r--test/ruby/test_transcode.rb7
-rw-r--r--tool/transcode-tblgen.rb5
5 files changed, 307 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index eac4733..f6ec8fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Mon Dec 14 22:11:11 2015 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * enc/ebcdic.h: new dummy encoding EBCDIC-US
+ * enc/trans/ebcdic.trans: transcodings between EBCDIC-US
+ and iso-8859-1 [with code from Andrea Ribuoli]
+ * test/ruby/test_transcode.rb: tests for above
+ * tool/transcode-tblgen.rb: additional argument for
+ method transcode_tblgen
+
Mon Dec 14 17:04:14 2015 SHIBATA Hiroshi <hsbt@ruby-lang.org>
* ext/socket/lib/socket.rb: use safe navigation operator.
diff --git a/enc/ebcdic.h b/enc/ebcdic.h
new file mode 100644
index 0000000..ca98431
--- /dev/null
+++ b/enc/ebcdic.h
@@ -0,0 +1,10 @@
+#include "regenc.h"
+/* dummy for unsupported, non-ascii-based encoding */
+ENC_DUMMY("EBCDIC-US");
+
+/* we start with just defining a single EBCDIC encoding,
+ * hopefully the most widely used one.
+ *
+ * See http://www.iana.org/assignments/character-sets/character-sets.xhtml
+ * http://tools.ietf.org/html/rfc1345
+ */
diff --git a/enc/trans/ebcdic.trans b/enc/trans/ebcdic.trans
new file mode 100644
index 0000000..e4756e3
--- /dev/null
+++ b/enc/trans/ebcdic.trans
@@ -0,0 +1,278 @@
+#include "transcode_data.h"
+
+<%
+ us_ebcdic_map = [ # each entry is [EBCDIC-US byte, ISO-8859-1 byte], both written as two-digit hex strings
+ ["00", "00"],
+ ["01", "01"],
+ ["02", "02"],
+ ["03", "03"],
+ ["37", "04"],
+ ["2D", "05"],
+ ["2E", "06"],
+ ["2F", "07"],
+ ["16", "08"],
+ ["05", "09"],
+ ["25", "0A"],
+ ["0B", "0B"],
+ ["0C", "0C"],
+ ["0D", "0D"],
+ ["0E", "0E"],
+ ["0F", "0F"],
+ ["10", "10"],
+ ["11", "11"],
+ ["12", "12"],
+ ["13", "13"],
+ ["3C", "14"],
+ ["3D", "15"],
+ ["32", "16"],
+ ["26", "17"],
+ ["18", "18"],
+ ["19", "19"],
+ ["3F", "1A"],
+ ["27", "1B"],
+ ["1C", "1C"],
+ ["1D", "1D"],
+ ["1E", "1E"],
+ ["1F", "1F"],
+ ["40", "20"],
+ ["5A", "21"],
+ ["7F", "22"],
+ ["7B", "23"],
+ ["5B", "24"],
+ ["6C", "25"],
+ ["50", "26"],
+ ["7D", "27"],
+ ["4D", "28"],
+ ["5D", "29"],
+ ["5C", "2A"],
+ ["4E", "2B"],
+ ["6B", "2C"],
+ ["60", "2D"],
+ ["4B", "2E"],
+ ["61", "2F"],
+ ["F0", "30"],
+ ["F1", "31"],
+ ["F2", "32"],
+ ["F3", "33"],
+ ["F4", "34"],
+ ["F5", "35"],
+ ["F6", "36"],
+ ["F7", "37"],
+ ["F8", "38"],
+ ["F9", "39"],
+ ["7A", "3A"],
+ ["5E", "3B"],
+ ["4C", "3C"],
+ ["7E", "3D"],
+ ["6E", "3E"],
+ ["6F", "3F"],
+ ["7C", "40"],
+ ["C1", "41"],
+ ["C2", "42"],
+ ["C3", "43"],
+ ["C4", "44"],
+ ["C5", "45"],
+ ["C6", "46"],
+ ["C7", "47"],
+ ["C8", "48"],
+ ["C9", "49"],
+ ["D1", "4A"],
+ ["D2", "4B"],
+ ["D3", "4C"],
+ ["D4", "4D"],
+ ["D5", "4E"],
+ ["D6", "4F"],
+ ["D7", "50"],
+ ["D8", "51"],
+ ["D9", "52"],
+ ["E2", "53"],
+ ["E3", "54"],
+ ["E4", "55"],
+ ["E5", "56"],
+ ["E6", "57"],
+ ["E7", "58"],
+ ["E8", "59"],
+ ["E9", "5A"],
+ ["BA", "5B"],
+ ["E0", "5C"],
+ ["BB", "5D"],
+ ["B0", "5E"],
+ ["6D", "5F"],
+ ["79", "60"],
+ ["81", "61"],
+ ["82", "62"],
+ ["83", "63"],
+ ["84", "64"],
+ ["85", "65"],
+ ["86", "66"],
+ ["87", "67"],
+ ["88", "68"],
+ ["89", "69"],
+ ["91", "6A"],
+ ["92", "6B"],
+ ["93", "6C"],
+ ["94", "6D"],
+ ["95", "6E"],
+ ["96", "6F"],
+ ["97", "70"],
+ ["98", "71"],
+ ["99", "72"],
+ ["A2", "73"],
+ ["A3", "74"],
+ ["A4", "75"],
+ ["A5", "76"],
+ ["A6", "77"],
+ ["A7", "78"],
+ ["A8", "79"],
+ ["A9", "7A"],
+ ["C0", "7B"],
+ ["4F", "7C"],
+ ["D0", "7D"],
+ ["A1", "7E"],
+ ["07", "7F"],
+ ["20", "80"],
+ ["21", "81"],
+ ["22", "82"],
+ ["23", "83"],
+ ["24", "84"],
+ ["15", "85"],
+ ["06", "86"],
+ ["17", "87"],
+ ["28", "88"],
+ ["29", "89"],
+ ["2A", "8A"],
+ ["2B", "8B"],
+ ["2C", "8C"],
+ ["09", "8D"],
+ ["0A", "8E"],
+ ["1B", "8F"],
+ ["30", "90"],
+ ["31", "91"],
+ ["1A", "92"],
+ ["33", "93"],
+ ["34", "94"],
+ ["35", "95"],
+ ["36", "96"],
+ ["08", "97"],
+ ["38", "98"],
+ ["39", "99"],
+ ["3A", "9A"],
+ ["3B", "9B"],
+ ["04", "9C"],
+ ["14", "9D"],
+ ["3E", "9E"],
+ ["FF", "9F"],
+ ["41", "A0"],
+ ["AA", "A1"],
+ ["4A", "A2"],
+ ["B1", "A3"],
+ ["9F", "A4"],
+ ["B2", "A5"],
+ ["6A", "A6"],
+ ["B5", "A7"],
+ ["BD", "A8"],
+ ["B4", "A9"],
+ ["9A", "AA"],
+ ["8A", "AB"],
+ ["5F", "AC"],
+ ["CA", "AD"],
+ ["AF", "AE"],
+ ["BC", "AF"],
+ ["90", "B0"],
+ ["8F", "B1"],
+ ["EA", "B2"],
+ ["FA", "B3"],
+ ["BE", "B4"],
+ ["A0", "B5"],
+ ["B6", "B6"],
+ ["B3", "B7"],
+ ["9D", "B8"],
+ ["DA", "B9"],
+ ["9B", "BA"],
+ ["8B", "BB"],
+ ["B7", "BC"],
+ ["B8", "BD"],
+ ["B9", "BE"],
+ ["AB", "BF"],
+ ["64", "C0"],
+ ["65", "C1"],
+ ["62", "C2"],
+ ["66", "C3"],
+ ["63", "C4"],
+ ["67", "C5"],
+ ["9E", "C6"],
+ ["68", "C7"],
+ ["74", "C8"],
+ ["71", "C9"],
+ ["72", "CA"],
+ ["73", "CB"],
+ ["78", "CC"],
+ ["75", "CD"],
+ ["76", "CE"],
+ ["77", "CF"],
+ ["AC", "D0"],
+ ["69", "D1"],
+ ["ED", "D2"],
+ ["EE", "D3"],
+ ["EB", "D4"],
+ ["EF", "D5"],
+ ["EC", "D6"],
+ ["BF", "D7"],
+ ["80", "D8"],
+ ["FD", "D9"],
+ ["FE", "DA"],
+ ["FB", "DB"],
+ ["FC", "DC"],
+ ["AD", "DD"],
+ ["AE", "DE"],
+ ["59", "DF"],
+ ["44", "E0"],
+ ["45", "E1"],
+ ["42", "E2"],
+ ["46", "E3"],
+ ["43", "E4"],
+ ["47", "E5"],
+ ["9C", "E6"],
+ ["48", "E7"],
+ ["54", "E8"],
+ ["51", "E9"],
+ ["52", "EA"],
+ ["53", "EB"],
+ ["58", "EC"],
+ ["55", "ED"],
+ ["56", "EE"],
+ ["57", "EF"],
+ ["8C", "F0"],
+ ["49", "F1"],
+ ["CD", "F2"],
+ ["CE", "F3"],
+ ["CB", "F4"],
+ ["CF", "F5"],
+ ["CC", "F6"],
+ ["E1", "F7"],
+ ["70", "F8"],
+ ["DD", "F9"],
+ ["DE", "FA"],
+ ["DB", "FB"],
+ ["DC", "FC"],
+ ["8D", "FD"],
+ ["8E", "FE"],
+ ["DF", "FF"]
+]
+
+def to_nomap (map) # takes an array of [from, to] pairs and returns a new array built by collect
+ map.collect do |from, to|
+ from == to ? [from, :nomap] : [from, to] # identical pairs become [from, :nomap]; all other pairs pass through unchanged
+ end # NOTE(review): :nomap is consumed by transcode_tblgen (not visible here) - presumably "emit the byte as-is"; confirm
+end
+
+transcode_tblgen "EBCDIC-US", "ISO-8859-1", to_nomap(us_ebcdic_map), '{00-ff}', 'asciicompat_decoder' # table pairs used as listed: [EBCDIC-US, ISO-8859-1]
+transcode_tblgen "ISO-8859-1", "EBCDIC-US", to_nomap(us_ebcdic_map.map {|a,b| [b,a] }), '{00-ff}', 'asciicompat_encoder' # same table with each pair swapped for the reverse direction
+%>
+
+<%= transcode_generated_code %>
+
+TRANS_INIT(ebcdic)
+{
+<%= transcode_register_code %>
+}
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 4bade11..5162c7d 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2018,6 +2018,13 @@ class TestTranscode < Test::Unit::TestCase
def test_Big5_UAO
check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗
end
+
+ def test_EBCDIC # each call pairs a string with its expected EBCDIC-US bytes; check_both_ways is defined outside this hunk
+ check_both_ways("abcdeABCDE", "\x81\x82\x83\x84\x85\xC1\xC2\xC3\xC4\xC5", 'EBCDIC-US')
+ check_both_ways("aijrszAIJRSZ09", "\x81\x89\x91\x99\xA2\xA9\xC1\xC9\xD1\xD9\xE2\xE9\xF0\xF9", 'EBCDIC-US')
+ check_both_ways("Matz", "\xD4\x81\xA3\xA9", 'EBCDIC-US') # Matz
+ check_both_ways("D\u00FCrst", "\xC4\xDC\x99\xA2\xA3", 'EBCDIC-US') # Dürst
+ end
def test_nothing_changed
a = "James".force_encoding("US-ASCII")
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index c873638..17c5ebf 100644
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -842,7 +842,8 @@ def transcode_tbl_only(from, to, map, valid_encoding=UnspecifiedValidEncoding)
return map, tree_name, real_tree_name, max_input
end
-def transcode_tblgen(from, to, map, valid_encoding=UnspecifiedValidEncoding)
+def transcode_tblgen(from, to, map, valid_encoding=UnspecifiedValidEncoding,
+ ascii_compatibility='asciicompat_converter')
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
transcoder_name = "rb_#{tree_name}"
TRANSCODERS << transcoder_name
@@ -856,7 +857,7 @@ static const rb_transcoder
#{input_unit_length}, /* input_unit_length */
#{max_input}, /* max_input */
#{max_output}, /* max_output */
- asciicompat_converter, /* asciicompat_type */
+ #{ascii_compatibility}, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL,
NULL, NULL, NULL