summaryrefslogtreecommitdiff
path: root/enc/trans
diff options
context:
space:
mode:
Diffstat (limited to 'enc/trans')
-rw-r--r--enc/trans/JIS/JISX0212%UCS.src2
-rw-r--r--enc/trans/JIS/UCS%JISX0212.src2
-rw-r--r--enc/trans/big5-uao-tbl.rb2
-rw-r--r--enc/trans/cesu_8.trans85
-rw-r--r--enc/trans/cp850-tbl.rb2
-rw-r--r--enc/trans/cp852-tbl.rb2
-rw-r--r--enc/trans/cp855-tbl.rb2
-rw-r--r--enc/trans/escape.trans3
-rw-r--r--enc/trans/gbk-tbl.rb2
-rw-r--r--enc/trans/ibm437-tbl.rb2
-rw-r--r--enc/trans/ibm720-tbl.rb122
-rw-r--r--enc/trans/ibm775-tbl.rb2
-rw-r--r--enc/trans/ibm852-tbl.rb2
-rw-r--r--enc/trans/ibm855-tbl.rb2
-rw-r--r--enc/trans/ibm857-tbl.rb2
-rw-r--r--enc/trans/ibm860-tbl.rb2
-rw-r--r--enc/trans/ibm861-tbl.rb2
-rw-r--r--enc/trans/ibm862-tbl.rb2
-rw-r--r--enc/trans/ibm863-tbl.rb2
-rw-r--r--enc/trans/ibm864-tbl.rb126
-rw-r--r--enc/trans/ibm865-tbl.rb2
-rw-r--r--enc/trans/ibm866-tbl.rb2
-rw-r--r--enc/trans/ibm869-tbl.rb2
-rw-r--r--enc/trans/iso2022.trans148
-rw-r--r--enc/trans/koi8-r-tbl.rb2
-rw-r--r--enc/trans/koi8-u-tbl.rb2
-rw-r--r--enc/trans/maccroatian-tbl.rb2
-rw-r--r--enc/trans/maccyrillic-tbl.rb2
-rw-r--r--enc/trans/macgreek-tbl.rb2
-rw-r--r--enc/trans/maciceland-tbl.rb2
-rw-r--r--enc/trans/macroman-tbl.rb2
-rw-r--r--enc/trans/macromania-tbl.rb2
-rw-r--r--enc/trans/macturkish-tbl.rb2
-rw-r--r--enc/trans/macukraine-tbl.rb2
-rw-r--r--enc/trans/newline.trans30
-rw-r--r--enc/trans/single_byte.trans4
-rw-r--r--enc/trans/transdb.c2
-rw-r--r--enc/trans/windows-1250-tbl.rb2
-rw-r--r--enc/trans/windows-1251-tbl.rb2
-rw-r--r--enc/trans/windows-1252-tbl.rb2
-rw-r--r--enc/trans/windows-1253-tbl.rb2
-rw-r--r--enc/trans/windows-1254-tbl.rb2
-rw-r--r--enc/trans/windows-1256-tbl.rb2
-rw-r--r--enc/trans/windows-1257-tbl.rb2
-rw-r--r--enc/trans/windows-874-tbl.rb2
45 files changed, 475 insertions, 119 deletions
diff --git a/enc/trans/JIS/JISX0212%UCS.src b/enc/trans/JIS/JISX0212%UCS.src
index aa51257b99..0e1ab4c9b9 100644
--- a/enc/trans/JIS/JISX0212%UCS.src
+++ b/enc/trans/JIS/JISX0212%UCS.src
@@ -67,7 +67,7 @@ BEGIN_MAP
#
# However, JIS X 0212 maintains the distinction between
# the lowercase forms of these two elements at 0x2942 and 0x2943.
-# Given the structre of these JIS encodings, it is clear that
+# Given the structure of these JIS encodings, it is clear that
# 0x2922 and 0x2942 are intended to be a capital/small pair.
# Consequently, in the Unicode mapping, 0x2922 is treated as
# LATIN CAPITAL LETTER D WITH STROKE.
diff --git a/enc/trans/JIS/UCS%JISX0212.src b/enc/trans/JIS/UCS%JISX0212.src
index 65383a1c9f..c7711c8ac0 100644
--- a/enc/trans/JIS/UCS%JISX0212.src
+++ b/enc/trans/JIS/UCS%JISX0212.src
@@ -67,7 +67,7 @@ BEGIN_MAP
#
# However, JIS X 0212 maintains the distinction between
# the lowercase forms of these two elements at 0x2942 and 0x2943.
-# Given the structre of these JIS encodings, it is clear that
+# Given the structure of these JIS encodings, it is clear that
# 0x2922 and 0x2942 are intended to be a capital/small pair.
# Consequently, in the Unicode mapping, 0x2922 is treated as
# LATIN CAPITAL LETTER D WITH STROKE.
diff --git a/enc/trans/big5-uao-tbl.rb b/enc/trans/big5-uao-tbl.rb
index 295fbfdda5..a6f37cc7bd 100644
--- a/enc/trans/big5-uao-tbl.rb
+++ b/enc/trans/big5-uao-tbl.rb
@@ -19781,4 +19781,4 @@ BIG5_UAO_TO_UCS_TBL = [
["FEFC",0x8262],
["FEFD",0x826A],
["FEFE",0x8288],
-] \ No newline at end of file
+]
diff --git a/enc/trans/cesu_8.trans b/enc/trans/cesu_8.trans
new file mode 100644
index 0000000000..4e17b1ddbb
--- /dev/null
+++ b/enc/trans/cesu_8.trans
@@ -0,0 +1,85 @@
+#include "transcode_data.h"
+
+<%
+ map = {}
+ map["{00-7f}"] = :nomap
+ map["{c2-df}{80-bf}"] = :nomap
+ map["e0{a0-bf}{80-bf}"] = :nomap
+ map["{e1-ec}{80-bf}{80-bf}"] = :nomap
+ map["ed{80-9f}{80-bf}"] = :nomap
+ map["{ee-ef}{80-bf}{80-bf}"] = :nomap
+ map["ed{a0-af}{80-bf}ed{b0-bf}{80-bf}"] = :func_so # surrogate pairs
+ transcode_generate_node(ActionMap.parse(map), "from_CESU_8")
+
+ map = {}
+ map["{00-7f}"] = :nomap
+ map["{c2-df}{80-bf}"] = :nomap
+ map["e0{a0-bf}{80-bf}"] = :nomap
+ map["{e1-ec}{80-bf}{80-bf}"] = :nomap
+ map["ed{80-9f}{80-bf}"] = :nomap
+ map["{ee-ef}{80-bf}{80-bf}"] = :nomap
+ map["f0{90-bf}{80-bf}{80-bf}"] = :func_so # planes 1-3
+ map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so # planes 4-15
+ map["f4{80-8f}{80-bf}{80-bf}"] = :func_so # plane 16
+ transcode_generate_node(ActionMap.parse(map), "to_CESU_8")
+%>
+
+<%= transcode_generated_code %>
+
+static ssize_t
+fun_so_from_cesu_8(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+ unsigned int scalar = ( ((s[1]&0x0F)<<16) | ((s[2]&0x3F)<<10)
+ | ((s[4]&0x0F)<< 6) | (s[5]&0x3F)
+ ) + 0x10000;
+ o[0] = 0xF0 | (scalar>>18);
+ o[1] = 0x80 | ((scalar>>12)&0x3F);
+ o[2] = 0x80 | ((scalar>> 6)&0x3F);
+ o[3] = 0x80 | ( scalar &0x3F);
+ return 4;
+}
+
+static ssize_t
+fun_so_to_cesu_8(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+ unsigned int scalar = ((s[0]&0x07)<<18) | ((s[1]&0x3F)<<12)
+ | ((s[2]&0x3F)<< 6) | (s[3]&0x3F);
+ scalar -= 0x10000;
+ o[0] = 0xED;
+ o[1] = 0xA0 | (scalar>>16);
+ o[2] = 0x80 | ((scalar>>10)&0x3F);
+ o[3] = 0xED;
+ o[4] = 0xB0 | ((scalar>> 6)&0x0F);
+ o[5] = 0x80 | (scalar &0x3F);
+ return 6;
+}
+
+static const rb_transcoder
+rb_from_CESU_8 = {
+ "CESU-8", "UTF-8", from_CESU_8,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 6, /* max_input */
+ 4, /* max_output */
+ asciicompat_decoder, /* asciicompat_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
+ NULL, NULL, NULL, fun_so_from_cesu_8
+};
+
+static const rb_transcoder
+rb_to_CESU_8 = {
+ "UTF-8", "CESU-8", to_CESU_8,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 4, /* max_input */
+ 6, /* max_output */
+ asciicompat_encoder, /* asciicompat_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
+ NULL, NULL, NULL, fun_so_to_cesu_8
+};
+
+TRANS_INIT(cesu_8)
+{
+ rb_register_transcoder(&rb_from_CESU_8);
+ rb_register_transcoder(&rb_to_CESU_8);
+}
diff --git a/enc/trans/cp850-tbl.rb b/enc/trans/cp850-tbl.rb
index 615d3b2599..e0d120c803 100644
--- a/enc/trans/cp850-tbl.rb
+++ b/enc/trans/cp850-tbl.rb
@@ -127,4 +127,4 @@ CP850_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/cp852-tbl.rb b/enc/trans/cp852-tbl.rb
index 6763bfa6e9..ad32cdc59d 100644
--- a/enc/trans/cp852-tbl.rb
+++ b/enc/trans/cp852-tbl.rb
@@ -127,4 +127,4 @@ CP852_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/cp855-tbl.rb b/enc/trans/cp855-tbl.rb
index 72e548b9cb..a2ca9daf97 100644
--- a/enc/trans/cp855-tbl.rb
+++ b/enc/trans/cp855-tbl.rb
@@ -127,4 +127,4 @@ CP855_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans
index c76ffa0e06..36d9dd0f13 100644
--- a/enc/trans/escape.trans
+++ b/enc/trans/escape.trans
@@ -18,9 +18,10 @@
], nil)
transcode_tblgen("", "xml_attr_content_escape", [
- ["{00-21,23-25,27-3B,3D,3F-FF}", :nomap],
+ ["{00-21,23-25,28-3B,3D,3F-FF}", :nomap],
["22", hexstr("&quot;")],
["26", hexstr("&amp;")],
+ ["27", hexstr("&apos;")],
["3C", hexstr("&lt;")],
["3E", hexstr("&gt;")]
], nil)
diff --git a/enc/trans/gbk-tbl.rb b/enc/trans/gbk-tbl.rb
index 26f5078c45..40929f992e 100644
--- a/enc/trans/gbk-tbl.rb
+++ b/enc/trans/gbk-tbl.rb
@@ -21791,4 +21791,4 @@ GBK_TO_UCS_TBL= [
["A3FE",0xFFE3],
["A957",0xFFE4],
["A3A4",0xFFE5],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm437-tbl.rb b/enc/trans/ibm437-tbl.rb
index 5ae64d621e..6a823c293d 100644
--- a/enc/trans/ibm437-tbl.rb
+++ b/enc/trans/ibm437-tbl.rb
@@ -127,4 +127,4 @@ IBM437_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm720-tbl.rb b/enc/trans/ibm720-tbl.rb
new file mode 100644
index 0000000000..558684d649
--- /dev/null
+++ b/enc/trans/ibm720-tbl.rb
@@ -0,0 +1,122 @@
+IBM720_TO_UCS_TBL = [
+ ["FF",0xA0],
+ ["9C",0xA3],
+ ["94",0xA4],
+ ["AE",0xAB],
+ ["F8",0xB0],
+ ["FD",0xB2],
+ ["E6",0xB5],
+ ["FA",0xB7],
+ ["AF",0xBB],
+ ["85",0xE0],
+ ["83",0xE2],
+ ["87",0xE7],
+ ["8A",0xE8],
+ ["82",0xE9],
+ ["88",0xEA],
+ ["89",0xEB],
+ ["8C",0xEE],
+ ["8B",0xEF],
+ ["93",0xF4],
+ ["97",0xF9],
+ ["96",0xFB],
+ ["98",0x621],
+ ["99",0x622],
+ ["9A",0x623],
+ ["9B",0x624],
+ ["9D",0x625],
+ ["9E",0x626],
+ ["9F",0x627],
+ ["A0",0x628],
+ ["A1",0x629],
+ ["A2",0x62A],
+ ["A3",0x62B],
+ ["A4",0x62C],
+ ["A5",0x62D],
+ ["A6",0x62E],
+ ["A7",0x62F],
+ ["A8",0x630],
+ ["A9",0x631],
+ ["AA",0x632],
+ ["AB",0x633],
+ ["AC",0x634],
+ ["AD",0x635],
+ ["E0",0x636],
+ ["E1",0x637],
+ ["E2",0x638],
+ ["E3",0x639],
+ ["E4",0x63A],
+ ["95",0x640],
+ ["E5",0x641],
+ ["E7",0x642],
+ ["E8",0x643],
+ ["E9",0x644],
+ ["EA",0x645],
+ ["EB",0x646],
+ ["EC",0x647],
+ ["ED",0x648],
+ ["EE",0x649],
+ ["EF",0x64A],
+ ["F1",0x64B],
+ ["F2",0x64C],
+ ["F3",0x64D],
+ ["F4",0x64E],
+ ["F5",0x64F],
+ ["F6",0x650],
+ ["91",0x651],
+ ["92",0x652],
+ ["FC",0x207F],
+ ["F9",0x2219],
+ ["FB",0x221A],
+ ["F7",0x2248],
+ ["F0",0x2261],
+ ["C4",0x2500],
+ ["B3",0x2502],
+ ["DA",0x250C],
+ ["BF",0x2510],
+ ["C0",0x2514],
+ ["D9",0x2518],
+ ["C3",0x251C],
+ ["B4",0x2524],
+ ["C2",0x252C],
+ ["C1",0x2534],
+ ["C5",0x253C],
+ ["CD",0x2550],
+ ["BA",0x2551],
+ ["D5",0x2552],
+ ["D6",0x2553],
+ ["C9",0x2554],
+ ["B8",0x2555],
+ ["B7",0x2556],
+ ["BB",0x2557],
+ ["D4",0x2558],
+ ["D3",0x2559],
+ ["C8",0x255A],
+ ["BE",0x255B],
+ ["BD",0x255C],
+ ["BC",0x255D],
+ ["C6",0x255E],
+ ["C7",0x255F],
+ ["CC",0x2560],
+ ["B5",0x2561],
+ ["B6",0x2562],
+ ["B9",0x2563],
+ ["D1",0x2564],
+ ["D2",0x2565],
+ ["CB",0x2566],
+ ["CF",0x2567],
+ ["D0",0x2568],
+ ["CA",0x2569],
+ ["D8",0x256A],
+ ["D7",0x256B],
+ ["CE",0x256C],
+ ["DF",0x2580],
+ ["DC",0x2584],
+ ["DB",0x2588],
+ ["DD",0x258C],
+ ["DE",0x2590],
+ ["B0",0x2591],
+ ["B1",0x2592],
+ ["B2",0x2593],
+ ["FE",0x25A0],
+]
diff --git a/enc/trans/ibm775-tbl.rb b/enc/trans/ibm775-tbl.rb
index f55679f409..2b79780813 100644
--- a/enc/trans/ibm775-tbl.rb
+++ b/enc/trans/ibm775-tbl.rb
@@ -127,4 +127,4 @@ IBM775_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm852-tbl.rb b/enc/trans/ibm852-tbl.rb
index 6cec51cf80..3e70daef2e 100644
--- a/enc/trans/ibm852-tbl.rb
+++ b/enc/trans/ibm852-tbl.rb
@@ -127,4 +127,4 @@ IBM852_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm855-tbl.rb b/enc/trans/ibm855-tbl.rb
index 7e0cc5014f..b4c0244728 100644
--- a/enc/trans/ibm855-tbl.rb
+++ b/enc/trans/ibm855-tbl.rb
@@ -127,4 +127,4 @@ IBM855_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm857-tbl.rb b/enc/trans/ibm857-tbl.rb
index 5b20d389d3..c1c76545ed 100644
--- a/enc/trans/ibm857-tbl.rb
+++ b/enc/trans/ibm857-tbl.rb
@@ -124,4 +124,4 @@ IBM857_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm860-tbl.rb b/enc/trans/ibm860-tbl.rb
index ae218a129f..77734cd194 100644
--- a/enc/trans/ibm860-tbl.rb
+++ b/enc/trans/ibm860-tbl.rb
@@ -127,4 +127,4 @@ IBM860_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm861-tbl.rb b/enc/trans/ibm861-tbl.rb
index c24042a76c..69e0a45019 100644
--- a/enc/trans/ibm861-tbl.rb
+++ b/enc/trans/ibm861-tbl.rb
@@ -127,4 +127,4 @@ IBM861_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm862-tbl.rb b/enc/trans/ibm862-tbl.rb
index 31d6fb0243..f564051fd6 100644
--- a/enc/trans/ibm862-tbl.rb
+++ b/enc/trans/ibm862-tbl.rb
@@ -127,4 +127,4 @@ IBM862_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm863-tbl.rb b/enc/trans/ibm863-tbl.rb
index db110cf38c..af1eb97566 100644
--- a/enc/trans/ibm863-tbl.rb
+++ b/enc/trans/ibm863-tbl.rb
@@ -127,4 +127,4 @@ IBM863_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm864-tbl.rb b/enc/trans/ibm864-tbl.rb
new file mode 100644
index 0000000000..13f8a27f1d
--- /dev/null
+++ b/enc/trans/ibm864-tbl.rb
@@ -0,0 +1,126 @@
+IBM864_TO_UCS_TBL = [
+ ["80",0x00B0],
+ ["81",0x00B7],
+ ["82",0x2219],
+ ["83",0x221A],
+ ["84",0x2592],
+ ["85",0x2500],
+ ["86",0x2502],
+ ["87",0x253C],
+ ["88",0x2524],
+ ["89",0x252C],
+ ["8A",0x251C],
+ ["8B",0x2534],
+ ["8C",0x2510],
+ ["8D",0x250C],
+ ["8E",0x2514],
+ ["8F",0x2518],
+ ["90",0x03B2],
+ ["91",0x221E],
+ ["92",0x03C6],
+ ["93",0x00B1],
+ ["94",0x00BD],
+ ["95",0x00BC],
+ ["96",0x2248],
+ ["97",0x00AB],
+ ["98",0x00BB],
+ ["99",0xFEF7],
+ ["9A",0xFEF8],
+ ["9D",0xFEFB],
+ ["9E",0xFEFC],
+ ["9F",0xFE73],
+ ["A0",0x00A0],
+ ["A1",0x00AD],
+ ["A2",0xFE82],
+ ["A3",0x00A3],
+ ["A4",0x00A4],
+ ["A5",0xFE84],
+ ["A7",0x20AC], # Euro sign from CCSID 864
+ ["A8",0xFE8E],
+ ["A9",0xFE8F],
+ ["AA",0xFE95],
+ ["AB",0xFE99],
+ ["AC",0x060C],
+ ["AD",0xFE9D],
+ ["AE",0xFEA1],
+ ["AF",0xFEA5],
+ ["B0",0x0660],
+ ["B1",0x0661],
+ ["B2",0x0662],
+ ["B3",0x0663],
+ ["B4",0x0664],
+ ["B5",0x0665],
+ ["B6",0x0666],
+ ["B7",0x0667],
+ ["B8",0x0668],
+ ["B9",0x0669],
+ ["BA",0xFED1],
+ ["BB",0x061B],
+ ["BC",0xFEB1],
+ ["BD",0xFEB5],
+ ["BE",0xFEB9],
+ ["BF",0x061F],
+ ["C0",0x00A2],
+ ["C1",0xFE80],
+ ["C2",0xFE81],
+ ["C3",0xFE83],
+ ["C4",0xFE85],
+ ["C5",0xFECA],
+ ["C6",0xFE8B],
+ ["C7",0xFE8D],
+ ["C8",0xFE91],
+ ["C9",0xFE93],
+ ["CA",0xFE97],
+ ["CB",0xFE9B],
+ ["CC",0xFE9F],
+ ["CD",0xFEA3],
+ ["CE",0xFEA7],
+ ["CF",0xFEA9],
+ ["D0",0xFEAB],
+ ["D1",0xFEAD],
+ ["D2",0xFEAF],
+ ["D3",0xFEB3],
+ ["D4",0xFEB7],
+ ["D5",0xFEBB],
+ ["D6",0xFEBF],
+ ["D7",0xFEC1],
+ ["D8",0xFEC5],
+ ["D9",0xFECB],
+ ["DA",0xFECF],
+ ["DB",0x00A6],
+ ["DC",0x00AC],
+ ["DD",0x00F7],
+ ["DE",0x00D7],
+ ["DF",0xFEC9],
+ ["E0",0x0640],
+ ["E1",0xFED3],
+ ["E2",0xFED7],
+ ["E3",0xFEDB],
+ ["E4",0xFEDF],
+ ["E5",0xFEE3],
+ ["E6",0xFEE7],
+ ["E7",0xFEEB],
+ ["E8",0xFEED],
+ ["E9",0xFEEF],
+ ["EA",0xFEF3],
+ ["EB",0xFEBD],
+ ["EC",0xFECC],
+ ["ED",0xFECE],
+ ["EE",0xFECD],
+ ["EF",0xFEE1],
+ ["F0",0xFE7D],
+ ["F1",0x0651],
+ ["F2",0xFEE5],
+ ["F3",0xFEE9],
+ ["F4",0xFEEC],
+ ["F5",0xFEF0],
+ ["F6",0xFEF2],
+ ["F7",0xFED0],
+ ["F8",0xFED5],
+ ["F9",0xFEF5],
+ ["FA",0xFEF6],
+ ["FB",0xFEDD],
+ ["FC",0xFED9],
+ ["FD",0xFEF1],
+ ["FE",0x25A0]
+]
diff --git a/enc/trans/ibm865-tbl.rb b/enc/trans/ibm865-tbl.rb
index 22e322fb31..4747509d66 100644
--- a/enc/trans/ibm865-tbl.rb
+++ b/enc/trans/ibm865-tbl.rb
@@ -127,4 +127,4 @@ IBM865_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm866-tbl.rb b/enc/trans/ibm866-tbl.rb
index 95b9ee7534..ed4b0d683e 100644
--- a/enc/trans/ibm866-tbl.rb
+++ b/enc/trans/ibm866-tbl.rb
@@ -127,4 +127,4 @@ IBM866_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/ibm869-tbl.rb b/enc/trans/ibm869-tbl.rb
index 437e41ad98..bee85b84ea 100644
--- a/enc/trans/ibm869-tbl.rb
+++ b/enc/trans/ibm869-tbl.rb
@@ -118,4 +118,4 @@ IBM869_TO_UCS_TBL = [
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans
index a441f1596d..bc42bbc19c 100644
--- a/enc/trans/iso2022.trans
+++ b/enc/trans/iso2022.trans
@@ -1,4 +1,5 @@
#include "transcode_data.h"
+#include "ruby/internal/attr/nonstring.h"
<%
map = {
@@ -79,6 +80,34 @@ iso2022jp_init(void *statep)
return 0;
}
+static unsigned char *
+iso2022jp_put_state(unsigned char *sp, unsigned char *o, int oldstate, int newstate)
+{
+ if (oldstate != newstate) {
+ *o++ = 0x1b;
+ switch (newstate) {
+ case G0_ASCII:
+ *o++ = '(';
+ *o++ = 'B';
+ break;
+ case G0_JISX0201_KATAKANA:
+ *o++ = '(';
+ *o++ = 'I';
+ break;
+ case G0_JISX0208_1978:
+ *o++ = '$';
+ *o++ = '@';
+ break;
+ default:
+ *o++ = '$';
+ *o++ = 'B';
+ break;
+ }
+ *sp = newstate;
+ }
+ return o;
+}
+
static VALUE
fun_si_iso2022jp_decoder(void *statep, const unsigned char *s, size_t l)
{
@@ -154,24 +183,7 @@ fun_so_iso2022jp_encoder(void *statep, const unsigned char *s, size_t l, unsigne
else
newstate = G0_JISX0208_1983;
- if (*sp != newstate) {
- if (newstate == G0_ASCII) {
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'B';
- }
- else if (newstate == G0_JISX0208_1978) {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = '@';
- }
- else {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = 'B';
- }
- *sp = newstate;
- }
+ o = iso2022jp_put_state(sp, o, *sp, newstate);
if (l == 1) {
*o++ = s[0] & 0x7f;
@@ -202,10 +214,7 @@ finish_iso2022jp_encoder(void *statep, unsigned char *o, size_t osize)
if (*sp == G0_ASCII)
return 0;
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'B';
- *sp = G0_ASCII;
+ o = iso2022jp_put_state(sp, o, *sp, G0_ASCII);
return o - output0;
}
@@ -399,24 +408,7 @@ fun_so_cp5022x_encoder(void *statep, const unsigned char *s, size_t l,
else
newstate = G0_JISX0208_1983;
- if (*sp != newstate) {
- if (newstate == G0_ASCII) {
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'B';
- }
- else if (newstate == G0_JISX0201_KATAKANA) {
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'I';
- }
- else {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = 'B';
- }
- *sp = newstate;
- }
+ o = iso2022jp_put_state(sp, o, sp[0], newstate);
if (l == 1) {
*o++ = s[0] & 0x7f;
@@ -443,15 +435,26 @@ rb_cp50221_encoder = {
iso2022jp_encoder_reset_sequence_size, finish_iso2022jp_encoder
};
-static const char *tbl0208 =
- "\x21\x23\x21\x56\x21\x57\x21\x22\x21\x26\x25\x72\x25\x21\x25\x23" \
- "\x25\x25\x25\x27\x25\x29\x25\x63\x25\x65\x25\x67\x25\x43\x21\x3C" \
- "\x25\x22\x25\x24\x25\x26\x25\x28\x25\x2A\x25\x2B\x25\x2D\x25\x2F" \
- "\x25\x31\x25\x33\x25\x35\x25\x37\x25\x39\x25\x3B\x25\x3D\x25\x3F" \
- "\x25\x41\x25\x44\x25\x46\x25\x48\x25\x4A\x25\x4B\x25\x4C\x25\x4D" \
- "\x25\x4E\x25\x4F\x25\x52\x25\x55\x25\x58\x25\x5B\x25\x5E\x25\x5F" \
- "\x25\x60\x25\x61\x25\x62\x25\x64\x25\x66\x25\x68\x25\x69\x25\x6A" \
- "\x25\x6B\x25\x6C\x25\x6D\x25\x6F\x25\x73\x21\x2B\x21\x2C";
+/* JIS0201 to JIS0208 conversion table */
+enum {tbl0208_num = 0xDF - 0xA1 + 1};
+RBIMPL_ATTR_NONSTRING_ARRAY() static const char tbl0208[tbl0208_num][2] = {
+ "\x21\x23", "\x21\x56", "\x21\x57", "\x21\x22",
+ "\x21\x26", "\x25\x72", "\x25\x21", "\x25\x23",
+ "\x25\x25", "\x25\x27", "\x25\x29", "\x25\x63",
+ "\x25\x65", "\x25\x67", "\x25\x43", "\x21\x3C",
+ "\x25\x22", "\x25\x24", "\x25\x26", "\x25\x28",
+ "\x25\x2A", "\x25\x2B", "\x25\x2D", "\x25\x2F",
+ "\x25\x31", "\x25\x33", "\x25\x35", "\x25\x37",
+ "\x25\x39", "\x25\x3B", "\x25\x3D", "\x25\x3F",
+ "\x25\x41", "\x25\x44", "\x25\x46", "\x25\x48",
+ "\x25\x4A", "\x25\x4B", "\x25\x4C", "\x25\x4D",
+ "\x25\x4E", "\x25\x4F", "\x25\x52", "\x25\x55",
+ "\x25\x58", "\x25\x5B", "\x25\x5E", "\x25\x5F",
+ "\x25\x60", "\x25\x61", "\x25\x62", "\x25\x64",
+ "\x25\x66", "\x25\x68", "\x25\x69", "\x25\x6A",
+ "\x25\x6B", "\x25\x6C", "\x25\x6D", "\x25\x6F",
+ "\x25\x73", "\x21\x2B", "\x21\x2C"
+};
static ssize_t
fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l,
@@ -460,22 +463,21 @@ fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l,
unsigned char *output0 = o;
unsigned char *sp = statep;
- if (sp[0] == G0_JISX0201_KATAKANA) {
+ if (sp[0] == G0_JISX0201_KATAKANA && sp[2]) {
int c = sp[2] & 0x7F;
- const char *p = tbl0208 + (c - 0x21) * 2;
- if (sp[1] != G0_JISX0208_1983) {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = 'B';
- }
+ const char *p = tbl0208[c - 0x21];
+ sp[2] = 0;
+ o = iso2022jp_put_state(sp, o, sp[1], G0_JISX0208_1983);
sp[0] = G0_JISX0208_1983;
*o++ = *p++;
if (l == 2 && s[0] == 0x8E) {
if (s[1] == 0xDE) {
+ /* VOICED SOUND MARK */
*o++ = *p + 1;
return o - output0;
}
else if (s[1] == 0xDF && (0x4A <= c && c <= 0x4E)) {
+ /* SEMI-VOICED SOUND MARK */
*o++ = *p + 2;
return o - output0;
}
@@ -484,21 +486,25 @@ fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l,
}
if (l == 2 && s[0] == 0x8E) {
- const char *p = tbl0208 + (s[1] - 0xA1) * 2;
if ((0xA1 <= s[1] && s[1] <= 0xB5) ||
(0xC5 <= s[1] && s[1] <= 0xC9) ||
(0xCF <= s[1] && s[1] <= 0xDF)) {
- if (*sp != G0_JISX0208_1983) {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = 'B';
- *sp = G0_JISX0208_1983;
- }
+ /* May not be followed by a sound mark */
+ const char *p = tbl0208[s[1] - 0xA1];
+ o = iso2022jp_put_state(sp, o, *sp, G0_JISX0208_1983);
*o++ = *p++;
*o++ = *p;
return o - output0;
}
+ if (s[1] > 0xDF) { /* undef */
+ o = iso2022jp_put_state(sp, o, *sp, G0_JISX0201_KATAKANA);
+ *o++ = s[1] & 0x7F;
+ sp[2] = 0;
+ return o - output0;
+ }
+
+ /* Katakana that may be followed by a sound mark */
sp[2] = s[1];
sp[1] = sp[0];
sp[0] = G0_JISX0201_KATAKANA;
@@ -518,23 +524,16 @@ finish_cp50220_encoder(void *statep, unsigned char *o, size_t osize)
if (*sp == G0_ASCII)
return 0;
- if (sp[0] == G0_JISX0201_KATAKANA) {
+ if (sp[0] == G0_JISX0201_KATAKANA && sp[2]) {
int c = sp[2] & 0x7F;
- const char *p = tbl0208 + (c - 0x21) * 2;
- if (sp[1] != G0_JISX0208_1983) {
- *o++ = 0x1b;
- *o++ = '$';
- *o++ = 'B';
- }
+ const char *p = tbl0208[c - 0x21];
+ o = iso2022jp_put_state(sp, o, sp[1], G0_JISX0208_1983);
sp[0] = G0_JISX0208_1983;
*o++ = *p++;
*o++ = *p;
}
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'B';
- *sp = G0_ASCII;
+ o = iso2022jp_put_state(sp, o, sp[0], G0_ASCII);
return o - output0;
}
@@ -564,4 +563,3 @@ TRANS_INIT(iso2022)
rb_register_transcoder(&rb_cp50220_encoder);
rb_register_transcoder(&rb_cp50221_encoder);
}
-
diff --git a/enc/trans/koi8-r-tbl.rb b/enc/trans/koi8-r-tbl.rb
index a1f55ff2e3..4cfe523334 100644
--- a/enc/trans/koi8-r-tbl.rb
+++ b/enc/trans/koi8-r-tbl.rb
@@ -127,4 +127,4 @@ KOI8_R_TO_UCS_TBL = [
["91",0x2592],
["92",0x2593],
["94",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/koi8-u-tbl.rb b/enc/trans/koi8-u-tbl.rb
index e87aa1aa3f..225931ba5e 100644
--- a/enc/trans/koi8-u-tbl.rb
+++ b/enc/trans/koi8-u-tbl.rb
@@ -127,4 +127,4 @@ KOI8_U_TO_UCS_TBL = [
["91",0x2592],
["92",0x2593],
["94",0x25A0],
-] \ No newline at end of file
+]
diff --git a/enc/trans/maccroatian-tbl.rb b/enc/trans/maccroatian-tbl.rb
index 359878ec1f..e78f2f373f 100644
--- a/enc/trans/maccroatian-tbl.rb
+++ b/enc/trans/maccroatian-tbl.rb
@@ -126,4 +126,4 @@ MACCROATIAN_TO_UCS_TBL = [
["B2",0x2264],
["B3",0x2265],
["D7",0x25CA],
-] \ No newline at end of file
+]
diff --git a/enc/trans/maccyrillic-tbl.rb b/enc/trans/maccyrillic-tbl.rb
index 378aa8c3bc..2d5af7b466 100644
--- a/enc/trans/maccyrillic-tbl.rb
+++ b/enc/trans/maccyrillic-tbl.rb
@@ -127,4 +127,4 @@ MACCYRILLIC_TO_UCS_TBL = [
["AD",0x2260],
["B2",0x2264],
["B3",0x2265],
-] \ No newline at end of file
+]
diff --git a/enc/trans/macgreek-tbl.rb b/enc/trans/macgreek-tbl.rb
index 7f75fde6d2..645aefe5ff 100644
--- a/enc/trans/macgreek-tbl.rb
+++ b/enc/trans/macgreek-tbl.rb
@@ -126,4 +126,4 @@ MACGREEK_TO_UCS_TBL = [
["AD",0x2260],
["B2",0x2264],
["B3",0x2265],
-] \ No newline at end of file
+]
diff --git a/enc/trans/maciceland-tbl.rb b/enc/trans/maciceland-tbl.rb
index 818d992274..ee9b5000d9 100644
--- a/enc/trans/maciceland-tbl.rb
+++ b/enc/trans/maciceland-tbl.rb
@@ -126,4 +126,4 @@ MACICELAND_TO_UCS_TBL = [
["B2",0x2264],
["B3",0x2265],
["D7",0x25CA],
-] \ No newline at end of file
+]
diff --git a/enc/trans/macroman-tbl.rb b/enc/trans/macroman-tbl.rb
index 8f74eea27f..9a8172554a 100644
--- a/enc/trans/macroman-tbl.rb
+++ b/enc/trans/macroman-tbl.rb
@@ -126,4 +126,4 @@ MACROMAN_TO_UCS_TBL = [
["D7",0x25CA],
["DE",0xFB01],
["DF",0xFB02],
-] \ No newline at end of file
+]
diff --git a/enc/trans/macromania-tbl.rb b/enc/trans/macromania-tbl.rb
index ff95c5e957..29a7942d9b 100644
--- a/enc/trans/macromania-tbl.rb
+++ b/enc/trans/macromania-tbl.rb
@@ -126,4 +126,4 @@ MACROMANIA_TO_UCS_TBL = [
["B2",0x2264],
["B3",0x2265],
["D7",0x25CA],
-] \ No newline at end of file
+]
diff --git a/enc/trans/macturkish-tbl.rb b/enc/trans/macturkish-tbl.rb
index 2358672ed6..883f693e23 100644
--- a/enc/trans/macturkish-tbl.rb
+++ b/enc/trans/macturkish-tbl.rb
@@ -125,4 +125,4 @@ MACTURKISH_TO_UCS_TBL = [
["B2",0x2264],
["B3",0x2265],
["D7",0x25CA],
-] \ No newline at end of file
+]
diff --git a/enc/trans/macukraine-tbl.rb b/enc/trans/macukraine-tbl.rb
index 6941af654d..09acf7c45a 100644
--- a/enc/trans/macukraine-tbl.rb
+++ b/enc/trans/macukraine-tbl.rb
@@ -127,4 +127,4 @@ MACUKRAINE_TO_UCS_TBL = [
["AD",0x2260],
["B2",0x2264],
["B3",0x2265],
-] \ No newline at end of file
+]
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index a200ec00a7..95e082f5bd 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -17,10 +17,16 @@
map_cr["0a"] = "0d"
transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
+
+ map_normalize = {}
+ map_normalize["{00-ff}"] = :func_so
+
+ transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
%>
<%= transcode_generated_code %>
+#define lf_newline universal_newline
#define STATE (sp[0])
#define NORMAL 0
#define JUST_AFTER_CR 1
@@ -98,7 +104,7 @@ rb_universal_newline = {
2, /* max_output */
asciicompat_converter, /* asciicompat_type */
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
- NULL, NULL, NULL, fun_so_universal_newline,
+ 0, 0, 0, fun_so_universal_newline,
universal_newline_finish
};
@@ -110,8 +116,8 @@ rb_crlf_newline = {
1, /* max_input */
2, /* max_output */
asciicompat_converter, /* asciicompat_type */
- 0, NULL, NULL, /* state_size, state_init, state_fini */
- NULL, NULL, NULL, NULL
+ 0, 0, 0, /* state_size, state_init, state_fini */
+ 0, 0, 0, 0
};
static const rb_transcoder
@@ -122,8 +128,21 @@ rb_cr_newline = {
1, /* max_input */
1, /* max_output */
asciicompat_converter, /* asciicompat_type */
- 0, NULL, NULL, /* state_size, state_init, state_fini */
- NULL, NULL, NULL, NULL
+ 0, 0, 0, /* state_size, state_init, state_fini */
+ 0, 0, 0, 0
+};
+
+static const rb_transcoder
+rb_lf_newline = {
+ "", "lf_newline", lf_newline,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 2, /* max_output */
+ asciicompat_converter, /* asciicompat_type */
+ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
+ 0, 0, 0, fun_so_universal_newline,
+ universal_newline_finish
};
void
@@ -132,4 +151,5 @@ Init_newline(void)
rb_register_transcoder(&rb_universal_newline);
rb_register_transcoder(&rb_crlf_newline);
rb_register_transcoder(&rb_cr_newline);
+ rb_register_transcoder(&rb_lf_newline);
}
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index cf521bed38..c326cbebea 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -51,8 +51,9 @@
transcode_tblgen_singlebyte "WINDOWS-1256"
transcode_tblgen_singlebyte "WINDOWS-1257"
transcode_tblgen_singlebyte "IBM437"
- transcode_tblgen_singlebyte "IBM775"
+ transcode_tblgen_singlebyte "IBM720"
transcode_tblgen_singlebyte "IBM737"
+ transcode_tblgen_singlebyte "IBM775"
transcode_tblgen_singlebyte "IBM852"
transcode_tblgen_singlebyte "IBM855"
transcode_tblgen_singlebyte "IBM857"
@@ -60,6 +61,7 @@
transcode_tblgen_singlebyte "IBM861"
transcode_tblgen_singlebyte "IBM862"
transcode_tblgen_singlebyte "IBM863"
+ transcode_tblgen_singlebyte "IBM864"
transcode_tblgen_singlebyte "IBM865"
transcode_tblgen_singlebyte "IBM866"
transcode_tblgen_singlebyte "IBM869"
diff --git a/enc/trans/transdb.c b/enc/trans/transdb.c
index d6ac41e967..7caf8d845d 100644
--- a/enc/trans/transdb.c
+++ b/enc/trans/transdb.c
@@ -9,6 +9,8 @@
**********************************************************************/
+#include "ruby.h"
+
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
void
diff --git a/enc/trans/windows-1250-tbl.rb b/enc/trans/windows-1250-tbl.rb
index 52063e17b1..9cdb432a03 100644
--- a/enc/trans/windows-1250-tbl.rb
+++ b/enc/trans/windows-1250-tbl.rb
@@ -122,4 +122,4 @@ WINDOWS_1250_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1251-tbl.rb b/enc/trans/windows-1251-tbl.rb
index 870c718b72..3c6c4ca0bb 100644
--- a/enc/trans/windows-1251-tbl.rb
+++ b/enc/trans/windows-1251-tbl.rb
@@ -126,4 +126,4 @@ WINDOWS_1251_TO_UCS_TBL = [
["88",0x20AC],
["B9",0x2116],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1252-tbl.rb b/enc/trans/windows-1252-tbl.rb
index cefc72dff2..86a7be41e7 100644
--- a/enc/trans/windows-1252-tbl.rb
+++ b/enc/trans/windows-1252-tbl.rb
@@ -122,4 +122,4 @@ WINDOWS_1252_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1253-tbl.rb b/enc/trans/windows-1253-tbl.rb
index 132edb60ba..b9d47be2e0 100644
--- a/enc/trans/windows-1253-tbl.rb
+++ b/enc/trans/windows-1253-tbl.rb
@@ -110,4 +110,4 @@ WINDOWS_1253_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1254-tbl.rb b/enc/trans/windows-1254-tbl.rb
index 81a747afaa..84063abf05 100644
--- a/enc/trans/windows-1254-tbl.rb
+++ b/enc/trans/windows-1254-tbl.rb
@@ -120,4 +120,4 @@ WINDOWS_1254_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1256-tbl.rb b/enc/trans/windows-1256-tbl.rb
index 25c5874fb0..0b76c824d1 100644
--- a/enc/trans/windows-1256-tbl.rb
+++ b/enc/trans/windows-1256-tbl.rb
@@ -127,4 +127,4 @@ WINDOWS_1256_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-1257-tbl.rb b/enc/trans/windows-1257-tbl.rb
index 9e89b2b0b5..7f15cbbd50 100644
--- a/enc/trans/windows-1257-tbl.rb
+++ b/enc/trans/windows-1257-tbl.rb
@@ -115,4 +115,4 @@ WINDOWS_1257_TO_UCS_TBL = [
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
-] \ No newline at end of file
+]
diff --git a/enc/trans/windows-874-tbl.rb b/enc/trans/windows-874-tbl.rb
index 0552df3d28..a569765bf0 100644
--- a/enc/trans/windows-874-tbl.rb
+++ b/enc/trans/windows-874-tbl.rb
@@ -96,4 +96,4 @@ WINDOWS_874_TO_UCS_TBL = [
["95",0x2022],
["85",0x2026],
["80",0x20AC],
-] \ No newline at end of file
+]