diff options
Diffstat (limited to 'enc/trans')
45 files changed, 475 insertions, 119 deletions
diff --git a/enc/trans/JIS/JISX0212%UCS.src b/enc/trans/JIS/JISX0212%UCS.src index aa51257b99..0e1ab4c9b9 100644 --- a/enc/trans/JIS/JISX0212%UCS.src +++ b/enc/trans/JIS/JISX0212%UCS.src @@ -67,7 +67,7 @@ BEGIN_MAP # # However, JIS X 0212 maintains the distinction between # the lowercase forms of these two elements at 0x2942 and 0x2943. -# Given the structre of these JIS encodings, it is clear that +# Given the structure of these JIS encodings, it is clear that # 0x2922 and 0x2942 are intended to be a capital/small pair. # Consequently, in the Unicode mapping, 0x2922 is treated as # LATIN CAPITAL LETTER D WITH STROKE. diff --git a/enc/trans/JIS/UCS%JISX0212.src b/enc/trans/JIS/UCS%JISX0212.src index 65383a1c9f..c7711c8ac0 100644 --- a/enc/trans/JIS/UCS%JISX0212.src +++ b/enc/trans/JIS/UCS%JISX0212.src @@ -67,7 +67,7 @@ BEGIN_MAP # # However, JIS X 0212 maintains the distinction between # the lowercase forms of these two elements at 0x2942 and 0x2943. -# Given the structre of these JIS encodings, it is clear that +# Given the structure of these JIS encodings, it is clear that # 0x2922 and 0x2942 are intended to be a capital/small pair. # Consequently, in the Unicode mapping, 0x2922 is treated as # LATIN CAPITAL LETTER D WITH STROKE. diff --git a/enc/trans/big5-uao-tbl.rb b/enc/trans/big5-uao-tbl.rb index 295fbfdda5..a6f37cc7bd 100644 --- a/enc/trans/big5-uao-tbl.rb +++ b/enc/trans/big5-uao-tbl.rb @@ -19781,4 +19781,4 @@ BIG5_UAO_TO_UCS_TBL = [ ["FEFC",0x8262], ["FEFD",0x826A], ["FEFE",0x8288], -]
\ No newline at end of file +] diff --git a/enc/trans/cesu_8.trans b/enc/trans/cesu_8.trans new file mode 100644 index 0000000000..4e17b1ddbb --- /dev/null +++ b/enc/trans/cesu_8.trans @@ -0,0 +1,85 @@ +#include "transcode_data.h" + +<% + map = {} + map["{00-7f}"] = :nomap + map["{c2-df}{80-bf}"] = :nomap + map["e0{a0-bf}{80-bf}"] = :nomap + map["{e1-ec}{80-bf}{80-bf}"] = :nomap + map["ed{80-9f}{80-bf}"] = :nomap + map["{ee-ef}{80-bf}{80-bf}"] = :nomap + map["ed{a0-af}{80-bf}ed{b0-bf}{80-bf}"] = :func_so # surrogate pairs + transcode_generate_node(ActionMap.parse(map), "from_CESU_8") + + map = {} + map["{00-7f}"] = :nomap + map["{c2-df}{80-bf}"] = :nomap + map["e0{a0-bf}{80-bf}"] = :nomap + map["{e1-ec}{80-bf}{80-bf}"] = :nomap + map["ed{80-9f}{80-bf}"] = :nomap + map["{ee-ef}{80-bf}{80-bf}"] = :nomap + map["f0{90-bf}{80-bf}{80-bf}"] = :func_so # planes 1-3 + map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so # planes 4-15 + map["f4{80-8f}{80-bf}{80-bf}"] = :func_so # plane 16 + transcode_generate_node(ActionMap.parse(map), "to_CESU_8") +%> + +<%= transcode_generated_code %> + +static ssize_t +fun_so_from_cesu_8(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned int scalar = ( ((s[1]&0x0F)<<16) | ((s[2]&0x3F)<<10) + | ((s[4]&0x0F)<< 6) | (s[5]&0x3F) + ) + 0x10000; + o[0] = 0xF0 | (scalar>>18); + o[1] = 0x80 | ((scalar>>12)&0x3F); + o[2] = 0x80 | ((scalar>> 6)&0x3F); + o[3] = 0x80 | ( scalar &0x3F); + return 4; +} + +static ssize_t +fun_so_to_cesu_8(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned int scalar = ((s[0]&0x07)<<18) | ((s[1]&0x3F)<<12) + | ((s[2]&0x3F)<< 6) | (s[3]&0x3F); + scalar -= 0x10000; + o[0] = 0xED; + o[1] = 0xA0 | (scalar>>16); + o[2] = 0x80 | ((scalar>>10)&0x3F); + o[3] = 0xED; + o[4] = 0xB0 | ((scalar>> 6)&0x0F); + o[5] = 0x80 | (scalar &0x3F); + return 6; +} + +static const rb_transcoder +rb_from_CESU_8 = { + "CESU-8", "UTF-8", from_CESU_8, + 
TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 6, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_from_cesu_8 +}; + +static const rb_transcoder +rb_to_CESU_8 = { + "UTF-8", "CESU-8", to_CESU_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 6, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_cesu_8 +}; + +TRANS_INIT(cesu_8) +{ + rb_register_transcoder(&rb_from_CESU_8); + rb_register_transcoder(&rb_to_CESU_8); +} diff --git a/enc/trans/cp850-tbl.rb b/enc/trans/cp850-tbl.rb index 615d3b2599..e0d120c803 100644 --- a/enc/trans/cp850-tbl.rb +++ b/enc/trans/cp850-tbl.rb @@ -127,4 +127,4 @@ CP850_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/cp852-tbl.rb b/enc/trans/cp852-tbl.rb index 6763bfa6e9..ad32cdc59d 100644 --- a/enc/trans/cp852-tbl.rb +++ b/enc/trans/cp852-tbl.rb @@ -127,4 +127,4 @@ CP852_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/cp855-tbl.rb b/enc/trans/cp855-tbl.rb index 72e548b9cb..a2ca9daf97 100644 --- a/enc/trans/cp855-tbl.rb +++ b/enc/trans/cp855-tbl.rb @@ -127,4 +127,4 @@ CP855_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans index c76ffa0e06..36d9dd0f13 100644 --- a/enc/trans/escape.trans +++ b/enc/trans/escape.trans @@ -18,9 +18,10 @@ ], nil) transcode_tblgen("", "xml_attr_content_escape", [ - ["{00-21,23-25,27-3B,3D,3F-FF}", :nomap], + ["{00-21,23-25,28-3B,3D,3F-FF}", :nomap], ["22", hexstr("&quot;")], ["26", hexstr("&amp;")], + ["27", hexstr("&apos;")], ["3C", hexstr("&lt;")], ["3E", hexstr("&gt;")] ], nil) diff --git a/enc/trans/gbk-tbl.rb b/enc/trans/gbk-tbl.rb index 26f5078c45..40929f992e 100644 --- a/enc/trans/gbk-tbl.rb +++ b/enc/trans/gbk-tbl.rb @@ -21791,4 +21791,4 @@ GBK_TO_UCS_TBL= [ ["A3FE",0xFFE3], ["A957",0xFFE4], ["A3A4",0xFFE5], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm437-tbl.rb b/enc/trans/ibm437-tbl.rb index 5ae64d621e..6a823c293d 100644 --- a/enc/trans/ibm437-tbl.rb +++ b/enc/trans/ibm437-tbl.rb @@ -127,4 +127,4 @@ IBM437_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm720-tbl.rb b/enc/trans/ibm720-tbl.rb new file mode 100644 index 0000000000..558684d649 --- /dev/null +++ b/enc/trans/ibm720-tbl.rb @@ -0,0 +1,122 @@ +IBM720_TO_UCS_TBL = [ + ["FF",0xA0], + ["9C",0xA3], + ["94",0xA4], + ["AE",0xAB], + ["F8",0xB0], + ["FD",0xB2], + ["E6",0xB5], + ["FA",0xB7], + ["AF",0xBB], + ["85",0xE0], + ["83",0xE2], + ["87",0xE7], + ["8A",0xE8], + ["82",0xE9], + ["88",0xEA], + ["89",0xEB], + ["8C",0xEE], + ["8B",0xEF], + ["93",0xF4], + ["97",0xF9], + ["96",0xFB], + ["98",0x621], + ["99",0x622], + ["9A",0x623], + ["9B",0x624], + ["9D",0x625], + ["9E",0x626], + ["9F",0x627], + ["A0",0x628], + ["A1",0x629], + ["A2",0x62A], + ["A3",0x62B], + ["A4",0x62C], + ["A5",0x62D], + ["A6",0x62E], + ["A7",0x62F], + ["A8",0x630], + ["A9",0x631], + ["AA",0x632], + ["AB",0x633], + ["AC",0x634], + ["AD",0x635], + ["E0",0x636], + ["E1",0x637], + ["E2",0x638], + ["E3",0x639], + ["E4",0x63A], + ["95",0x640], + ["E5",0x641], + ["E7",0x642], + ["E8",0x643], + ["E9",0x644], + ["EA",0x645], + ["EB",0x646], + ["EC",0x647], + ["ED",0x648], + ["EE",0x649], + ["EF",0x64A], + ["F1",0x64B], + ["F2",0x64C], + ["F3",0x64D], + ["F4",0x64E], + ["F5",0x64F], + ["F6",0x650], + ["91",0x651], + ["92",0x652], + ["FC",0x207F], + ["F9",0x2219], + ["FB",0x221A], + ["F7",0x2248], + ["F0",0x2261], + ["C4",0x2500], + ["B3",0x2502], + ["DA",0x250C], + ["BF",0x2510], + ["C0",0x2514], + ["D9",0x2518], + ["C3",0x251C], + ["B4",0x2524], + ["C2",0x252C], + ["C1",0x2534], + ["C5",0x253C], + ["CD",0x2550], + ["BA",0x2551], + ["D5",0x2552], + ["D6",0x2553], + ["C9",0x2554], + ["B8",0x2555], + ["B7",0x2556], + ["BB",0x2557], + ["D4",0x2558], + ["D3",0x2559], + ["C8",0x255A], + ["BE",0x255B], + ["BD",0x255C], + ["BC",0x255D], + ["C6",0x255E], + ["C7",0x255F], + ["CC",0x2560], + ["B5",0x2561], + ["B6",0x2562], + ["B9",0x2563], + ["D1",0x2564], + ["D2",0x2565], + ["CB",0x2566], + ["CF",0x2567], + ["D0",0x2568], + ["CA",0x2569], + ["D8",0x256A], + 
["D7",0x256B], + ["CE",0x256C], + ["DF",0x2580], + ["DC",0x2584], + ["DB",0x2588], + ["DD",0x258C], + ["DE",0x2590], + ["B0",0x2591], + ["B1",0x2592], + ["B2",0x2593], + ["FE",0x25A0], +] diff --git a/enc/trans/ibm775-tbl.rb b/enc/trans/ibm775-tbl.rb index f55679f409..2b79780813 100644 --- a/enc/trans/ibm775-tbl.rb +++ b/enc/trans/ibm775-tbl.rb @@ -127,4 +127,4 @@ IBM775_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm852-tbl.rb b/enc/trans/ibm852-tbl.rb index 6cec51cf80..3e70daef2e 100644 --- a/enc/trans/ibm852-tbl.rb +++ b/enc/trans/ibm852-tbl.rb @@ -127,4 +127,4 @@ IBM852_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm855-tbl.rb b/enc/trans/ibm855-tbl.rb index 7e0cc5014f..b4c0244728 100644 --- a/enc/trans/ibm855-tbl.rb +++ b/enc/trans/ibm855-tbl.rb @@ -127,4 +127,4 @@ IBM855_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm857-tbl.rb b/enc/trans/ibm857-tbl.rb index 5b20d389d3..c1c76545ed 100644 --- a/enc/trans/ibm857-tbl.rb +++ b/enc/trans/ibm857-tbl.rb @@ -124,4 +124,4 @@ IBM857_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm860-tbl.rb b/enc/trans/ibm860-tbl.rb index ae218a129f..77734cd194 100644 --- a/enc/trans/ibm860-tbl.rb +++ b/enc/trans/ibm860-tbl.rb @@ -127,4 +127,4 @@ IBM860_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm861-tbl.rb b/enc/trans/ibm861-tbl.rb index c24042a76c..69e0a45019 100644 --- a/enc/trans/ibm861-tbl.rb +++ b/enc/trans/ibm861-tbl.rb @@ -127,4 +127,4 @@ IBM861_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm862-tbl.rb b/enc/trans/ibm862-tbl.rb index 31d6fb0243..f564051fd6 100644 --- a/enc/trans/ibm862-tbl.rb +++ b/enc/trans/ibm862-tbl.rb @@ -127,4 +127,4 @@ IBM862_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm863-tbl.rb b/enc/trans/ibm863-tbl.rb index db110cf38c..af1eb97566 100644 --- a/enc/trans/ibm863-tbl.rb +++ b/enc/trans/ibm863-tbl.rb @@ -127,4 +127,4 @@ IBM863_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm864-tbl.rb b/enc/trans/ibm864-tbl.rb new file mode 100644 index 0000000000..13f8a27f1d --- /dev/null +++ b/enc/trans/ibm864-tbl.rb @@ -0,0 +1,126 @@ +IBM864_TO_UCS_TBL = [ + ["80",0x00B0], + ["81",0x00B7], + ["82",0x2219], + ["83",0x221A], + ["84",0x2592], + ["85",0x2500], + ["86",0x2502], + ["87",0x253C], + ["88",0x2524], + ["89",0x252C], + ["8A",0x251C], + ["8B",0x2534], + ["8C",0x2510], + ["8D",0x250C], + ["8E",0x2514], + ["8F",0x2518], + ["90",0x03B2], + ["91",0x221E], + ["92",0x03C6], + ["93",0x00B1], + ["94",0x00BD], + ["95",0x00BC], + ["96",0x2248], + ["97",0x00AB], + ["98",0x00BB], + ["99",0xFEF7], + ["9A",0xFEF8], + ["9D",0xFEFB], + ["9E",0xFEFC], + ["9F",0xFE73], + ["A0",0x00A0], + ["A1",0x00AD], + ["A2",0xFE82], + ["A3",0x00A3], + ["A4",0x00A4], + ["A5",0xFE84], + ["A7",0x20AC], # Euro sign from CCSID 864 + ["A8",0xFE8E], + ["A9",0xFE8F], + ["AA",0xFE95], + ["AB",0xFE99], + ["AC",0x060C], + ["AD",0xFE9D], + ["AE",0xFEA1], + ["AF",0xFEA5], + ["B0",0x0660], + ["B1",0x0661], + ["B2",0x0662], + ["B3",0x0663], + ["B4",0x0664], + ["B5",0x0665], + ["B6",0x0666], + ["B7",0x0667], + ["B8",0x0668], + ["B9",0x0669], + ["BA",0xFED1], + ["BB",0x061B], + ["BC",0xFEB1], + ["BD",0xFEB5], + ["BE",0xFEB9], + ["BF",0x061F], + ["C0",0x00A2], + ["C1",0xFE80], + ["C2",0xFE81], + ["C3",0xFE83], + ["C4",0xFE85], + ["C5",0xFECA], + ["C6",0xFE8B], + ["C7",0xFE8D], + ["C8",0xFE91], + ["C9",0xFE93], + ["CA",0xFE97], + ["CB",0xFE9B], + ["CC",0xFE9F], + ["CD",0xFEA3], + ["CE",0xFEA7], + ["CF",0xFEA9], + ["D0",0xFEAB], + ["D1",0xFEAD], + ["D2",0xFEAF], + ["D3",0xFEB3], + ["D4",0xFEB7], + ["D5",0xFEBB], + ["D6",0xFEBF], + ["D7",0xFEC1], + ["D8",0xFEC5], + ["D9",0xFECB], + ["DA",0xFECF], + ["DB",0x00A6], + ["DC",0x00AC], + ["DD",0x00F7], + ["DE",0x00D7], + ["DF",0xFEC9], + ["E0",0x0640], + ["E1",0xFED3], + ["E2",0xFED7], + ["E3",0xFEDB], + ["E4",0xFEDF], + ["E5",0xFEE3], + ["E6",0xFEE7], + ["E7",0xFEEB], + ["E8",0xFEED], + 
["E9",0xFEEF], + ["EA",0xFEF3], + ["EB",0xFEBD], + ["EC",0xFECC], + ["ED",0xFECE], + ["EE",0xFECD], + ["EF",0xFEE1], + ["F0",0xFE7D], + ["F1",0x0651], + ["F2",0xFEE5], + ["F3",0xFEE9], + ["F4",0xFEEC], + ["F5",0xFEF0], + ["F6",0xFEF2], + ["F7",0xFED0], + ["F8",0xFED5], + ["F9",0xFEF5], + ["FA",0xFEF6], + ["FB",0xFEDD], + ["FC",0xFED9], + ["FD",0xFEF1], + ["FE",0x25A0] +] diff --git a/enc/trans/ibm865-tbl.rb b/enc/trans/ibm865-tbl.rb index 22e322fb31..4747509d66 100644 --- a/enc/trans/ibm865-tbl.rb +++ b/enc/trans/ibm865-tbl.rb @@ -127,4 +127,4 @@ IBM865_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm866-tbl.rb b/enc/trans/ibm866-tbl.rb index 95b9ee7534..ed4b0d683e 100644 --- a/enc/trans/ibm866-tbl.rb +++ b/enc/trans/ibm866-tbl.rb @@ -127,4 +127,4 @@ IBM866_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/ibm869-tbl.rb b/enc/trans/ibm869-tbl.rb index 437e41ad98..bee85b84ea 100644 --- a/enc/trans/ibm869-tbl.rb +++ b/enc/trans/ibm869-tbl.rb @@ -118,4 +118,4 @@ IBM869_TO_UCS_TBL = [ ["B1",0x2592], ["B2",0x2593], ["FE",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index a441f1596d..bc42bbc19c 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -1,4 +1,5 @@ #include "transcode_data.h" +#include "ruby/internal/attr/nonstring.h" <% map = { @@ -79,6 +80,34 @@ iso2022jp_init(void *statep) return 0; } +static unsigned char * +iso2022jp_put_state(unsigned char *sp, unsigned char *o, int oldstate, int newstate) +{ + if (oldstate != newstate) { + *o++ = 0x1b; + switch (newstate) { + case G0_ASCII: + *o++ = '('; + *o++ = 'B'; + break; + case G0_JISX0201_KATAKANA: + *o++ = '('; + *o++ = 'I'; + break; + case G0_JISX0208_1978: + *o++ = '$'; + *o++ = '@'; + break; + default: + *o++ = '$'; + *o++ = 'B'; + break; + } + *sp = newstate; + } + return o; +} + static VALUE fun_si_iso2022jp_decoder(void *statep, const unsigned char *s, size_t l) { @@ -154,24 +183,7 @@ fun_so_iso2022jp_encoder(void *statep, const unsigned char *s, size_t l, unsigne else newstate = G0_JISX0208_1983; - if (*sp != newstate) { - if (newstate == G0_ASCII) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - } - else if (newstate == G0_JISX0208_1978) { - *o++ = 0x1b; - *o++ = '$'; - *o++ = '@'; - } - else { - *o++ = 0x1b; - *o++ = '$'; - *o++ = 'B'; - } - *sp = newstate; - } + o = iso2022jp_put_state(sp, o, *sp, newstate); if (l == 1) { *o++ = s[0] & 0x7f; @@ -202,10 +214,7 @@ finish_iso2022jp_encoder(void *statep, unsigned char *o, size_t osize) if (*sp == G0_ASCII) return 0; - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - *sp = G0_ASCII; + o = iso2022jp_put_state(sp, o, *sp, G0_ASCII); return o - output0; } @@ -399,24 +408,7 @@ fun_so_cp5022x_encoder(void *statep, const unsigned char *s, size_t l, else newstate = G0_JISX0208_1983; - if (*sp != newstate) { - if (newstate == G0_ASCII) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - } - else if (newstate == G0_JISX0201_KATAKANA) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'I'; - } - else { - *o++ = 0x1b; - *o++ = '$'; - *o++ 
= 'B'; - } - *sp = newstate; - } + o = iso2022jp_put_state(sp, o, sp[0], newstate); if (l == 1) { *o++ = s[0] & 0x7f; @@ -443,15 +435,26 @@ rb_cp50221_encoder = { iso2022jp_encoder_reset_sequence_size, finish_iso2022jp_encoder }; -static const char *tbl0208 = - "\x21\x23\x21\x56\x21\x57\x21\x22\x21\x26\x25\x72\x25\x21\x25\x23" \ - "\x25\x25\x25\x27\x25\x29\x25\x63\x25\x65\x25\x67\x25\x43\x21\x3C" \ - "\x25\x22\x25\x24\x25\x26\x25\x28\x25\x2A\x25\x2B\x25\x2D\x25\x2F" \ - "\x25\x31\x25\x33\x25\x35\x25\x37\x25\x39\x25\x3B\x25\x3D\x25\x3F" \ - "\x25\x41\x25\x44\x25\x46\x25\x48\x25\x4A\x25\x4B\x25\x4C\x25\x4D" \ - "\x25\x4E\x25\x4F\x25\x52\x25\x55\x25\x58\x25\x5B\x25\x5E\x25\x5F" \ - "\x25\x60\x25\x61\x25\x62\x25\x64\x25\x66\x25\x68\x25\x69\x25\x6A" \ - "\x25\x6B\x25\x6C\x25\x6D\x25\x6F\x25\x73\x21\x2B\x21\x2C"; +/* JIS0201 to JIS0208 conversion table */ +enum {tbl0208_num = 0xDF - 0xA1 + 1}; +RBIMPL_ATTR_NONSTRING_ARRAY() static const char tbl0208[tbl0208_num][2] = { + "\x21\x23", "\x21\x56", "\x21\x57", "\x21\x22", + "\x21\x26", "\x25\x72", "\x25\x21", "\x25\x23", + "\x25\x25", "\x25\x27", "\x25\x29", "\x25\x63", + "\x25\x65", "\x25\x67", "\x25\x43", "\x21\x3C", + "\x25\x22", "\x25\x24", "\x25\x26", "\x25\x28", + "\x25\x2A", "\x25\x2B", "\x25\x2D", "\x25\x2F", + "\x25\x31", "\x25\x33", "\x25\x35", "\x25\x37", + "\x25\x39", "\x25\x3B", "\x25\x3D", "\x25\x3F", + "\x25\x41", "\x25\x44", "\x25\x46", "\x25\x48", + "\x25\x4A", "\x25\x4B", "\x25\x4C", "\x25\x4D", + "\x25\x4E", "\x25\x4F", "\x25\x52", "\x25\x55", + "\x25\x58", "\x25\x5B", "\x25\x5E", "\x25\x5F", + "\x25\x60", "\x25\x61", "\x25\x62", "\x25\x64", + "\x25\x66", "\x25\x68", "\x25\x69", "\x25\x6A", + "\x25\x6B", "\x25\x6C", "\x25\x6D", "\x25\x6F", + "\x25\x73", "\x21\x2B", "\x21\x2C" +}; static ssize_t fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l, @@ -460,22 +463,21 @@ fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l, unsigned char *output0 = o; unsigned char *sp = 
statep; - if (sp[0] == G0_JISX0201_KATAKANA) { + if (sp[0] == G0_JISX0201_KATAKANA && sp[2]) { int c = sp[2] & 0x7F; - const char *p = tbl0208 + (c - 0x21) * 2; - if (sp[1] != G0_JISX0208_1983) { - *o++ = 0x1b; - *o++ = '$'; - *o++ = 'B'; - } + const char *p = tbl0208[c - 0x21]; + sp[2] = 0; + o = iso2022jp_put_state(sp, o, sp[1], G0_JISX0208_1983); sp[0] = G0_JISX0208_1983; *o++ = *p++; if (l == 2 && s[0] == 0x8E) { if (s[1] == 0xDE) { + /* VOICED SOUND MARK */ *o++ = *p + 1; return o - output0; } else if (s[1] == 0xDF && (0x4A <= c && c <= 0x4E)) { + /* SEMI-VOICED SOUND MARK */ *o++ = *p + 2; return o - output0; } @@ -484,21 +486,25 @@ fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l, } if (l == 2 && s[0] == 0x8E) { - const char *p = tbl0208 + (s[1] - 0xA1) * 2; if ((0xA1 <= s[1] && s[1] <= 0xB5) || (0xC5 <= s[1] && s[1] <= 0xC9) || (0xCF <= s[1] && s[1] <= 0xDF)) { - if (*sp != G0_JISX0208_1983) { - *o++ = 0x1b; - *o++ = '$'; - *o++ = 'B'; - *sp = G0_JISX0208_1983; - } + /* May not be followed by a sound mark */ + const char *p = tbl0208[s[1] - 0xA1]; + o = iso2022jp_put_state(sp, o, *sp, G0_JISX0208_1983); *o++ = *p++; *o++ = *p; return o - output0; } + if (s[1] > 0xDF) { /* undef */ + o = iso2022jp_put_state(sp, o, *sp, G0_JISX0201_KATAKANA); + *o++ = s[1] & 0x7F; + sp[2] = 0; + return o - output0; + } + + /* Katakana that may be followed by a sound mark */ sp[2] = s[1]; sp[1] = sp[0]; sp[0] = G0_JISX0201_KATAKANA; @@ -518,23 +524,16 @@ finish_cp50220_encoder(void *statep, unsigned char *o, size_t osize) if (*sp == G0_ASCII) return 0; - if (sp[0] == G0_JISX0201_KATAKANA) { + if (sp[0] == G0_JISX0201_KATAKANA && sp[2]) { int c = sp[2] & 0x7F; - const char *p = tbl0208 + (c - 0x21) * 2; - if (sp[1] != G0_JISX0208_1983) { - *o++ = 0x1b; - *o++ = '$'; - *o++ = 'B'; - } + const char *p = tbl0208[c - 0x21]; + o = iso2022jp_put_state(sp, o, sp[1], G0_JISX0208_1983); sp[0] = G0_JISX0208_1983; *o++ = *p++; *o++ = *p; } - *o++ = 0x1b; - *o++ = '('; 
- *o++ = 'B'; - *sp = G0_ASCII; + o = iso2022jp_put_state(sp, o, sp[0], G0_ASCII); return o - output0; } @@ -564,4 +563,3 @@ TRANS_INIT(iso2022) rb_register_transcoder(&rb_cp50220_encoder); rb_register_transcoder(&rb_cp50221_encoder); } - diff --git a/enc/trans/koi8-r-tbl.rb b/enc/trans/koi8-r-tbl.rb index a1f55ff2e3..4cfe523334 100644 --- a/enc/trans/koi8-r-tbl.rb +++ b/enc/trans/koi8-r-tbl.rb @@ -127,4 +127,4 @@ KOI8_R_TO_UCS_TBL = [ ["91",0x2592], ["92",0x2593], ["94",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/koi8-u-tbl.rb b/enc/trans/koi8-u-tbl.rb index e87aa1aa3f..225931ba5e 100644 --- a/enc/trans/koi8-u-tbl.rb +++ b/enc/trans/koi8-u-tbl.rb @@ -127,4 +127,4 @@ KOI8_U_TO_UCS_TBL = [ ["91",0x2592], ["92",0x2593], ["94",0x25A0], -]
\ No newline at end of file +] diff --git a/enc/trans/maccroatian-tbl.rb b/enc/trans/maccroatian-tbl.rb index 359878ec1f..e78f2f373f 100644 --- a/enc/trans/maccroatian-tbl.rb +++ b/enc/trans/maccroatian-tbl.rb @@ -126,4 +126,4 @@ MACCROATIAN_TO_UCS_TBL = [ ["B2",0x2264], ["B3",0x2265], ["D7",0x25CA], -]
\ No newline at end of file +] diff --git a/enc/trans/maccyrillic-tbl.rb b/enc/trans/maccyrillic-tbl.rb index 378aa8c3bc..2d5af7b466 100644 --- a/enc/trans/maccyrillic-tbl.rb +++ b/enc/trans/maccyrillic-tbl.rb @@ -127,4 +127,4 @@ MACCYRILLIC_TO_UCS_TBL = [ ["AD",0x2260], ["B2",0x2264], ["B3",0x2265], -]
\ No newline at end of file +] diff --git a/enc/trans/macgreek-tbl.rb b/enc/trans/macgreek-tbl.rb index 7f75fde6d2..645aefe5ff 100644 --- a/enc/trans/macgreek-tbl.rb +++ b/enc/trans/macgreek-tbl.rb @@ -126,4 +126,4 @@ MACGREEK_TO_UCS_TBL = [ ["AD",0x2260], ["B2",0x2264], ["B3",0x2265], -]
\ No newline at end of file +] diff --git a/enc/trans/maciceland-tbl.rb b/enc/trans/maciceland-tbl.rb index 818d992274..ee9b5000d9 100644 --- a/enc/trans/maciceland-tbl.rb +++ b/enc/trans/maciceland-tbl.rb @@ -126,4 +126,4 @@ MACICELAND_TO_UCS_TBL = [ ["B2",0x2264], ["B3",0x2265], ["D7",0x25CA], -]
\ No newline at end of file +] diff --git a/enc/trans/macroman-tbl.rb b/enc/trans/macroman-tbl.rb index 8f74eea27f..9a8172554a 100644 --- a/enc/trans/macroman-tbl.rb +++ b/enc/trans/macroman-tbl.rb @@ -126,4 +126,4 @@ MACROMAN_TO_UCS_TBL = [ ["D7",0x25CA], ["DE",0xFB01], ["DF",0xFB02], -]
\ No newline at end of file +] diff --git a/enc/trans/macromania-tbl.rb b/enc/trans/macromania-tbl.rb index ff95c5e957..29a7942d9b 100644 --- a/enc/trans/macromania-tbl.rb +++ b/enc/trans/macromania-tbl.rb @@ -126,4 +126,4 @@ MACROMANIA_TO_UCS_TBL = [ ["B2",0x2264], ["B3",0x2265], ["D7",0x25CA], -]
\ No newline at end of file +] diff --git a/enc/trans/macturkish-tbl.rb b/enc/trans/macturkish-tbl.rb index 2358672ed6..883f693e23 100644 --- a/enc/trans/macturkish-tbl.rb +++ b/enc/trans/macturkish-tbl.rb @@ -125,4 +125,4 @@ MACTURKISH_TO_UCS_TBL = [ ["B2",0x2264], ["B3",0x2265], ["D7",0x25CA], -]
\ No newline at end of file +] diff --git a/enc/trans/macukraine-tbl.rb b/enc/trans/macukraine-tbl.rb index 6941af654d..09acf7c45a 100644 --- a/enc/trans/macukraine-tbl.rb +++ b/enc/trans/macukraine-tbl.rb @@ -127,4 +127,4 @@ MACUKRAINE_TO_UCS_TBL = [ ["AD",0x2260], ["B2",0x2264], ["B3",0x2265], -]
\ No newline at end of file +] diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index a200ec00a7..95e082f5bd 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -17,10 +17,16 @@ map_cr["0a"] = "0d" transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") + + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline") %> <%= transcode_generated_code %> +#define lf_newline universal_newline #define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 @@ -98,7 +104,7 @@ rb_universal_newline = { 2, /* max_output */ asciicompat_converter, /* asciicompat_type */ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ - NULL, NULL, NULL, fun_so_universal_newline, + 0, 0, 0, fun_so_universal_newline, universal_newline_finish }; @@ -110,8 +116,8 @@ rb_crlf_newline = { 1, /* max_input */ 2, /* max_output */ asciicompat_converter, /* asciicompat_type */ - 0, NULL, NULL, /* state_size, state_init, state_fini */ - NULL, NULL, NULL, NULL + 0, 0, 0, /* state_size, state_init, state_fini */ + 0, 0, 0, 0 }; static const rb_transcoder @@ -122,8 +128,21 @@ rb_cr_newline = { 1, /* max_input */ 1, /* max_output */ asciicompat_converter, /* asciicompat_type */ - 0, NULL, NULL, /* state_size, state_init, state_fini */ - NULL, NULL, NULL, NULL + 0, 0, 0, /* state_size, state_init, state_fini */ + 0, 0, 0, 0 +}; + +static const rb_transcoder +rb_lf_newline = { + "", "lf_newline", lf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + 0, 0, 0, fun_so_universal_newline, + universal_newline_finish }; void @@ -132,4 +151,5 @@ Init_newline(void) rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_crlf_newline); 
rb_register_transcoder(&rb_cr_newline); + rb_register_transcoder(&rb_lf_newline); } diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index cf521bed38..c326cbebea 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -51,8 +51,9 @@ transcode_tblgen_singlebyte "WINDOWS-1256" transcode_tblgen_singlebyte "WINDOWS-1257" transcode_tblgen_singlebyte "IBM437" - transcode_tblgen_singlebyte "IBM775" + transcode_tblgen_singlebyte "IBM720" transcode_tblgen_singlebyte "IBM737" + transcode_tblgen_singlebyte "IBM775" transcode_tblgen_singlebyte "IBM852" transcode_tblgen_singlebyte "IBM855" transcode_tblgen_singlebyte "IBM857" @@ -60,6 +61,7 @@ transcode_tblgen_singlebyte "IBM861" transcode_tblgen_singlebyte "IBM862" transcode_tblgen_singlebyte "IBM863" + transcode_tblgen_singlebyte "IBM864" transcode_tblgen_singlebyte "IBM865" transcode_tblgen_singlebyte "IBM866" transcode_tblgen_singlebyte "IBM869" diff --git a/enc/trans/transdb.c b/enc/trans/transdb.c index d6ac41e967..7caf8d845d 100644 --- a/enc/trans/transdb.c +++ b/enc/trans/transdb.c @@ -9,6 +9,8 @@ **********************************************************************/ +#include "ruby.h" + void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); void diff --git a/enc/trans/windows-1250-tbl.rb b/enc/trans/windows-1250-tbl.rb index 52063e17b1..9cdb432a03 100644 --- a/enc/trans/windows-1250-tbl.rb +++ b/enc/trans/windows-1250-tbl.rb @@ -122,4 +122,4 @@ WINDOWS_1250_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1251-tbl.rb b/enc/trans/windows-1251-tbl.rb index 870c718b72..3c6c4ca0bb 100644 --- a/enc/trans/windows-1251-tbl.rb +++ b/enc/trans/windows-1251-tbl.rb @@ -126,4 +126,4 @@ WINDOWS_1251_TO_UCS_TBL = [ ["88",0x20AC], ["B9",0x2116], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1252-tbl.rb b/enc/trans/windows-1252-tbl.rb index cefc72dff2..86a7be41e7 100644 --- a/enc/trans/windows-1252-tbl.rb +++ b/enc/trans/windows-1252-tbl.rb @@ -122,4 +122,4 @@ WINDOWS_1252_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1253-tbl.rb b/enc/trans/windows-1253-tbl.rb index 132edb60ba..b9d47be2e0 100644 --- a/enc/trans/windows-1253-tbl.rb +++ b/enc/trans/windows-1253-tbl.rb @@ -110,4 +110,4 @@ WINDOWS_1253_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1254-tbl.rb b/enc/trans/windows-1254-tbl.rb index 81a747afaa..84063abf05 100644 --- a/enc/trans/windows-1254-tbl.rb +++ b/enc/trans/windows-1254-tbl.rb @@ -120,4 +120,4 @@ WINDOWS_1254_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1256-tbl.rb b/enc/trans/windows-1256-tbl.rb index 25c5874fb0..0b76c824d1 100644 --- a/enc/trans/windows-1256-tbl.rb +++ b/enc/trans/windows-1256-tbl.rb @@ -127,4 +127,4 @@ WINDOWS_1256_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-1257-tbl.rb b/enc/trans/windows-1257-tbl.rb index 9e89b2b0b5..7f15cbbd50 100644 --- a/enc/trans/windows-1257-tbl.rb +++ b/enc/trans/windows-1257-tbl.rb @@ -115,4 +115,4 @@ WINDOWS_1257_TO_UCS_TBL = [ ["9B",0x203A], ["80",0x20AC], ["99",0x2122], -]
\ No newline at end of file +] diff --git a/enc/trans/windows-874-tbl.rb b/enc/trans/windows-874-tbl.rb index 0552df3d28..a569765bf0 100644 --- a/enc/trans/windows-874-tbl.rb +++ b/enc/trans/windows-874-tbl.rb @@ -96,4 +96,4 @@ WINDOWS_874_TO_UCS_TBL = [ ["95",0x2022], ["85",0x2026], ["80",0x20AC], -]
\ No newline at end of file +] |
