From af1c4167287b9353fec766f932fe4afe97116ad4 Mon Sep 17 00:00:00 2001 From: naruse Date: Mon, 4 Jun 2007 12:31:26 +0000 Subject: * lib/json.rb, lib/json, ext/json, test/json: import JSON library. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12428 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/nkf/nkf-utf8/nkf.c | 198 +++++++++++++++++++++++------------- ext/nkf/nkf-utf8/utf8tbl.c | 247 +++++++++++++++++++++++++++++++++++++++++++++ ext/nkf/nkf-utf8/utf8tbl.h | 3 + 3 files changed, 378 insertions(+), 70 deletions(-) (limited to 'ext/nkf/nkf-utf8') diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 3cd1b160da..30bb6c47b8 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -41,7 +41,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2007-01-28" +#define NKF_RELEASE_DATE "2007-05-28" #include "config.h" #include "utf8tbl.h" @@ -351,10 +351,12 @@ static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0); * 0: Shift_JIS, eucJP-ascii * 1: eucJP-ms * 2: CP932, CP51932 + * 3: CP10001 */ -#define UCS_MAP_ASCII 0 -#define UCS_MAP_MS 1 -#define UCS_MAP_CP932 2 +#define UCS_MAP_ASCII 0 +#define UCS_MAP_MS 1 +#define UCS_MAP_CP932 2 +#define UCS_MAP_CP10001 3 static int ms_ucs_map_f = UCS_MAP_ASCII; #endif #ifdef UTF8_INPUT_ENABLE @@ -1232,6 +1234,14 @@ void options(unsigned char *cp) #endif #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; +#endif + }else if(strcmp(codeset, "CP10001") == 0){ + input_f = SJIS_INPUT; +#ifdef SHIFTJIS_CP932 + cp51932_f = TRUE; +#endif +#ifdef UTF8_OUTPUT_ENABLE + ms_ucs_map_f = UCS_MAP_CP10001; #endif }else if(strcmp(codeset, "EUCJP") == 0 || strcmp(codeset, "EUC-JP") == 0){ @@ -1370,6 +1380,11 @@ void options(unsigned char *cp) output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; +#endif + }else if(strcmp(codeset, "CP10001") == 0){ + output_conv = s_oconv; +#ifdef UTF8_OUTPUT_ENABLE + ms_ucs_map_f = UCS_MAP_CP10001; #endif }else if(strcmp(codeset, "EUCJP") == 0 || strcmp(codeset, "EUC-JP") == 0){ @@ -2676,6 +2691,12 @@ nkf_char kanji_convert(FILE *f) } else { /* bogus code, skip SSO and one byte */ NEXT; } + } else if (ms_ucs_map_f == UCS_MAP_CP10001 && + (c1 == 0xFD || c1 == 0xFE)) { + /* CP10001 */ + c2 = X0201; + c1 &= 0x7f; + SEND; } else { /* already established */ c2 = c1; @@ -2885,35 +2906,41 @@ nkf_char kanji_convert(FILE *f) (*oconv)(0, ESC); SEND; } - } else if ((c1 == NL || c1 == CR) && broken_f&4) { - input_mode = ASCII; set_iconv(FALSE, 0); - SEND; - } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) { - if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { - i_ungetc(SPACE,f); - continue; - } else { - i_ungetc(c1,f); - } - c1 = NL; - SEND; - } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) { - if ((c1=(*i_getc)(f))!=EOF) { - if (c1==SPACE) { - i_ungetc(SPACE,f); - continue; - } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) { - i_ungetc(SPACE,f); - continue; - } else { - i_ungetc(c1,f); + } else if (c1 == NL || c1 == CR) { + if (broken_f&4) { + input_mode = ASCII; set_iconv(FALSE, 0); + SEND; + } else if (mime_decode_f && !mime_decode_mode){ + if (c1 == NL) { + if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { + i_ungetc(SPACE,f); + continue; + } else { + i_ungetc(c1,f); + } + c1 = NL; + SEND; + } else { /* if (c1 == CR)*/ + if ((c1=(*i_getc)(f))!=EOF) { + if (c1==SPACE) { + i_ungetc(SPACE,f); + continue; + } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) { + i_ungetc(SPACE,f); + continue; + } else { + i_ungetc(c1,f); + } + i_ungetc(NL,f); + } else { + i_ungetc(c1,f); + } + c1 = CR; + SEND; } - i_ungetc(NL,f); - } else { - i_ungetc(c1,f); } - c1 = CR; - SEND; + if (crmode_f == CR && c1 == NL) crmode_f = CRLF; + else crmode_f = c1; } else if (c1 == DEL && input_mode == X0208 ) { /* CP5022x */ c2 = c1; @@ -3125,9 +3152,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} }; #ifdef SHIFTJIS_CP932 if (!cp932inv_f && is_ibmext_in_sjis(c2)){ -#if 0 - extern const unsigned short shiftjis_cp932[3][189]; -#endif val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; if (val){ c2 = val >> 8; @@ -3136,9 +3160,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) } if (cp932inv_f && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ -#if 0 - extern const unsigned short cp932inv[2][189]; -#endif nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; if (c){ c2 = c >> 8; @@ -3148,9 +3169,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) #endif /* SHIFTJIS_CP932 */ #ifdef X0212_ENABLE if (!x0213_f && is_ibmext_in_sjis(c2)){ -#if 0 - extern const unsigned short shiftjis_x0212[3][189]; -#endif val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; if (val){ if (val > 0x7FFF){ @@ -3481,14 +3499,6 @@ nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0) nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) { -#if 0 - extern const unsigned short *const utf8_to_euc_2bytes[]; - extern const unsigned short *const utf8_to_euc_2bytes_ms[]; - extern const unsigned short *const utf8_to_euc_2bytes_932[]; - extern const unsigned short *const *const utf8_to_euc_3bytes[]; - extern const unsigned short *const *const utf8_to_euc_3bytes_ms[]; - extern const unsigned short *const *const utf8_to_euc_3bytes_932[]; -#endif const unsigned short *const *pp; const unsigned short *const *const *ppp; static const int no_best_fit_chars_table_C2[] = @@ -3538,11 +3548,27 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * } }else if(ms_ucs_map_f == UCS_MAP_MS){ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1; + }else if(ms_ucs_map_f == UCS_MAP_CP10001){ + switch(c2){ + case 0xC2: + switch(c1){ + case 0xA2: + case 0xA3: + case 0xA5: + case 0xA6: + case 0xAC: + case 0xAF: + case 0xB8: + return 1; + } + break; + } } } pp = ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 : ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms : + ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac : utf8_to_euc_2bytes; ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1); }else if(c0 < 0xF0){ @@ -3565,6 +3591,19 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * if(c1 == 0x80 || c0 == 0x9C) return 1; break; } + }else if(ms_ucs_map_f == UCS_MAP_CP10001){ + switch(c2){ + case 0xE3: + switch(c1){ + case 0x82: + if(c0 == 0x94) return 1; + break; + case 0x83: + if(c0 == 0xBB) return 1; + break; + } + break; + } }else{ switch(c2){ case 0xE2: @@ -3596,8 +3635,10 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * ppp = ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 : ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms : + ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac : utf8_to_euc_3bytes; ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1); +// fprintf(stderr, "wret: %X %X %X -> %X %X\n",c2,c1,c0,*p2,*p1,ret); }else return -1; #ifdef SHIFTJIS_CP932 if (!ret && !cp932inv_f && is_eucg3(*p2)) { @@ -3739,15 +3780,17 @@ void encode_fallback_subchar(nkf_char c) #ifdef UTF8_OUTPUT_ENABLE nkf_char e2w_conv(nkf_char c2, nkf_char c1) { -#if 0 - extern const unsigned short euc_to_utf8_1byte[]; - extern const unsigned short *const euc_to_utf8_2bytes[]; - extern const unsigned short *const euc_to_utf8_2bytes_ms[]; - extern const unsigned short *const x0212_to_utf8_2bytes[]; -#endif const unsigned short *p; if (c2 == X0201) { + if (ms_ucs_map_f == UCS_MAP_CP10001) { + switch (c1) { + case 0x20: + return 0xA0; + case 0x7D: + return 0xA9; + } + } p = euc_to_utf8_1byte; #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ @@ -3764,7 +3807,10 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1) c2 &= 0x7f; c2 = (c2&0x7f) - 0x21; if (0<=c2 && c2> 8; @@ -4539,6 +4579,10 @@ void z_conv(nkf_char c2, nkf_char c1) /* if (c2) c1 &= 0x7f; assertion */ + if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { + (*o_zconv)(c2,c1); + return; + } if (x0201_f && z_prev2==X0201) { /* X0201 */ if (c1==(0xde&0x7f)) { /* 濁点 */ z_prev2=0; @@ -4942,15 +4986,20 @@ void set_input_codename(char *codename) void print_guessed_code(char *filename) { char *codename = "BINARY"; + char *str_crmode = NULL; if (!is_inputcode_mixed) { if (strcmp(input_codename, "") == 0) { codename = "ASCII"; } else { codename = input_codename; } + if (crmode_f == CR) str_crmode = "CR"; + else if (crmode_f == NL) str_crmode = "LF"; + else if (crmode_f == CRLF) str_crmode = "CRLF"; } if (filename != NULL) printf("%s:", filename); - printf("%s\n", codename); + if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode); + else printf("%s\n", codename); } #endif /*WIN32DLL*/ @@ -5068,9 +5117,6 @@ nkf_char nfc_getc(FILE *f) int i=0, j, k=1, lower, upper; nkf_char buf[9]; const nkf_nfchar *array; -#if 0 - extern const struct normalization_pair normalization_table[]; -#endif buf[i] = (*g)(f); while (k > 0 && ((buf[i] & 0xc0) != 0x80)){ @@ -5437,7 +5483,7 @@ void open_mime(nkf_char mode) int i; int j; p = mime_pattern[0]; - for(i=0;mime_encode[i];i++) { + for(i=0;mime_pattern[i];i++) { if (mode == mime_encode[i]) { p = mime_pattern[i]; break; @@ -5643,10 +5689,21 @@ void mime_putc(nkf_char c) if (mimeout_mode=='Q') { if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { - if (c <= SPACE) { + if (c == CR || c == NL) { + close_mime(); + (*o_mputc)(c); + base64_count = 0; + return; + } else if (c <= SPACE) { close_mime(); - (*o_mputc)(SPACE); - base64_count++; + if (base64_count > 70) { + (*o_mputc)(NL); + base64_count = 0; + } + if (!nkf_isblank(c)) { + (*o_mputc)(SPACE); + base64_count++; + } } (*o_mputc)(c); base64_count++; @@ -5678,7 +5735,8 @@ void mime_putc(nkf_char c) mimeout_buf_count = 1; }else{ if (base64_count > 1 - && base64_count + mimeout_buf_count > 76){ + && base64_count + mimeout_buf_count > 76 + && mimeout_buf[0] != CR && mimeout_buf[0] != NL){ (*o_mputc)(NL); base64_count = 0; if (!nkf_isspace(mimeout_buf[0])){ diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c index e43ad553d6..fb6c3b7362 100644 --- a/ext/nkf/nkf-utf8/utf8tbl.c +++ b/ext/nkf/nkf-utf8/utf8tbl.c @@ -201,6 +201,20 @@ const unsigned short euc_to_utf8_AC[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short euc_to_utf8_AC_mac[] = { + 0x2664, 0x2667, 0x2661, 0x2662, 0x2660, 0x2663, 0x2665, + 0x2666, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x3020, 0x260E, 0x3004, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x261E, 0x261C, 0x261D, 0x261F, 0x21C6, 0x21C4, 0x21C5, + 0, 0x21E8, 0x21E6, 0x21E7, 0x21E9, 0x2192, 0x2190, 0x2191, + 0x2193, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short euc_to_utf8_AD[] = { 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, @@ -215,6 +229,20 @@ const unsigned short euc_to_utf8_AD[] = { 0x2252, 0x2261, 0x222B, 0x222E, 0x2211, 0x221A, 0x22A5, 0x2220, 0x221F, 0x22BF, 0x2235, 0x2229, 0x222A, 0, 0x3299, }; +const unsigned short euc_to_utf8_AD_mac[] = { + 0x65E5, 0x6708, 0x706B, 0x6C34, 0x6728, 0x91D1, 0x571F, + 0x796D, 0x795D, 0x81EA, 0x81F3, 0x3239, 0x547C, 0x3231, 0x8CC7, + 0x540D, 0x3232, 0x5B66, 0x8CA1, 0x793E, 0x7279, 0x76E3, 0x4F01, + 0x5354, 0x52B4, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0, + 0x3349, 0x3314, 0x3322, 0x334D, 0x3318, 0x3327, 0x3303, 0x3336, + 0x3351, 0x3357, 0x330D, 0x3326, 0x3323, 0x332B, 0x334A, 0x333B, + 0x339C, 0x339D, 0x339E, 0x338E, 0x338F, 0x33C4, 0x33A1, 0, + 0, 0, 0, 0, 0, 0, 0, 0x337B, + 0x301D, 0x301F, 0x2116, 0x33CD, 0x2121, 0x32A4, 0x32A5, 0x32A6, + 0x32A7, 0x32A8, 0x3231, 0x3232, 0x3239, 0x337E, 0x337D, 0x337C, + 0x2252, 0x5927, 0x5C0F, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8, + 0x533B, 0x8CA1, 0x512A, 0x52B4, 0x5370, 0x63A7, 0x79D8, +}; const unsigned short euc_to_utf8_AE[] = { 0x3349, 0x3322, 0x334D, 0x3314, 0x3316, 0x3305, 0x3333, 0x334E, 0x3303, 0x3336, 0x3318, 0x3315, 0x3327, 0x3351, 0x334A, @@ -2346,6 +2374,33 @@ const unsigned short *const euc_to_utf8_2bytes_ms[] = { 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB, euc_to_utf8_FC_ms, 0, 0, }; +/* CP10001 */ +const unsigned short *const euc_to_utf8_2bytes_mac[] = { + euc_to_utf8_A1_ms, euc_to_utf8_A2_ms, euc_to_utf8_A3, + euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7, + euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB, + euc_to_utf8_AC_mac, euc_to_utf8_AD_mac, euc_to_utf8_AE, euc_to_utf8_AF, + euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3, + euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7, + euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB, + euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF, + euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3, + euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7, + euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB, + euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF, + euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3, + euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7, + euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB, + euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF, + euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3, + euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7, + euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB, + euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF, + euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3, + euc_to_utf8_F4, euc_to_utf8_F5, 0, 0, + 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB, + euc_to_utf8_FC_ms, 0, 0, +}; #ifdef X0212_ENABLE const unsigned short *const x0212_to_utf8_2bytes[] = { @@ -2397,6 +2452,16 @@ const unsigned short utf8_to_euc_C2_ms[] = { 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0, 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244, }; +const unsigned short utf8_to_euc_C2_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178, + 0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234, + 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0, + 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244, +}; const unsigned short utf8_to_euc_C2_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2547,6 +2612,16 @@ const unsigned short utf8_to_euc_E284[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E284_mac[] = { + 0, 0, 0, 0x216E, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2B7B, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0, + 0, 0, 0, 0x2272, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E285[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2557,6 +2632,16 @@ const unsigned short utf8_to_euc_E285[] = { 0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A, 0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E285_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28, + 0x2A29, 0x2A2A, 0, 0, 0, 0, 0, 0, + 0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C, + 0x2A3D, 0x2A3E, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E286[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2597,6 +2682,16 @@ const unsigned short utf8_to_euc_E288_932[] = { 0, 0, 0, 0, 0x2168, 0x2268, 0, 0, 0, 0, 0, 0, 0, 0x2266, 0, 0, }; +const unsigned short utf8_to_euc_E288_mac[] = { + 0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260, + 0x223A, 0, 0, 0x223B, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2F22, + 0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A, + 0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2F21, 0, + 0, 0, 0, 0, 0x2168, 0x2268, 0, 0, + 0, 0, 0, 0, 0, 0x2266, 0, 0, +}; const unsigned short utf8_to_euc_E289[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2617,6 +2712,16 @@ const unsigned short utf8_to_euc_E28A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D79, }; +const unsigned short utf8_to_euc_E28A_mac[] = { + 0, 0, 0x223E, 0x223F, 0, 0, 0x223C, 0x223D, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x225D, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0x2F23, +}; const unsigned short utf8_to_euc_E28C[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2637,6 +2742,16 @@ const unsigned short utf8_to_euc_E291[] = { 0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E291_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928, + 0x2929, 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930, + 0x2931, 0x2932, 0x2933, 0x2934, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E294[] = { 0x2821, 0x282C, 0x2822, 0x282D, 0, 0, 0, 0, 0, 0, 0, 0, 0x2823, 0, 0, 0x282E, @@ -2767,6 +2882,16 @@ const unsigned short utf8_to_euc_E388[] = { 0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0, 0, 0x2D6C, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E388_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2D2E, 0x2D31, 0, 0, 0, 0, 0, + 0, 0x2D2C, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2777,6 +2902,16 @@ const unsigned short utf8_to_euc_E38A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38A_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x2D73, 0x2D74, 0x2D75, 0x2D76, + 0x2D77, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38C[] = { 0, 0, 0, 0x2D46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D4A, 0, 0, @@ -2787,6 +2922,16 @@ const unsigned short utf8_to_euc_E38C[] = { 0, 0, 0, 0, 0, 0, 0x2D47, 0, 0, 0, 0, 0x2D4F, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38C_mac[] = { + 0, 0, 0, 0x2E29, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x2E32, 0, 0, + 0, 0, 0, 0, 0x2E24, 0, 0, 0, + 0x2E2B, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x2E22, 0x2E34, 0, 0, 0x2E35, 0x2E2D, + 0, 0, 0, 0x2E37, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2E2A, 0, + 0, 0, 0, 0x2E36, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38D[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0, @@ -2797,6 +2942,16 @@ const unsigned short utf8_to_euc_E38D[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0, }; +const unsigned short utf8_to_euc_E38D_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2E21, 0x2E2F, 0, 0, 0x2E23, 0, 0, + 0, 0x2E2E, 0, 0, 0, 0, 0, 0x2E31, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0x2E6A, 0x2E69, 0x2E68, 0x2E67, 0, +}; const unsigned short utf8_to_euc_E38E[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54, @@ -2807,6 +2962,16 @@ const unsigned short utf8_to_euc_E38E[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38E_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2B2B, 0x2B2D, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x2B21, 0x2B23, 0x2B29, 0, + 0, 0x2B27, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38F[] = { 0, 0, 0, 0, 0x2D55, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D63, 0, 0, @@ -2817,6 +2982,16 @@ const unsigned short utf8_to_euc_E38F[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38F_mac[] = { + 0, 0, 0, 0, 0x2B2E, 0, 0, 0, + 0, 0, 0, 0, 0, 0x2B7C, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E4B8[] = { 0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C, 0x3E66, 0x3B30, 0x3E65, 0x323C, 0xB024, 0x4954, 0x4D3F, 0, @@ -6171,6 +6346,24 @@ const unsigned short *const utf8_to_euc_E2_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_E2_mac[] = { + utf8_to_euc_E280_932, 0, 0, 0, + utf8_to_euc_E284_mac, utf8_to_euc_E285_mac, utf8_to_euc_E286, utf8_to_euc_E287, + utf8_to_euc_E288_mac, utf8_to_euc_E289, utf8_to_euc_E28A_mac, 0, + utf8_to_euc_E28C, 0, 0, 0, + 0, utf8_to_euc_E291_mac, 0, 0, + utf8_to_euc_E294, utf8_to_euc_E295, utf8_to_euc_E296, utf8_to_euc_E297, + utf8_to_euc_E298, utf8_to_euc_E299, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const utf8_to_euc_E3[] = { utf8_to_euc_E380, utf8_to_euc_E381, utf8_to_euc_E382, utf8_to_euc_E383, 0, 0, 0, 0, @@ -6207,6 +6400,24 @@ const unsigned short *const utf8_to_euc_E3_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_E3_mac[] = { + utf8_to_euc_E380_932, utf8_to_euc_E381, utf8_to_euc_E382_932, utf8_to_euc_E383, + 0, 0, 0, 0, + utf8_to_euc_E388_mac, 0, utf8_to_euc_E38A_mac, 0, + utf8_to_euc_E38C_mac, utf8_to_euc_E38D_mac, utf8_to_euc_E38E_mac, utf8_to_euc_E38F_mac, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const utf8_to_euc_E4[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -6441,6 +6652,36 @@ const unsigned short *const utf8_to_euc_2bytes_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_2bytes_mac[] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, utf8_to_euc_C2_mac, utf8_to_euc_C3, + utf8_to_euc_C4, utf8_to_euc_C5, 0, utf8_to_euc_C7, + 0, 0, 0, utf8_to_euc_CB, + 0, 0, utf8_to_euc_CE, utf8_to_euc_CF, + utf8_to_euc_D0, utf8_to_euc_D1, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const *const utf8_to_euc_3bytes[] = { 0, 0, utf8_to_euc_E2, utf8_to_euc_E3, utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7, @@ -6459,6 +6700,12 @@ const unsigned short *const *const utf8_to_euc_3bytes_932[] = { utf8_to_euc_E8, utf8_to_euc_E9, 0, 0, 0, 0, 0, utf8_to_euc_EF_ms, }; +const unsigned short *const *const utf8_to_euc_3bytes_mac[] = { + 0, 0, utf8_to_euc_E2_mac, utf8_to_euc_E3_mac, + utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7, + utf8_to_euc_E8, utf8_to_euc_E9, 0, 0, + 0, 0, 0, utf8_to_euc_EF_ms, +}; #ifdef UNICODE_NORMALIZATION diff --git a/ext/nkf/nkf-utf8/utf8tbl.h b/ext/nkf/nkf-utf8/utf8tbl.h index 1f40f0b363..29413d4fac 100644 --- a/ext/nkf/nkf-utf8/utf8tbl.h +++ b/ext/nkf/nkf-utf8/utf8tbl.h @@ -5,6 +5,7 @@ extern const unsigned short euc_to_utf8_1byte[]; extern const unsigned short *const euc_to_utf8_2bytes[]; extern const unsigned short *const euc_to_utf8_2bytes_ms[]; +extern const unsigned short *const euc_to_utf8_2bytes_mac[]; extern const unsigned short *const x0212_to_utf8_2bytes[]; #endif /* UTF8_OUTPUT_ENABLE */ @@ -12,9 +13,11 @@ extern const unsigned short *const x0212_to_utf8_2bytes[]; extern const unsigned short *const utf8_to_euc_2bytes[]; extern const unsigned short *const utf8_to_euc_2bytes_ms[]; extern const unsigned short *const utf8_to_euc_2bytes_932[]; +extern const unsigned short *const utf8_to_euc_2bytes_mac[]; extern const unsigned short *const *const utf8_to_euc_3bytes[]; extern const unsigned short *const *const utf8_to_euc_3bytes_ms[]; extern const unsigned short *const *const utf8_to_euc_3bytes_932[]; +extern const unsigned short *const *const utf8_to_euc_3bytes_mac[]; #endif /* UTF8_INPUT_ENABLE */ #ifdef UNICODE_NORMALIZATION -- cgit v1.2.3