summaryrefslogtreecommitdiff
path: root/ext/nkf
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-06-04 12:31:26 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-06-04 12:31:26 +0000
commitaf1c4167287b9353fec766f932fe4afe97116ad4 (patch)
treefe952114fed9f49b11fa58533478ef130eddeba7 /ext/nkf
parent6b3ef2249cc6d03b9e153a1bbfb24eb12d4032a5 (diff)
* lib/json.rb, lib/json, ext/json, test/json:
import JSON library. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12428 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf')
-rw-r--r--ext/nkf/nkf-utf8/nkf.c198
-rw-r--r--ext/nkf/nkf-utf8/utf8tbl.c247
-rw-r--r--ext/nkf/nkf-utf8/utf8tbl.h3
3 files changed, 378 insertions, 70 deletions
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c
index 3cd1b160da..30bb6c47b8 100644
--- a/ext/nkf/nkf-utf8/nkf.c
+++ b/ext/nkf/nkf-utf8/nkf.c
@@ -41,7 +41,7 @@
***********************************************************************/
/* $Id$ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-01-28"
+#define NKF_RELEASE_DATE "2007-05-28"
#include "config.h"
#include "utf8tbl.h"
@@ -351,10 +351,12 @@ static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
* 0: Shift_JIS, eucJP-ascii
* 1: eucJP-ms
* 2: CP932, CP51932
+ * 3: CP10001
*/
-#define UCS_MAP_ASCII 0
-#define UCS_MAP_MS 1
-#define UCS_MAP_CP932 2
+#define UCS_MAP_ASCII 0
+#define UCS_MAP_MS 1
+#define UCS_MAP_CP932 2
+#define UCS_MAP_CP10001 3
static int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
@@ -1233,6 +1235,14 @@ void options(unsigned char *cp)
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ input_f = SJIS_INPUT;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
input_f = EUC_INPUT;
@@ -1371,6 +1381,11 @@ void options(unsigned char *cp)
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ output_conv = s_oconv;
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
output_conv = e_oconv;
@@ -2676,6 +2691,12 @@ nkf_char kanji_convert(FILE *f)
} else { /* bogus code, skip SSO and one byte */
NEXT;
}
+ } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
+ (c1 == 0xFD || c1 == 0xFE)) {
+ /* CP10001 */
+ c2 = X0201;
+ c1 &= 0x7f;
+ SEND;
} else {
/* already established */
c2 = c1;
@@ -2885,35 +2906,41 @@ nkf_char kanji_convert(FILE *f)
(*oconv)(0, ESC);
SEND;
}
- } else if ((c1 == NL || c1 == CR) && broken_f&4) {
- input_mode = ASCII; set_iconv(FALSE, 0);
- SEND;
- } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
- if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else {
- i_ungetc(c1,f);
- }
- c1 = NL;
- SEND;
- } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
- if ((c1=(*i_getc)(f))!=EOF) {
- if (c1==SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else {
- i_ungetc(c1,f);
+ } else if (c1 == NL || c1 == CR) {
+ if (broken_f&4) {
+ input_mode = ASCII; set_iconv(FALSE, 0);
+ SEND;
+ } else if (mime_decode_f && !mime_decode_mode){
+ if (c1 == NL) {
+ if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = NL;
+ SEND;
+ } else { /* if (c1 == CR)*/
+ if ((c1=(*i_getc)(f))!=EOF) {
+ if (c1==SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ i_ungetc(NL,f);
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = CR;
+ SEND;
}
- i_ungetc(NL,f);
- } else {
- i_ungetc(c1,f);
}
- c1 = CR;
- SEND;
+ if (crmode_f == CR && c1 == NL) crmode_f = CRLF;
+ else crmode_f = c1;
} else if (c1 == DEL && input_mode == X0208 ) {
/* CP5022x */
c2 = c1;
@@ -3125,9 +3152,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
if (!cp932inv_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_cp932[3][189];
-#endif
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
c2 = val >> 8;
@@ -3136,9 +3160,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
}
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@@ -3148,9 +3169,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (!x0213_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_x0212[3][189];
-#endif
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val > 0x7FFF){
@@ -3481,14 +3499,6 @@ nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
-#if 0
- extern const unsigned short *const utf8_to_euc_2bytes[];
- extern const unsigned short *const utf8_to_euc_2bytes_ms[];
- extern const unsigned short *const utf8_to_euc_2bytes_932[];
- extern const unsigned short *const *const utf8_to_euc_3bytes[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
-#endif
const unsigned short *const *pp;
const unsigned short *const *const *ppp;
static const int no_best_fit_chars_table_C2[] =
@@ -3538,11 +3548,27 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
}
}else if(ms_ucs_map_f == UCS_MAP_MS){
if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xC2:
+ switch(c1){
+ case 0xA2:
+ case 0xA3:
+ case 0xA5:
+ case 0xA6:
+ case 0xAC:
+ case 0xAF:
+ case 0xB8:
+ return 1;
+ }
+ break;
+ }
}
}
pp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
utf8_to_euc_2bytes;
ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
}else if(c0 < 0xF0){
@@ -3565,6 +3591,19 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
if(c1 == 0x80 || c0 == 0x9C) return 1;
break;
}
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xE3:
+ switch(c1){
+ case 0x82:
+ if(c0 == 0x94) return 1;
+ break;
+ case 0x83:
+ if(c0 == 0xBB) return 1;
+ break;
+ }
+ break;
+ }
}else{
switch(c2){
case 0xE2:
@@ -3596,8 +3635,10 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
ppp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
utf8_to_euc_3bytes;
ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
+// fprintf(stderr, "wret: %X %X %X -> %X %X\n",c2,c1,c0,*p2,*p1,ret);
}else return -1;
#ifdef SHIFTJIS_CP932
if (!ret && !cp932inv_f && is_eucg3(*p2)) {
@@ -3739,15 +3780,17 @@ void encode_fallback_subchar(nkf_char c)
#ifdef UTF8_OUTPUT_ENABLE
nkf_char e2w_conv(nkf_char c2, nkf_char c1)
{
-#if 0
- extern const unsigned short euc_to_utf8_1byte[];
- extern const unsigned short *const euc_to_utf8_2bytes[];
- extern const unsigned short *const euc_to_utf8_2bytes_ms[];
- extern const unsigned short *const x0212_to_utf8_2bytes[];
-#endif
const unsigned short *p;
if (c2 == X0201) {
+ if (ms_ucs_map_f == UCS_MAP_CP10001) {
+ switch (c1) {
+ case 0x20:
+ return 0xA0;
+ case 0x7D:
+ return 0xA9;
+ }
+ }
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (is_eucg3(c2)){
@@ -3764,7 +3807,10 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1)
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
+ p =
+ ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
+ euc_to_utf8_2bytes_ms[c2];
else
return 0;
}
@@ -4069,9 +4115,6 @@ nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
else if(nkf_isgraph(ndx)){
nkf_char val = 0;
const unsigned short *ptr;
-#if 0
- extern const unsigned short *const x0212_shiftjis[];
-#endif
ptr = x0212_shiftjis[ndx - 0x21];
if (ptr){
val = ptr[(c1 & 0x7f) - 0x21];
@@ -4147,9 +4190,6 @@ void s_oconv(nkf_char c2, nkf_char c1)
#ifdef SHIFTJIS_CP932
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@@ -4539,6 +4579,10 @@ void z_conv(nkf_char c2, nkf_char c1)
/* if (c2) c1 &= 0x7f; assertion */
+ if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
+ (*o_zconv)(c2,c1);
+ return;
+ }
if (x0201_f && z_prev2==X0201) { /* X0201 */
if (c1==(0xde&0x7f)) { /* 濁点 */
z_prev2=0;
@@ -4942,15 +4986,20 @@ void set_input_codename(char *codename)
void print_guessed_code(char *filename)
{
char *codename = "BINARY";
+ char *str_crmode = NULL;
if (!is_inputcode_mixed) {
if (strcmp(input_codename, "") == 0) {
codename = "ASCII";
} else {
codename = input_codename;
}
+ if (crmode_f == CR) str_crmode = "CR";
+ else if (crmode_f == NL) str_crmode = "LF";
+ else if (crmode_f == CRLF) str_crmode = "CRLF";
}
if (filename != NULL) printf("%s:", filename);
- printf("%s\n", codename);
+ if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode);
+ else printf("%s\n", codename);
}
#endif /*WIN32DLL*/
@@ -5068,9 +5117,6 @@ nkf_char nfc_getc(FILE *f)
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
const nkf_nfchar *array;
-#if 0
- extern const struct normalization_pair normalization_table[];
-#endif
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
@@ -5437,7 +5483,7 @@ void open_mime(nkf_char mode)
int i;
int j;
p = mime_pattern[0];
- for(i=0;mime_encode[i];i++) {
+ for(i=0;mime_pattern[i];i++) {
if (mode == mime_encode[i]) {
p = mime_pattern[i];
break;
@@ -5643,10 +5689,21 @@ void mime_putc(nkf_char c)
if (mimeout_mode=='Q') {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
- if (c <= SPACE) {
+ if (c == CR || c == NL) {
+ close_mime();
+ (*o_mputc)(c);
+ base64_count = 0;
+ return;
+ } else if (c <= SPACE) {
close_mime();
- (*o_mputc)(SPACE);
- base64_count++;
+ if (base64_count > 70) {
+ (*o_mputc)(NL);
+ base64_count = 0;
+ }
+ if (!nkf_isblank(c)) {
+ (*o_mputc)(SPACE);
+ base64_count++;
+ }
}
(*o_mputc)(c);
base64_count++;
@@ -5678,7 +5735,8 @@ void mime_putc(nkf_char c)
mimeout_buf_count = 1;
}else{
if (base64_count > 1
- && base64_count + mimeout_buf_count > 76){
+ && base64_count + mimeout_buf_count > 76
+ && mimeout_buf[0] != CR && mimeout_buf[0] != NL){
(*o_mputc)(NL);
base64_count = 0;
if (!nkf_isspace(mimeout_buf[0])){
diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c
index e43ad553d6..fb6c3b7362 100644
--- a/ext/nkf/nkf-utf8/utf8tbl.c
+++ b/ext/nkf/nkf-utf8/utf8tbl.c
@@ -201,6 +201,20 @@ const unsigned short euc_to_utf8_AC[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short euc_to_utf8_AC_mac[] = {
+ 0x2664, 0x2667, 0x2661, 0x2662, 0x2660, 0x2663, 0x2665,
+ 0x2666, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x3020, 0x260E, 0x3004,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x261E, 0x261C, 0x261D, 0x261F, 0x21C6, 0x21C4, 0x21C5,
+ 0, 0x21E8, 0x21E6, 0x21E7, 0x21E9, 0x2192, 0x2190, 0x2191,
+ 0x2193, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short euc_to_utf8_AD[] = {
0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466,
0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E,
@@ -215,6 +229,20 @@ const unsigned short euc_to_utf8_AD[] = {
0x2252, 0x2261, 0x222B, 0x222E, 0x2211, 0x221A, 0x22A5, 0x2220,
0x221F, 0x22BF, 0x2235, 0x2229, 0x222A, 0, 0x3299,
};
+const unsigned short euc_to_utf8_AD_mac[] = {
+ 0x65E5, 0x6708, 0x706B, 0x6C34, 0x6728, 0x91D1, 0x571F,
+ 0x796D, 0x795D, 0x81EA, 0x81F3, 0x3239, 0x547C, 0x3231, 0x8CC7,
+ 0x540D, 0x3232, 0x5B66, 0x8CA1, 0x793E, 0x7279, 0x76E3, 0x4F01,
+ 0x5354, 0x52B4, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0,
+ 0x3349, 0x3314, 0x3322, 0x334D, 0x3318, 0x3327, 0x3303, 0x3336,
+ 0x3351, 0x3357, 0x330D, 0x3326, 0x3323, 0x332B, 0x334A, 0x333B,
+ 0x339C, 0x339D, 0x339E, 0x338E, 0x338F, 0x33C4, 0x33A1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0x337B,
+ 0x301D, 0x301F, 0x2116, 0x33CD, 0x2121, 0x32A4, 0x32A5, 0x32A6,
+ 0x32A7, 0x32A8, 0x3231, 0x3232, 0x3239, 0x337E, 0x337D, 0x337C,
+ 0x2252, 0x5927, 0x5C0F, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8,
+ 0x533B, 0x8CA1, 0x512A, 0x52B4, 0x5370, 0x63A7, 0x79D8,
+};
const unsigned short euc_to_utf8_AE[] = {
0x3349, 0x3322, 0x334D, 0x3314, 0x3316, 0x3305, 0x3333,
0x334E, 0x3303, 0x3336, 0x3318, 0x3315, 0x3327, 0x3351, 0x334A,
@@ -2346,6 +2374,33 @@ const unsigned short *const euc_to_utf8_2bytes_ms[] = {
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
euc_to_utf8_FC_ms, 0, 0,
};
+/* CP10001 */
+const unsigned short *const euc_to_utf8_2bytes_mac[] = {
+ euc_to_utf8_A1_ms, euc_to_utf8_A2_ms, euc_to_utf8_A3,
+ euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7,
+ euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB,
+ euc_to_utf8_AC_mac, euc_to_utf8_AD_mac, euc_to_utf8_AE, euc_to_utf8_AF,
+ euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3,
+ euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7,
+ euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB,
+ euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF,
+ euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3,
+ euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7,
+ euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB,
+ euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF,
+ euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3,
+ euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7,
+ euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB,
+ euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF,
+ euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3,
+ euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7,
+ euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB,
+ euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF,
+ euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3,
+ euc_to_utf8_F4, euc_to_utf8_F5, 0, 0,
+ 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
+ euc_to_utf8_FC_ms, 0, 0,
+};
#ifdef X0212_ENABLE
const unsigned short *const x0212_to_utf8_2bytes[] = {
@@ -2397,6 +2452,16 @@ const unsigned short utf8_to_euc_C2_ms[] = {
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
+const unsigned short utf8_to_euc_C2_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
+ 0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
+ 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
+ 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
+};
const unsigned short utf8_to_euc_C2_932[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2547,6 +2612,16 @@ const unsigned short utf8_to_euc_E284[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E284_mac[] = {
+ 0, 0, 0, 0x216E, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2B7B, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2272, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E285[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2557,6 +2632,16 @@ const unsigned short utf8_to_euc_E285[] = {
0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A,
0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E285_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28,
+ 0x2A29, 0x2A2A, 0, 0, 0, 0, 0, 0,
+ 0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C,
+ 0x2A3D, 0x2A3E, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E286[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2597,6 +2682,16 @@ const unsigned short utf8_to_euc_E288_932[] = {
0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
0, 0, 0, 0, 0, 0x2266, 0, 0,
};
+const unsigned short utf8_to_euc_E288_mac[] = {
+ 0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260,
+ 0x223A, 0, 0, 0x223B, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2F22,
+ 0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A,
+ 0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2F21, 0,
+ 0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
+ 0, 0, 0, 0, 0, 0x2266, 0, 0,
+};
const unsigned short utf8_to_euc_E289[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2617,6 +2712,16 @@ const unsigned short utf8_to_euc_E28A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0x2D79,
};
+const unsigned short utf8_to_euc_E28A_mac[] = {
+ 0, 0, 0x223E, 0x223F, 0, 0, 0x223C, 0x223D,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x225D, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0x2F23,
+};
const unsigned short utf8_to_euc_E28C[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2637,6 +2742,16 @@ const unsigned short utf8_to_euc_E291[] = {
0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E291_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928,
+ 0x2929, 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930,
+ 0x2931, 0x2932, 0x2933, 0x2934, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E294[] = {
0x2821, 0x282C, 0x2822, 0x282D, 0, 0, 0, 0,
0, 0, 0, 0, 0x2823, 0, 0, 0x282E,
@@ -2767,6 +2882,16 @@ const unsigned short utf8_to_euc_E388[] = {
0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0,
0, 0x2D6C, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E388_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2D2E, 0x2D31, 0, 0, 0, 0, 0,
+ 0, 0x2D2C, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2777,6 +2902,16 @@ const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38A_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0x2D73, 0x2D74, 0x2D75, 0x2D76,
+ 0x2D77, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0x2D46, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D4A, 0, 0,
@@ -2787,6 +2922,16 @@ const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0, 0, 0, 0x2D47, 0,
0, 0, 0, 0x2D4F, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38C_mac[] = {
+ 0, 0, 0, 0x2E29, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2E32, 0, 0,
+ 0, 0, 0, 0, 0x2E24, 0, 0, 0,
+ 0x2E2B, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0x2E22, 0x2E34, 0, 0, 0x2E35, 0x2E2D,
+ 0, 0, 0, 0x2E37, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2E2A, 0,
+ 0, 0, 0, 0x2E36, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0,
@@ -2797,6 +2942,16 @@ const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0,
};
+const unsigned short utf8_to_euc_E38D_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2E21, 0x2E2F, 0, 0, 0x2E23, 0, 0,
+ 0, 0x2E2E, 0, 0, 0, 0, 0, 0x2E31,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2E6A, 0x2E69, 0x2E68, 0x2E67, 0,
+};
const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54,
@@ -2807,6 +2962,16 @@ const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38E_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2B2B, 0x2B2D,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0x2B21, 0x2B23, 0x2B29, 0,
+ 0, 0x2B27, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0x2D55, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D63, 0, 0,
@@ -2817,6 +2982,16 @@ const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38F_mac[] = {
+ 0, 0, 0, 0, 0x2B2E, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2B7C, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E4B8[] = {
0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C,
0x3E66, 0x3B30, 0x3E65, 0x323C, 0xB024, 0x4954, 0x4D3F, 0,
@@ -6171,6 +6346,24 @@ const unsigned short *const utf8_to_euc_E2_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_E2_mac[] = {
+ utf8_to_euc_E280_932, 0, 0, 0,
+ utf8_to_euc_E284_mac, utf8_to_euc_E285_mac, utf8_to_euc_E286, utf8_to_euc_E287,
+ utf8_to_euc_E288_mac, utf8_to_euc_E289, utf8_to_euc_E28A_mac, 0,
+ utf8_to_euc_E28C, 0, 0, 0,
+ 0, utf8_to_euc_E291_mac, 0, 0,
+ utf8_to_euc_E294, utf8_to_euc_E295, utf8_to_euc_E296, utf8_to_euc_E297,
+ utf8_to_euc_E298, utf8_to_euc_E299, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const utf8_to_euc_E3[] = {
utf8_to_euc_E380, utf8_to_euc_E381, utf8_to_euc_E382, utf8_to_euc_E383,
0, 0, 0, 0,
@@ -6207,6 +6400,24 @@ const unsigned short *const utf8_to_euc_E3_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_E3_mac[] = {
+ utf8_to_euc_E380_932, utf8_to_euc_E381, utf8_to_euc_E382_932, utf8_to_euc_E383,
+ 0, 0, 0, 0,
+ utf8_to_euc_E388_mac, 0, utf8_to_euc_E38A_mac, 0,
+ utf8_to_euc_E38C_mac, utf8_to_euc_E38D_mac, utf8_to_euc_E38E_mac, utf8_to_euc_E38F_mac,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const utf8_to_euc_E4[] = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -6441,6 +6652,36 @@ const unsigned short *const utf8_to_euc_2bytes_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_2bytes_mac[] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, utf8_to_euc_C2_mac, utf8_to_euc_C3,
+ utf8_to_euc_C4, utf8_to_euc_C5, 0, utf8_to_euc_C7,
+ 0, 0, 0, utf8_to_euc_CB,
+ 0, 0, utf8_to_euc_CE, utf8_to_euc_CF,
+ utf8_to_euc_D0, utf8_to_euc_D1, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const *const utf8_to_euc_3bytes[] = {
0, 0, utf8_to_euc_E2, utf8_to_euc_E3,
utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
@@ -6459,6 +6700,12 @@ const unsigned short *const *const utf8_to_euc_3bytes_932[] = {
utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
0, 0, 0, utf8_to_euc_EF_ms,
};
+const unsigned short *const *const utf8_to_euc_3bytes_mac[] = {
+ 0, 0, utf8_to_euc_E2_mac, utf8_to_euc_E3_mac,
+ utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
+ utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
+ 0, 0, 0, utf8_to_euc_EF_ms,
+};
#ifdef UNICODE_NORMALIZATION
diff --git a/ext/nkf/nkf-utf8/utf8tbl.h b/ext/nkf/nkf-utf8/utf8tbl.h
index 1f40f0b363..29413d4fac 100644
--- a/ext/nkf/nkf-utf8/utf8tbl.h
+++ b/ext/nkf/nkf-utf8/utf8tbl.h
@@ -5,6 +5,7 @@
extern const unsigned short euc_to_utf8_1byte[];
extern const unsigned short *const euc_to_utf8_2bytes[];
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
+extern const unsigned short *const euc_to_utf8_2bytes_mac[];
extern const unsigned short *const x0212_to_utf8_2bytes[];
#endif /* UTF8_OUTPUT_ENABLE */
@@ -12,9 +13,11 @@ extern const unsigned short *const x0212_to_utf8_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
extern const unsigned short *const utf8_to_euc_2bytes_932[];
+extern const unsigned short *const utf8_to_euc_2bytes_mac[];
extern const unsigned short *const *const utf8_to_euc_3bytes[];
extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
+extern const unsigned short *const *const utf8_to_euc_3bytes_mac[];
#endif /* UTF8_INPUT_ENABLE */
#ifdef UNICODE_NORMALIZATION