diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2005-03-03 14:50:01 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2005-03-03 14:50:01 +0000 |
commit | be8c5a34b5e47c4c894a1bf3be40db8312dd65a1 (patch) | |
tree | 65c52b6ce2aea88d9a1ce0a37343a8985d9e2b2b /ext/nkf | |
parent | b4b620bcfa39834949b876bef0cf176831d7a779 (diff) |
* ext/nkf/nkf-utf8/nkf.c: follow nkf 1.62
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8062 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf')
-rw-r--r-- | ext/nkf/nkf-utf8/nkf.c | 658 |
1 files changed, 511 insertions, 147 deletions
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 2db2500454..714a1fc44a 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -41,7 +41,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.4" -#define NKF_RELEASE_DATE "2004-12-01" +#define NKF_RELEASE_DATE "2005-02-20" #include "config.h" static char *CopyRight = @@ -149,12 +149,20 @@ static char *CopyRight = #ifndef MSDOS /* UNIX, OS/2 */ #include <unistd.h> #include <utime.h> -#else +#else /* defined(MSDOS) */ +#ifdef __WIN32__ +#ifdef __BORLANDC__ /* BCC32 */ +#include <utime.h> +#else /* !defined(__BORLANDC__) */ +#include <sys/utime.h> +#endif /* (__BORLANDC__) */ +#else /* !defined(__WIN32__) */ #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */ #include <sys/utime.h> #elif defined(__TURBOC__) /* BCC */ #include <utime.h> #elif defined(LSI_C) /* LSI C */ +#endif /* (__WIN32__) */ #endif #endif #endif @@ -182,6 +190,7 @@ static char *CopyRight = #define X0201 2 #define ISO8859_1 8 #define NO_X0201 3 +#define X0212 16 /* Input Assumption */ @@ -281,7 +290,7 @@ STATIC int noconvert PROTO((FILE *f)); STATIC int kanji_convert PROTO((FILE *f)); STATIC int h_conv PROTO((FILE *f,int c2,int c1)); STATIC int push_hold_buf PROTO((int c2)); -STATIC void set_iconv PROTO((int f, int (*iconv_func)())); +STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0))); STATIC int s_iconv PROTO((int c2,int c1,int c0)); STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1)); STATIC int e_iconv PROTO((int c2,int c1,int c0)); @@ -298,7 +307,7 @@ STATIC void w_oconv PROTO((int c2,int c1)); STATIC void w_oconv16 PROTO((int c2,int c1)); #endif STATIC void e_oconv PROTO((int c2,int c1)); -STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1)); +STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1)); STATIC void s_oconv PROTO((int c2,int c1)); STATIC void j_oconv PROTO((int c2,int c1)); STATIC void fold_conv PROTO((int c2,int c1)); @@ -331,13 +340,14 @@ STATIC int mime_ungetc_buf PROTO((int c,FILE *f)); STATIC int mime_integrity PROTO((FILE *f,unsigned char *p)); STATIC int base64decode PROTO((int c)); +STATIC void mime_prechar PROTO((int c2, int c1)); STATIC void mime_putc PROTO((int c)); STATIC void open_mime PROTO((int c)); STATIC void close_mime PROTO(()); STATIC void usage PROTO(()); STATIC void version PROTO(()); STATIC void options PROTO((unsigned char *c)); -#ifdef PERL_XS +#if defined(PERL_XS) || defined(WIN32DLL) STATIC void reinit PROTO(()); #endif @@ -368,6 +378,7 @@ static int hira_f = FALSE; /* hira/kata henkan */ static int input_f = FALSE; /* non fixed input code */ static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */ +static int mime_decode_f = FALSE; /* mime decode is explicitly on */ static int mimebuf_f = FALSE; /* MIME buffered input */ static int broken_f = FALSE; /* convert ESC-less broken JIS */ static int iso8859_f = FALSE; /* ISO8859 through */ @@ -437,8 +448,15 @@ STATIC int cp932inv_f = TRUE; #define CP932INV_TABLE_BEGIN (0xed) #define CP932INV_TABLE_END (0xee) +/* STATIC int cp932_conv PROTO((int c2, int c1)); */ #endif /* SHIFTJIS_CP932 */ +#ifdef X0212_ENABLE +STATIC int x0212_f = FALSE; +static int x0212_shift PROTO((int c)); +static int x0212_unshift PROTO((int c)); +#endif + STATIC unsigned char prefix_table[256]; STATIC void e_status PROTO((struct input_code *, int)); @@ -638,7 +656,14 @@ static int crmode_f = 0; /* CR, NL, CRLF */ static int end_check; #endif /*Easy Win */ -#ifndef PERL_XS +#define STD_GC_BUFSIZE (256) +int std_gc_buf[STD_GC_BUFSIZE]; +int std_gc_ndx; + +#ifdef WIN32DLL +#include "nkf32dll.c" +#elif defined(PERL_XS) +#else /* WIN32DLL */ int main(argc, argv) int argc; @@ -807,7 +832,7 @@ main(argc, argv) #ifdef OVERWRITE if (overwrite) { struct stat sb; -#if defined(MSDOS) && !defined(__MINGW32__) +#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) time_t tb[2]; #else struct utimbuf tb; @@ -827,7 +852,7 @@ main(argc, argv) } /* $B%?%$%`%9%?%s%W$rI|85(B */ -#if defined(MSDOS) && !defined(__MINGW32__) +#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) tb[0] = tb[1] = sb.st_mtime; if (utime(outfname, tb)) { fprintf(stderr, "Can't set timestamp %s\n", outfname); @@ -863,10 +888,10 @@ main(argc, argv) #else /* for Other OS */ if (file_out == TRUE) fclose(stdout); -#endif +#endif /*Easy Win */ return (0); } -#endif +#endif /* WIN32DLL */ static struct { @@ -895,6 +920,9 @@ struct { {"guess", "g"}, {"cp932", ""}, {"no-cp932", ""}, +#ifdef X0212_ENABLE + {"x0212", ""}, +#endif #ifdef UTF8_OUTPUT_ENABLE {"utf8", "w"}, {"utf16", "w16"}, @@ -956,7 +984,7 @@ options(cp) int j; p = (unsigned char *)long_option[i].name; for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++); - if (!*p || *p == cp[j]){ + if (*p == cp[j]){ p = &cp[j]; break; } @@ -1024,6 +1052,14 @@ options(cp) continue; } #endif + +#ifdef X0212_ENABLE + if (strcmp(long_option[i].name, "x0212") == 0){ + x0212_f = TRUE; + continue; + } +#endif + #ifdef EXEC_IO if (strcmp(long_option[i].name, "exec-in") == 0){ exec_f = 1; @@ -1210,6 +1246,7 @@ options(cp) } continue; case 'm': /* MIME support */ + mime_decode_f = TRUE; if (*cp=='B'||*cp=='Q') { mime_decode_mode = *cp++; mimebuf_f = FIXED_MIME; @@ -1484,6 +1521,11 @@ void s_status(ptr, c) ptr->stat = 2; status_push_ch(ptr, c); #endif /* SHIFTJIS_CP932 */ +#ifdef X0212_ENABLE + }else if (x0212_f && 0xf0 <= c && c <= 0xfc){ + ptr->stat = 1; + status_push_ch(ptr, c); +#endif /* X0212_ENABLE */ }else{ status_disable(ptr); } @@ -1498,8 +1540,8 @@ void s_status(ptr, c) status_disable(ptr); } break; -#ifdef SHIFTJIS_CP932 case 2: +#ifdef SHIFTJIS_CP932 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ status_push_ch(ptr, c); if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){ @@ -1508,9 +1550,11 @@ void s_status(ptr, c) break; } } +#endif /* SHIFTJIS_CP932 */ +#ifndef X0212_ENABLE status_disable(ptr); break; -#endif /* SHIFTJIS_CP932 */ +#endif } } @@ -1532,6 +1576,11 @@ void e_status(ptr, c) }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){ ptr->stat = 1; status_push_ch(ptr, c); +#ifdef X0212_ENABLE + }else if (0x8f == c){ + ptr->stat = 2; + status_push_ch(ptr, c); +#endif /* X0212_ENABLE */ }else{ status_disable(ptr); } @@ -1545,6 +1594,15 @@ void e_status(ptr, c) status_disable(ptr); } break; +#ifdef X0212_ENABLE + case 2: + if (0xa1 <= c && c <= 0xfe){ + ptr->stat = 1; + status_push_ch(ptr, c); + }else{ + status_disable(ptr); + } +#endif /* X0212_ENABLE */ } } @@ -1678,10 +1736,7 @@ code_status(c) } } -#define STD_GC_BUFSIZE (256) -int std_gc_buf[STD_GC_BUFSIZE]; -int std_gc_ndx; - +#ifndef WIN32DLL int std_getc(f) FILE *f; @@ -1691,6 +1746,7 @@ FILE *f; } return getc(f); } +#endif /*WIN32DLL*/ int std_ungetc(c,f) @@ -1704,6 +1760,7 @@ FILE *f; return c; } +#ifndef WIN32DLL void std_putc(c) int c; @@ -1711,6 +1768,7 @@ int c; if(c!=EOF) putchar(c); } +#endif /*WIN32DLL*/ int noconvert(f) @@ -1845,7 +1903,7 @@ kanji_convert(f) /* second byte */ if (c2 > DEL) { /* in case of 8th bit is on */ - if (!estab_f) { + if (!estab_f&&!mime_decode_mode) { /* in case of not established yet */ /* It is still ambiguious */ if (h_conv(f, c2, c1)==EOF) @@ -2015,6 +2073,12 @@ kanji_convert(f) input_mode = X0208; shift_mode = FALSE; NEXT; +#ifdef X0212_ENABLE + } else if (c1 == 'D'){ + input_mode = X0212; + shift_mode = FALSE; + NEXT; +#endif /* X0212_ENABLE */ } else { /* could be some special code */ (*oconv)(0, ESC); @@ -2080,8 +2144,7 @@ kanji_convert(f) } else if ((c1 == NL || c1 == CR) && broken_f&4) { input_mode = ASCII; set_iconv(FALSE, 0); SEND; - /* - } else if (c1 == NL && mime_f && !mime_decode_mode ) { + } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) { if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { i_ungetc(SPACE,f); continue; @@ -2090,7 +2153,7 @@ kanji_convert(f) } c1 = NL; SEND; - } else if (c1 == CR && mime_f && !mime_decode_mode ) { + } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) { if ((c1=(*i_getc)(f))!=EOF) { if (c1==SPACE) { i_ungetc(SPACE,f); @@ -2107,13 +2170,16 @@ kanji_convert(f) } c1 = CR; SEND; - */ } else SEND; } /* send: */ if (input_mode == X0208) (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ +#ifdef X0212_ENABLE + else if (input_mode == X0212) + (*oconv)((0x8f << 8) | c2, c1); +#endif /* X0212_ENABLE */ else if (input_mode) (*oconv)(input_mode, c1); /* other special case */ else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */ @@ -2248,15 +2314,35 @@ int s2e_conv(c2, c1, p2, p1) int c2, c1; int *p2, *p1; { + int val; #ifdef SHIFTJIS_CP932 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){ extern unsigned short shiftjis_cp932[3][189]; - c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; - if (c1 == 0) return 1; - c2 = c1 >> 8; - c1 &= 0xff; + val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; + if (val){ + c2 = val >> 8; + c1 = val & 0xff; + } } #endif /* SHIFTJIS_CP932 */ +#ifdef X0212_ENABLE + if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){ + extern unsigned short shiftjis_x0212[3][189]; + val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; + if (val){ + if (val & 0x8000){ + c2 = (0x8f << 8) | (val >> 8); + c1 = val & 0xff; + }else{ + c2 = val >> 8; + c1 = val & 0xff; + } + if (p2) *p2 = c2; + if (p1) *p1 = c1; + return 0; + } + } +#endif c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394); if (c1 < 0x9f) c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f); @@ -2264,6 +2350,12 @@ int s2e_conv(c2, c1, p2, p1) c1 = c1 - 0x7e; c2++; } + +#ifdef X0212_ENABLE + if (x0212_f){ + c2 = x0212_unshift(c2); + } +#endif if (p2) *p2 = c2; if (p1) *p1 = c1; return 0; @@ -2293,6 +2385,26 @@ e_iconv(c2, c1, c0) { if (c2 == X0201) { c1 &= 0x7f; +#ifdef X0212_ENABLE + }else if (c2 == 0x8f){ + if (c0 == 0){ + return -1; + } + c2 = (c2 << 8) | (c1 & 0x7f); + c1 = c0 & 0x7f; +#ifdef SHIFTJIS_CP932 + if (cp932_f){ + int s2, s1; + if (e2s_conv(c2, c1, &s2, &s1) == 0){ + s2e_conv(s2, s1, &c2, &c1); + if ((c2 & 0xff00) == 0){ + c1 &= 0x7f; + c2 &= 0x7f; + } + } + } +#endif /* SHIFTJIS_CP932 */ +#endif /* X0212_ENABLE */ } else if (c2 == SSO){ c2 = X0201; c1 &= 0x7f; @@ -2385,7 +2497,7 @@ ww16_conv(c2, c1, c0) val |= (c0 & 0x3f); }else if (c2 >= 0xc0){ val = (c2 & 0x1f) << 6; - val |= (c1 & 0x3f) << 6; + val |= (c1 & 0x3f); }else{ val = c2; } @@ -2422,6 +2534,9 @@ w16e_conv(val, p2, p1) ret = 0; } #endif + }else{ + *p2 = 0; + *p1 = c2; } return ret; } @@ -2477,6 +2592,10 @@ w_iconv_common(c1, c0, pp, psize, p2, p1) if (val == 0) return 1; c2 = val >> 8; + if (val & 0x8000){ + c2 &= 0x7f; + c2 |= 0x8f00; + } if (c2 == SO) c2 = X0201; c1 = val & 0x7f; if (p2) *p2 = c2; @@ -2498,6 +2617,15 @@ e2w_conv(c2, c1) if (c2 == X0201) { p = euc_to_utf8_1byte; +#ifdef X0212_ENABLE + } else if (c2 >> 8 == 0x8f){ + extern unsigned short * x0212_to_utf8_2bytes[]; + c2 = (c2&0x7f) - 0x21; + if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) + p = x0212_to_utf8_2bytes[c2]; + else + return 0; +#endif } else { c2 &= 0x7f; c2 = (c2&0x7f) - 0x21; @@ -2519,16 +2647,6 @@ w_oconv(c2, c1) c1; { int c0; -#ifdef NUMCHAR_OPTION - if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ - w16w_conv(c1, &c2, &c1, &c0); - (*o_putc)(c2); - if (c1){ - (*o_putc)(c1); - if (c0) (*o_putc)(c0); - } - } -#endif if (c2 == EOF) { (*o_putc)(EOF); return; @@ -2541,6 +2659,18 @@ w_oconv(c2, c1) unicode_bom_f=1; } +#ifdef NUMCHAR_OPTION + if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ + w16w_conv(c1, &c2, &c1, &c0); + (*o_putc)(c2); + if (c1){ + (*o_putc)(c1); + if (c0) (*o_putc)(c0); + } + return; + } +#endif + if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); @@ -2548,12 +2678,16 @@ w_oconv(c2, c1) output_mode = ISO8859_1; (*o_putc)(c1 | 0x080); } else { + unsigned short val; output_mode = UTF8; - w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0); - (*o_putc)(c2); - if (c1){ - (*o_putc)(c1); - if (c0) (*o_putc)(c0); + val = e2w_conv(c2, c1); + if (val){ + w16w_conv(val, &c2, &c1, &c0); + (*o_putc)(c2); + if (c1){ + (*o_putc)(c1); + if (c0) (*o_putc)(c0); + } } } } @@ -2588,7 +2722,7 @@ w_oconv16(c2, c1) c1 &= 0xff; #endif } else if (c2) { - unsigned short val = (unsigned short)e2w_conv(c2, c1); + unsigned short val = e2w_conv(c2, c1); c2 = (val >> 8) & 0xff; c1 = val & 0xff; } @@ -2611,6 +2745,9 @@ e_oconv(c2, c1) #ifdef NUMCHAR_OPTION if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ w16e_conv(c1, &c2, &c1); + if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ + return; + } } #endif if (c2 == EOF) { @@ -2625,6 +2762,28 @@ e_oconv(c2, c1) } else if (c2 == ISO8859_1) { output_mode = ISO8859_1; (*o_putc)(c1 | 0x080); +#ifdef X0212_ENABLE + } else if ((c2 & 0xff00) >> 8 == 0x8f){ + output_mode = JAPANESE_EUC; +#ifdef SHIFTJIS_CP932 + if (cp932_f){ + int s2, s1; + if (e2s_conv(c2, c1, &s2, &s1) == 0){ + s2e_conv(s2, s1, &c2, &c1); + } + } +#endif + if ((c2 & 0xff00) >> 8 == 0x8f){ + if (x0212_f){ + (*o_putc)(0x8f); + (*o_putc)((c2 & 0x7f) | 0x080); + (*o_putc)(c1 | 0x080); + } + }else{ + (*o_putc)((c2 & 0x7f) | 0x080); + (*o_putc)(c1 | 0x080); + } +#endif } else { if ((c1<0x21 || 0x7e<c1) || (c2<0x21 || 0x7e<c2)) { @@ -2637,12 +2796,71 @@ e_oconv(c2, c1) } } -void +#ifdef X0212_ENABLE +int x0212_shift(c) + int c; +{ + int ret = c; + c &= 0x7f; + if ((ret & 0xff00) == 0x8f00){ + if (0x75 <= c && c <= 0x7f){ + ret = c + (0x109 - 0x75); + } + }else{ + if (0x75 <= c && c <= 0x7f){ + ret = c + (0x113 - 0x75); + } + } + return ret; +} + + +int x0212_unshift(c) + int c; +{ + int ret = c; + if (0x7f <= c && c <= 0x88){ + ret = c + (0x75 - 0x7f); + }else if (0x89 <= c && c <= 0x92){ + ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89)); + } + return ret; +} +#endif /* X0212_ENABLE */ + +int e2s_conv(c2, c1, p2, p1) int c2, c1, *p2, *p1; { + int val = 0; + unsigned short *ptr; + int ndx; + extern unsigned short *x0212_shiftjis[]; +#ifdef X0212_ENABLE + if ((c2 & 0xff00) == 0x8f00){ + ndx = c2 & 0x7f; + if (0x21 <= ndx && ndx <= 0x7e){ + ptr = x0212_shiftjis[ndx - 0x21]; + if (ptr){ + val = ptr[(c1 & 0x7f) - 0x21]; + } + if (val){ + c2 = val >> 8; + c1 = val & 0xff; + if (p2) *p2 = c2; + if (p1) *p1 = c1; + return 0; + } + } + c2 = x0212_shift(c2); + } +#endif /* X0212_ENABLE */ + if ((c2 & 0xff00) == 0x8f00){ + return 1; + } if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1); if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e); + return 0; } void @@ -2667,6 +2885,14 @@ s_oconv(c2, c1) } else if (c2 == ISO8859_1) { output_mode = ISO8859_1; (*o_putc)(c1 | 0x080); +#ifdef X0212_ENABLE + } else if ((c2 & 0xff00) >> 8 == 0x8f){ + output_mode = SHIFT_JIS; + if (e2s_conv(c2, c1, &c2, &c1) == 0){ + (*o_putc)(c2); + (*o_putc)(c1); + } +#endif } else { if ((c1<0x20 || 0x7e<c1) || (c2<0x20 || 0x7e<c2)) { @@ -2714,6 +2940,18 @@ j_oconv(c2, c1) output_mode = ASCII; } (*o_putc)(EOF); +#ifdef X0212_ENABLE + } else if ((c2 & 0xff00) >> 8 == 0x8f){ + if (output_mode!=X0212) { + output_mode = X0212; + (*o_putc)(ESC); + (*o_putc)('$'); + (*o_putc)('('); + (*o_putc)('D'); + } + (*o_putc)(c2 & 0x7f); + (*o_putc)(c1); +#endif } else if (c2==X0201) { if (output_mode!=X0201) { output_mode = X0201; @@ -2757,14 +2995,7 @@ base64_conv(c2, c1) int c2, c1; { - if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) { - (*o_putc)(EOF); - (*o_putc)(NL); - } else if (base64_count>66 && mimeout_mode) { - (*o_base64conv)(EOF,0); - (*o_base64conv)(NL,0); - (*o_base64conv)(SPACE,0); - } + mime_prechar(c2, c1); (*o_base64conv)(c2,c1); } @@ -3265,6 +3496,10 @@ int mime_encode_method[] = { #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) #define nkf_isdigit(c) ('0'<=c && c<='9') #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F')) +#define nkf_isblank(c) (c == SPACE || c == TAB) +#define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL) +#define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) +#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c)) void switch_mime_getc() @@ -3325,6 +3560,7 @@ FILE *f; } mime_decode_mode = p[i-2]; + set_iconv(FALSE, mime_priority_func[j]); clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME); if (mime_decode_mode=='B') { @@ -3451,6 +3687,7 @@ set_input_codename (codename) is_inputcode_set = TRUE; } +#ifndef WIN32DLL void print_guessed_code (filename) char *filename; @@ -3466,6 +3703,7 @@ print_guessed_code (filename) if (filename != NULL) printf("%s:", filename); printf("%s\n", codename); } +#endif /*WIN32DLL*/ int hex2bin(x) @@ -3965,6 +4203,10 @@ int mode; i = 0; if (base64_count>45) { + if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){ + (*o_mputc)(mimeout_buf[i]); + i++; + } (*o_mputc)(NL); (*o_mputc)(SPACE); base64_count = 1; @@ -4040,7 +4282,13 @@ mimeout_addchar(c) { switch(mimeout_mode) { case 'Q': - if(c>=DEL) { + if(c==SPACE){ + (*o_mputc)('_'); + base64_count++; + } else if (c==CR||c==NL) { + (*o_mputc)(c); + base64_count = 0; + } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) { (*o_mputc)('='); (*o_mputc)(itoh4(((c>>4)&0xf))); (*o_mputc)(itoh4((c&0xf))); @@ -4068,109 +4316,84 @@ mimeout_addchar(c) mimeout_mode='B'; base64_count += 2; break; + default: + (*o_mputc)(c); + base64_count++; + break; } } +int mime_lastchar2, mime_lastchar1; + +void mime_prechar(c2, c1) + int c2, c1; +{ + if (mimeout_mode){ + if (c2){ + if (base64_count + mimeout_buf_count/3*4> 66){ + (*o_base64conv)(EOF,0); + (*o_base64conv)(0,NL); + (*o_base64conv)(0,SPACE); + } + }/*else if (mime_lastchar2){ + if (c1 <=DEL && !nkf_isspace(c1)){ + (*o_base64conv)(0,SPACE); + } + }*/ + }/*else{ + if (c2 && mime_lastchar2 == 0 + && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){ + (*o_base64conv)(0,SPACE); + } + }*/ + mime_lastchar2 = c2; + mime_lastchar1 = c1; +} + void mime_putc(c) int c; { int i = 0; int j = 0; - - if (mimeout_f==FIXED_MIME && base64_count>50) { - eof_mime(); - (*o_mputc)(NL); - base64_count=0; - } else if (c==CR||c==NL) { - base64_count=0; - } - if (c!=EOF && mimeout_f!=FIXED_MIME) { - if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) { - if (mimeout_mode=='Q') { - if (c<=SPACE) { - close_mime(); - (*o_mputc)(SPACE); - base64_count++; - } - (*o_mputc)(c); - base64_count++; - return; - } else if (mimeout_mode) { - if (base64_count>63) { - eof_mime(); - (*o_mputc)(NL); - (*o_mputc)(SPACE); - base64_count=1; - mimeout_preserve_space = TRUE; - } - if (c==SPACE || c==TAB || c==CR || c==NL) { - for (i=0;i<mimeout_buf_count;i++) { - if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) { - eof_mime(); - for (i=0;i<mimeout_buf_count;i++) { - (*o_mputc)(mimeout_buf[i]); - base64_count++; - } - mimeout_buf_count = 0; - } - } - mimeout_buf[mimeout_buf_count++] = c; - if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { - eof_mime(); - base64_count = 0; - for (i=0;i<mimeout_buf_count;i++) { - (*o_mputc)(mimeout_buf[i]); - base64_count++; - } - } - return; - } - if (mimeout_buf_count>0 && SPACE<c) { - mimeout_buf[mimeout_buf_count++] = c; - if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { - } else { - return; - } - } - } else if (!mimeout_mode) { - if (c==SPACE || c==TAB || c==CR || c==NL) { - if ((c==CR || c==NL) - &&(mimeout_buf[mimeout_buf_count-1]==SPACE - || mimeout_buf[mimeout_buf_count-1]==TAB)) { - mimeout_buf_count--; - } - for (i=0;i<mimeout_buf_count;i++) { - (*o_mputc)(mimeout_buf[i]); - base64_count++; - } - mimeout_buf_count = 0; - } - mimeout_buf[mimeout_buf_count++] = c; - if (mimeout_buf_count>75) { - open_mime(output_mode); - } - return; - } - } else if (!mimeout_mode) { - if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) { - for (i=0;i<mimeout_buf_count-1;i++) { - (*o_mputc)(mimeout_buf[i]); - base64_count++; - } - mimeout_buf[0] = SPACE; - mimeout_buf_count = 1; - } - open_mime(output_mode); + int lastchar; + + if (mimeout_f == FIXED_MIME){ + if (mimeout_mode == 'Q'){ + if (base64_count > 71){ + if (c!=CR && c!=NL) { + (*o_mputc)('='); + (*o_mputc)(NL); + } + base64_count = 0; + } + }else{ + if (base64_count > 71){ + eof_mime(); + (*o_mputc)(NL); + base64_count = 0; + } + if (c == EOF) { /* c==EOF */ + eof_mime(); + } } - } else { /* c==EOF */ + if (c != EOF) { /* c==EOF */ + mimeout_addchar(c); + } + return; + } + + /* mimeout_f != FIXED_MIME */ + + if (c == EOF) { /* c==EOF */ j = mimeout_buf_count; + mimeout_buf_count = 0; i = 0; for (;i<j;i++) { - if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB - || mimeout_buf[i]==CR || mimeout_buf[i]==NL) + /*if (nkf_isspace(mimeout_buf[i])){ break; - (*mime_putc)(mimeout_buf[i]); + }*/ + mimeout_addchar(mimeout_buf[i]); } eof_mime(); for (;i<j;i++) { @@ -4179,19 +4402,151 @@ mime_putc(c) } return; } - + + if (mimeout_mode=='Q') { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { + if (c <= SPACE) { + close_mime(); + (*o_mputc)(SPACE); + base64_count++; + } + (*o_mputc)(c); + base64_count++; + } + return; + } + + if (mimeout_buf_count > 0){ + lastchar = mimeout_buf[mimeout_buf_count - 1]; + }else{ + lastchar = -1; + } + + if (!mimeout_mode) { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if (nkf_isspace(c)) { + if (c==CR || c==NL) { + base64_count=0; + } + for (i=0;i<mimeout_buf_count;i++) { + (*o_mputc)(mimeout_buf[i]); + if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){ + base64_count = 0; + }else{ + base64_count++; + } + } + mimeout_buf[0] = c; + mimeout_buf_count = 1; + }else{ + if (base64_count > 1 + && base64_count + mimeout_buf_count > 76){ + (*o_mputc)(NL); + base64_count = 0; + if (!nkf_isspace(mimeout_buf[0])){ + (*o_mputc)(SPACE); + base64_count++; + } + } + mimeout_buf[mimeout_buf_count++] = c; + if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { + open_mime(output_mode); + } + } + return; + }else{ + if (lastchar==CR || lastchar == NL){ + for (i=0;i<mimeout_buf_count;i++) { + (*o_mputc)(mimeout_buf[i]); + } + base64_count = 0; + mimeout_buf_count = 0; + } + if (lastchar==SPACE) { + for (i=0;i<mimeout_buf_count-1;i++) { + (*o_mputc)(mimeout_buf[i]); + base64_count++; + } + mimeout_buf[0] = SPACE; + mimeout_buf_count = 1; + } + open_mime(output_mode); + } + }else{ + /* mimeout_mode == 'B', 1, 2 */ + if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { + if (lastchar == CR || lastchar == NL){ + if (nkf_isblank(c)) { + for (i=0;i<mimeout_buf_count;i++) { + mimeout_addchar(mimeout_buf[i]); + } + mimeout_buf_count = 0; + } else if (SPACE<c && c<DEL) { + eof_mime(); + for (i=0;i<mimeout_buf_count;i++) { + (*o_mputc)(mimeout_buf[i]); + } + base64_count = 0; + mimeout_buf_count = 0; + } + } + if (c==SPACE || c==TAB || c==CR || c==NL) { + for (i=0;i<mimeout_buf_count;i++) { + if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) { + eof_mime(); + for (i=0;i<mimeout_buf_count;i++) { + (*o_mputc)(mimeout_buf[i]); + base64_count++; + } + mimeout_buf_count = 0; + } + } + mimeout_buf[mimeout_buf_count++] = c; + if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { + eof_mime(); + for (i=0;i<mimeout_buf_count;i++) { + (*o_mputc)(mimeout_buf[i]); + base64_count++; + } + mimeout_buf_count = 0; + } + return; + } + if (mimeout_buf_count>0 && SPACE<c && c!='=') { + mimeout_buf[mimeout_buf_count++] = c; + if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { + j = mimeout_buf_count; + mimeout_buf_count = 0; + for (i=0;i<j;i++) { + mimeout_addchar(mimeout_buf[i]); + } + } + return; + } + } + } if (mimeout_buf_count>0) { j = mimeout_buf_count; mimeout_buf_count = 0; for (i=0;i<j;i++) { + if (mimeout_buf[i]==CR || mimeout_buf[i]==NL) + break; mimeout_addchar(mimeout_buf[i]); } + if (i<j) { + eof_mime(); + base64_count=0; + for (;i<j;i++) { + (*o_mputc)(mimeout_buf[i]); + } + open_mime(output_mode); + } } mimeout_addchar(c); } -#ifdef PERL_XS +#if defined(PERL_XS) || defined(WIN32DLL) void reinit() { @@ -4210,6 +4565,7 @@ reinit() input_f = FALSE; alpha_f = FALSE; mime_f = STRICT_MIME; + mime_decode_f = FALSE; mimebuf_f = FALSE; broken_f = FALSE; iso8859_f = FALSE; @@ -4296,7 +4652,10 @@ reinit() #ifdef CHECK_OPTION iconv_for_check = 0; #endif - + input_codename = ""; +#ifdef WIN32DLL + reinitdll(); +#endif /*WIN32DLL*/ } #endif @@ -4313,9 +4672,13 @@ int c2,c1,c0; { fprintf(stderr,"nkf internal module connection failure.\n"); exit(1); + return 0; /* LINT */ } #ifndef PERL_XS +#ifdef WIN32DLL +#define fprintf dllprintf +#endif void usage() { @@ -4365,7 +4728,8 @@ usage() fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n"); fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n"); fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n"); - fprintf(stderr," --cp932, --no-cp932 CP932 compatible\n"); + fprintf(stderr," --x0212 Convert JISX0212\n"); + fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n"); #ifdef INPUT_OPTION fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n"); #endif @@ -4402,7 +4766,7 @@ version() ,NKF_VERSION,NKF_RELEASE_DATE); fprintf(stderr,"\n%s\n",CopyRight); } -#endif +#endif /*PERL_XS*/ /** ** $B%Q%C%A@):n<T(B |