From c065b233c6020e9a63e5f941f8254d7f8d2d0c8a Mon Sep 17 00:00:00 2001 From: naruse Date: Sun, 17 Jul 2005 20:03:35 +0000 Subject: * ext/nkf/nkf-utf8/nkf.c: import nkf.c 1.73 fix: TestKconv 1F git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8786 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/nkf/nkf-utf8/nkf.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'ext') diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 9b6fffadc4..bf71bfcd08 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -41,7 +41,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.5" -#define NKF_RELEASE_DATE "2005-07-05" +#define NKF_RELEASE_DATE "2005-07-18" #include "config.h" static char *CopyRight = @@ -372,6 +372,7 @@ static unsigned char mime_buf[MIME_BUF_SIZE]; static unsigned int mime_top = 0; static unsigned int mime_last = 0; /* decoded */ static unsigned int mime_input = 0; /* undecoded */ +static int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL; /* flags */ static int unbuf_f = FALSE; @@ -440,6 +441,7 @@ static int noout_f = FALSE; STATIC void no_putc PROTO((int c)); static int debug_f = FALSE; STATIC void debug PROTO((char *str)); +static int (*iconv_for_check)() = 0; #endif static int guess_f = FALSE; @@ -759,6 +761,9 @@ main(argc, argv) is_inputcode_mixed = FALSE; is_inputcode_set = FALSE; input_codename = ""; +#ifdef CHECK_OPTION + iconv_for_check = 0; +#endif if ((fin = fopen((origfname = *argv++), "r")) == NULL) { perror(*--argv); return(-1); @@ -1377,10 +1382,6 @@ struct input_code * find_inputcode_byfunc(iconv_func) return 0; } -#ifdef CHECK_OPTION -static int (*iconv_for_check)() = 0; -#endif - #ifdef ANSI_C_PROTOTYPE void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0)) #else @@ -2526,16 +2527,22 @@ w_iconv(c2, c1, c0) ; /* 1 or 2ytes */ else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */ return -1; /* 3bytes */ - /*else if (0xf0 <= c2) +#ifdef __COMMENT__ + else if (0xf0 <= c2) return 0; /* 4,5,6bytes */ else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */ return 0; /* trail byte */ +#endif else return 0; } if (c2 == EOF); else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf) return 0; /* throw BOM */ else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){ + if(c2 == 0){ + c2 = c1; + c1 = 0; + } val = ww16_conv(c2, c1, c0); c2 = (val >> 8) & 0xff; c1 = val & 0xff; @@ -3609,6 +3616,8 @@ unswitch_mime_getc() } i_getc = i_mgetc; i_ungetc = i_mungetc; + if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back); + mime_iconv_back = NULL; } int @@ -3646,6 +3655,7 @@ FILE *f; } mime_decode_mode = p[i-2]; + mime_iconv_back = iconv; set_iconv(FALSE, mime_priority_func[j]); clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME); @@ -4007,12 +4017,16 @@ FILE *f; if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF); restart_mime_q: if (c1=='_') return ' '; + if (c1<=' ' || DEL<=c1) { + mime_decode_mode = FALSE; /* quit */ + unswitch_mime_getc(); + return c1; + } if (c1!='=' && c1!='?') { return c1; } mime_decode_mode = exit_mode; /* prepare for quit */ - if (c1<=' ') return c1; if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF); if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) { /* end Q encoding */ @@ -4714,7 +4728,7 @@ reinit() #endif iso2022jp_f = FALSE; #ifdef UNICODE_ENABLE - internal_unicode_f = TRUE; + internal_unicode_f = FALSE; #endif #ifdef UTF8_OUTPUT_ENABLE unicode_bom_f = 0; @@ -4829,16 +4843,16 @@ usage() fprintf(stderr,"Flags:\n"); fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n"); #ifdef DEFAULT_CODE_SJIS - fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n"); + fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n"); #endif #ifdef DEFAULT_CODE_JIS - fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n"); + fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n"); #endif #ifdef DEFAULT_CODE_EUC - fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n"); + fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n"); #endif #ifdef DEFAULT_CODE_UTF8 - fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n"); + fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n"); #endif #ifdef UTF8_OUTPUT_ENABLE fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n"); @@ -4880,6 +4894,9 @@ usage() #ifdef NUMCHAR_OPTION fprintf(stderr," --numchar-input Convert Unicode Character Reference\n"); #endif +#ifdef UNICODE_NORMALIZATION + fprintf(stderr," --utf8mac-input UTF-8-MAC input\n"); +#endif #ifdef UTF8_OUTPUT_ENABLE fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n"); #endif -- cgit v1.2.3