diff options
Diffstat (limited to 'ext/nkf')
| -rw-r--r-- | ext/nkf/.cvsignore | 2 | ||||
| -rw-r--r-- | ext/nkf/MANIFEST | 7 | ||||
| -rw-r--r-- | ext/nkf/depend | 1 | ||||
| -rw-r--r-- | ext/nkf/extconf.rb | 2 | ||||
| -rw-r--r-- | ext/nkf/lib/kconv.rb | 73 | ||||
| -rw-r--r-- | ext/nkf/nkf.c | 197 | ||||
| -rw-r--r-- | ext/nkf/nkf1.7/nkf.c | 1900 | ||||
| -rw-r--r-- | ext/nkf/test.rb | 318 |
8 files changed, 0 insertions, 2500 deletions
diff --git a/ext/nkf/.cvsignore b/ext/nkf/.cvsignore deleted file mode 100644 index fc802ff1c2..0000000000 --- a/ext/nkf/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -mkmf.log diff --git a/ext/nkf/MANIFEST b/ext/nkf/MANIFEST deleted file mode 100644 index 5114a3762a..0000000000 --- a/ext/nkf/MANIFEST +++ /dev/null @@ -1,7 +0,0 @@ -MANIFEST -depend -extconf.rb -lib/kconv.rb -nkf.c -nkf1.7/nkf.c -test.rb diff --git a/ext/nkf/depend b/ext/nkf/depend deleted file mode 100644 index 13e32e6074..0000000000 --- a/ext/nkf/depend +++ /dev/null @@ -1 +0,0 @@ -nkf.o : nkf.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h $(srcdir)/nkf1.7/nkf.c diff --git a/ext/nkf/extconf.rb b/ext/nkf/extconf.rb deleted file mode 100644 index 710662f19c..0000000000 --- a/ext/nkf/extconf.rb +++ /dev/null @@ -1,2 +0,0 @@ -require 'mkmf' -create_makefile('nkf') diff --git a/ext/nkf/lib/kconv.rb b/ext/nkf/lib/kconv.rb deleted file mode 100644 index af6d82275f..0000000000 --- a/ext/nkf/lib/kconv.rb +++ /dev/null @@ -1,73 +0,0 @@ -require 'nkf' - -module Kconv - AUTO = NKF::AUTO - JIS = NKF::JIS - EUC = NKF::EUC - SJIS = NKF::SJIS - BINARY = NKF::BINARY - NOCONV = NKF::NOCONV - UNKNOWN = NKF::UNKNOWN - def kconv(str, out_code, in_code = AUTO) - opt = '-' - case in_code - when NKF::JIS - opt << 'J' - when NKF::EUC - opt << 'E' - when NKF::SJIS - opt << 'S' - end - - case out_code - when NKF::JIS - opt << 'j' - when NKF::EUC - opt << 'e' - when NKF::SJIS - opt << 's' - when NKF::NOCONV - return str - end - - opt = '' if opt == '-' - - NKF::nkf(opt, str) - end - module_function :kconv - - def tojis(str) - NKF::nkf('-j', str) - end - module_function :tojis - - def toeuc(str) - NKF::nkf('-e', str) - end - module_function :toeuc - - def tosjis(str) - NKF::nkf('-s', str) - end - module_function :tosjis - - def guess(str) - NKF::guess(str) - end - module_function :guess -end - -class String - def kconv(out_code, in_code=Kconv::AUTO) - Kconv::kconv(self, out_code, in_code) - end - def tojis - NKF::nkf('-j', self) - end - def toeuc - NKF::nkf('-e', self) - end - def tosjis - NKF::nkf('-s', self) - end -end diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c deleted file mode 100644 index 50723467cd..0000000000 --- a/ext/nkf/nkf.c +++ /dev/null @@ -1,197 +0,0 @@ -#include "ruby.h" - -#define _AUTO 0 -#define _JIS 1 -#define _EUC 2 -#define _SJIS 3 -#define _BINARY 4 -#define _NOCONV 4 -#define _UNKNOWN _AUTO - -#undef getc -#undef ungetc -#define getc(f) (input_ctr<i_len?input[input_ctr++]:-1) -#define ungetc(c,f) input_ctr-- - -#undef putchar -#define putchar(c) rb_nkf_putchar(c) - -#define INCSIZE 32 -static int incsize; - -static unsigned char *input, *output; -static int input_ctr, i_len; -static int output_ctr, o_len; - -static VALUE dst; - -static int -rb_nkf_putchar(c) - unsigned int c; -{ - if (output_ctr >= o_len) { - o_len += incsize; - rb_str_cat(dst, 0, incsize); - output = RSTRING(dst)->ptr; - incsize *= 2; - } - output[output_ctr++] = c; - - return c; -} - -#define PERL_XS 1 -#include "nkf1.7/nkf.c" - -static VALUE -rb_nkf_kconv(obj, opt, src) - VALUE obj, opt, src; -{ - char *opt_ptr, *opt_end; - volatile VALUE v; - - reinit(); - StringValue(opt); - opt_ptr = RSTRING(opt)->ptr; - opt_end = opt_ptr + RSTRING(opt)->len; - for (; opt_ptr < opt_end; opt_ptr++) { - if (*opt_ptr != '-') { - continue; - } - arguments(opt_ptr); - } - - incsize = INCSIZE; - - input_ctr = 0; - StringValue(src); - input = RSTRING(src)->ptr; - i_len = RSTRING(src)->len; - dst = rb_str_new(0, i_len*3 + 10); - v = dst; - - output_ctr = 0; - output = RSTRING(dst)->ptr; - o_len = RSTRING(dst)->len; - *output = '\0'; - - if(iso8859_f && (oconv != j_oconv || !x0201_f )) { - iso8859_f = FALSE; - } - - kanji_convert(NULL); - RSTRING(dst)->ptr[output_ctr] = '\0'; - RSTRING(dst)->len = output_ctr; - OBJ_INFECT(dst, src); - - return dst; -} - -/* - * Character code detection - Algorithm described in: - * Ken Lunde. `Understanding Japanese Information Processing' - * Sebastopol, CA: O'Reilly & Associates. - */ - -static VALUE -rb_nkf_guess(obj, src) - VALUE obj, src; -{ - unsigned char *p; - unsigned char *pend; - int sequence_counter = 0; - - StringValue(src); - p = RSTRING(src)->ptr; - pend = p + RSTRING(src)->len; - -#define INCR do {\ - p++;\ - if (p==pend) return INT2FIX(_UNKNOWN);\ - sequence_counter++;\ - if (sequence_counter % 2 == 1 && *p != 0xa4)\ - sequence_counter = 0;\ - if (6 <= sequence_counter) {\ - sequence_counter = 0;\ - return INT2FIX(_EUC);\ - }\ -} while (0) - - if (*p == 0xa4) - sequence_counter = 1; - - while (p<pend) { - if (*p == '\033') { - return INT2FIX(_JIS); - } - if (*p < '\006' || *p == 0x7f || *p == 0xff) { - return INT2FIX(_BINARY); - } - if (0x81 <= *p && *p <= 0x8d) { - return INT2FIX(_SJIS); - } - if (0x8f <= *p && *p <= 0x9f) { - return INT2FIX(_SJIS); - } - if (*p == 0x8e) { /* SS2 */ - INCR; - if ((0x40 <= *p && *p <= 0x7e) || - (0x80 <= *p && *p <= 0xa0) || - (0xe0 <= *p && *p <= 0xfc)) - return INT2FIX(_SJIS); - } - else if (0xa1 <= *p && *p <= 0xdf) { - INCR; - if (0xf0 <= *p && *p <= 0xfe) - return INT2FIX(_EUC); - if (0xe0 <= *p && *p <= 0xef) { - while (p < pend && *p >= 0x40) { - if (*p >= 0x81) { - if (*p <= 0x8d || (0x8f <= *p && *p <= 0x9f)) { - return INT2FIX(_SJIS); - } - else if (0xfd <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - } - INCR; - } - } - else if (*p <= 0x9f) { - return INT2FIX(_SJIS); - } - } - else if (0xf0 <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - else if (0xe0 <= *p && *p <= 0xef) { - INCR; - if ((0x40 <= *p && *p <= 0x7e) || - (0x80 <= *p && *p <= 0xa0)) { - return INT2FIX(_SJIS); - } - if (0xfd <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - } - INCR; - } - return INT2FIX(_UNKNOWN); -} - -void -Init_nkf() -{ - VALUE mKconv = rb_define_module("NKF"); - - rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2); - rb_define_module_function(mKconv, "guess", rb_nkf_guess, 1); - - rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO)); - rb_define_const(mKconv, "JIS", INT2FIX(_JIS)); - rb_define_const(mKconv, "EUC", INT2FIX(_EUC)); - rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS)); - rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY)); - rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV)); - rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN)); -} diff --git a/ext/nkf/nkf1.7/nkf.c b/ext/nkf/nkf1.7/nkf.c deleted file mode 100644 index 09419f40a7..0000000000 --- a/ext/nkf/nkf1.7/nkf.c +++ /dev/null @@ -1,1900 +0,0 @@ -/** Network Kanji Filter. (PDS Version) -************************************************************************ -** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA) -** $BO"Mm@h!'(B $B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j(B -** $B!J(BE-Mail Address: ichikawa@flab.fujitsu.co.jp$B!K(B -** Copyright (C) 1996,1998 -** $BO"Mm@h!'(B $BN05eBg3X>pJs9)3X2J(B $B2OLn(B $B??<#(B mine/X0208 support -** $B!J(BE-Mail Address: kono@ie.u-ryukyu.ac.jp$B!K(B -** $BO"Mm@h!'(B COW for DOS & Win16 & Win32 & OS/2 -** $B!J(BE-Mail Address: GHG00637@niftyserve.or.p$B!K(B -** $B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"(B -** $B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#(B -** $B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#(B -** $B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#(B -** Everyone is permitted to do anything on this program -** including copying, modifying, improving. -** as long as you don't try to pretend that you wrote it. -** i.e., the above copyright notice has to appear in all copies. -** You don't have to ask before copying or publishing. -** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. -***********************************************************************/ - -static char *CopyRight = - "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),1998 S. Kono, COW"; -static char *Version = - "1.7"; -static char *Patchlevel = - "0/9711/Shinji Kono"; - -/* -** -** -** -** USAGE: nkf [flags] [file] -** -** Flags: -** b Output is bufferred (DEFAULT) -** u Output is unbufferred -** -** t no operation -** -** j Outout code is JIS 7 bit (DEFAULT SELECT) -** s Output code is MS Kanji (DEFAULT SELECT) -** e Output code is AT&T JIS (DEFAULT SELECT) -** l Output code is JIS 7bit and ISO8859-1 Latin-1 -** -** m MIME conversion for ISO-2022-JP -** i_ Output sequence to designate JIS-kanji (DEFAULT_J) -** o_ Output sequence to designate single-byte roman characters (DEFAULT_R) -** -** r {de/en}crypt ROT13/47 -** -** v display Version -** -** T Text mode output (for MS-DOS) -** -** x Do not convert X0201 kana into X0208 -** Z Convert X0208 alphabet to ASCII -** -** f60 fold option -** -** m MIME decode -** B try to fix broken JIS, missing Escape -** B[1-9] broken level -** -** O Output to 'nkf.out' file -** d Delete \r in line feed -** c Add \r in line feed -**/ -/******************************/ -/* $B%G%U%)%k%H$N=PNO%3!<%IA*Br(B */ -/* Select DEFAULT_CODE */ -#define DEFAULT_CODE_JIS -/* #define DEFAULT_CODE_SJIS */ -/* #define DEFAULT_CODE_EUC */ -/******************************/ - -#if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS) -#define MSDOS -#endif - -#ifndef PERL_XS -#include <stdio.h> -#endif - -#if defined(MSDOS) || defined(__OS2__) -#include <stdlib.h> -#include <fcntl.h> -#include <io.h> -#endif - -#ifdef MSDOS -#ifdef LSI_C -#define setbinmode(fp) fsetbin(fp) -#else /* Microsoft C, Turbo C */ -#define setbinmode(fp) setmode(fileno(fp), O_BINARY) -#endif -#else /* UNIX,OS/2 */ -#define setbinmode(fp) -#endif - -#ifdef _IOFBF /* SysV and MSDOS */ -#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size) -#else /* BSD */ -#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size) -#endif - -/*Borland C++ 4.5 EasyWin*/ -#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */ -#define EASYWIN -#include <windows.h> -#endif - -#define FALSE 0 -#define TRUE 1 - -/* state of output_mode and input_mode */ - -#define ASCII 0 -#define X0208 1 -#define X0201 2 -#define NO_X0201 3 -#define JIS_INPUT 4 -#define SJIS_INPUT 5 -#define LATIN1_INPUT 6 -#define FIXED_MIME 7 -#define DOUBLE_SPACE -2 - -#define NL 0x0a -#define ESC 0x1b -#define SPACE 0x20 -#define AT 0x40 -#define SSP 0xa0 -#define DEL 0x7f -#define SI 0x0f -#define SO 0x0e -#define SSO 0x8e - -#define HOLD_SIZE 32 -#define IOBUF_SIZE 16384 - -#define DEFAULT_J 'B' -#define DEFAULT_R 'B' - -#define SJ0162 0x00e1 /* 01 - 62 ku offset */ -#define SJ6394 0x0161 /* 63 - 94 ku offset */ - - -/* MIME preprocessor */ - -#undef STRICT_MIME /* do stupid strict mime integrity check */ -#define GETC(p) ((!mime_mode)?getc(p):mime_getc(p)) -#define UNGETC(c,p) ((!mime_mode)?ungetc(c,p):mime_ungetc(c)) - - -#ifdef EASYWIN /*Easy Win */ -extern POINT _BufferSize; -#endif - -/* function prototype */ - -#ifndef _ -# ifdef __STDC__ -# define _(args) args -# else -# define _(args) () -# endif -#endif - -#ifndef PERL_XS -static void noconvert _((FILE *f)); -static int mime_integrity _((FILE *f,unsigned char *p)); -static int usage _((void)); -static char stdibuf[IOBUF_SIZE]; -static char stdobuf[IOBUF_SIZE]; -static unsigned int mime_input = 0; /* undecoded */ -static int end_check; -#endif - -static void kanji_convert _((FILE *f)); -static void h_conv _((FILE *f,int c2,int c1)); -static int push_hold_buf _((int c2,int c1)); -static void s_iconv _((int c2,int c1)); -static void e_oconv _((int c2,int c1)); -static void s_oconv _((int c2,int c1)); -static void j_oconv _((int c2,int c1)); -static int line_fold _((int c2,int c1)); -static int pre_convert _((int c1,int c2)); -static int mime_begin _((FILE *f)); -static int mime_getc _((FILE *f)); -static int mime_ungetc _((unsigned int c)); -static int base64decode _((int c)); -static void arguments _((char *c)); -static void reinit _((void)); - -/* buffers */ - -static unsigned char hold_buf[HOLD_SIZE*2]; -static int hold_count; - -/* MIME preprocessor fifo */ - -#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */ -#define MIME_BUF_MASK (MIME_BUF_SIZE-1) -#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] -static unsigned char mime_buf[MIME_BUF_SIZE]; -static unsigned int mime_top = 0; -static unsigned int mime_last = 0; /* decoded */ - -/* flags */ -static int unbuf_f = FALSE; -static int estab_f = FALSE; -static int nop_f = FALSE; -static int binmode_f = TRUE; /* binary mode */ -static int rot_f = FALSE; /* rot14/43 mode */ -static int input_f = FALSE; /* non fixed input code */ -static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ -static int mime_f = TRUE; /* convert MIME B base64 or Q */ -static int mimebuf_f = FALSE; /* MIME buffered input */ -static int broken_f = FALSE; /* convert ESC-less broken JIS */ -static int iso8859_f = FALSE; /* ISO8859 through */ -#if defined(MSDOS) || defined(__OS2__) -static int x0201_f = TRUE; /* Assume JISX0201 kana */ -#else -static int x0201_f = NO_X0201; /* Assume NO JISX0201 */ -#endif - -/* X0208 -> ASCII converter */ - -static int c1_return; - -/* fold parameter */ -static int line = 0; /* chars in line */ -static int prev = 0; -static int fold_f = FALSE; -static int fold_len = 0; - -/* options */ -static char kanji_intro = DEFAULT_J, - ascii_intro = DEFAULT_R; - -/* Folding */ - -int line_fold(); -#define FOLD_MARGIN 10 -#define DEFAULT_FOLD 60 - -/* converters */ - -#ifdef DEFAULT_CODE_JIS -# define DEFAULT_CONV j_oconv -#endif -#ifdef DEFAULT_CODE_SJIS -# define DEFAULT_CONV s_oconv -#endif -#ifdef DEFAULT_CODE_EUC -# define DEFAULT_CONV e_oconv -#endif - -static void (*iconv) _((int c2,int c1)); - /* s_iconv or oconv */ -static void (*oconv) _((int c2,int c1)) = DEFAULT_CONV; - /* [ejs]_oconv */ - -/* Global states */ -static int output_mode = ASCII, /* output kanji mode */ - input_mode = ASCII, /* input kanji mode */ - shift_mode = FALSE; /* TRUE shift out, or X0201 */ -static int mime_mode = FALSE; /* MIME mode B base64, Q hex */ - -/* X0201 / X0208 conversion tables */ - -/* X0201 kana conversion table */ -/* 90-9F A0-DF */ -unsigned char cv[]= { -0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57, -0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21, -0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29, -0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43, -0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26, -0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d, -0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35, -0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d, -0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46, -0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c, -0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52, -0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e, -0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62, -0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69, -0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d, -0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c, -0x00,0x00}; - - -/* X0201 kana conversion table for daguten */ -/* 90-9F A0-DF */ -unsigned char dv[]= { -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e, -0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36, -0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e, -0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47, -0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53, -0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00}; - -/* X0201 kana conversion table for han-daguten */ -/* 90-9F A0-DF */ -unsigned char ev[]= { -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54, -0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00}; - - -/* X0208 kigou conversion table */ -/* 0x8140 - 0x819e */ -unsigned char fv[] = { - -0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a, -0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00, -0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f, -0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27, -0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d, -0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00, -0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -} ; - - -static int file_out = FALSE; -static int add_cr = FALSE; -static int del_cr = FALSE; - -#ifndef PERL_XS -int -main(argc, argv) - int argc; - char **argv; -{ - FILE *fin; - char *cp; - -#ifdef EASYWIN /*Easy Win */ - _BufferSize.y = 400;/*Set Scroll Buffer Size*/ -#endif - - for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) { - cp = *argv; - arguments(cp); - } - - if(iso8859_f && (oconv != j_oconv || !x0201_f )) { - fprintf(stderr,"Mixed ISO8859/JISX0201/SJIS/EUC output is not allowed.\n"); - exit(1); - } - - if(binmode_f == TRUE) -#ifdef __OS2__ - if(freopen("","wb",stdout) == NULL) - return (-1); -#else - setbinmode(stdout); -#endif - - if(unbuf_f) - setbuf(stdout, (char *) NULL); - else - setvbuffer(stdout, stdobuf, IOBUF_SIZE); - - if(argc == 0) { - if(binmode_f == TRUE) -#ifdef __OS2__ - if(freopen("","rb",stdin) == NULL) return (-1); -#else - setbinmode(stdin); -#endif - setvbuffer(stdin, stdibuf, IOBUF_SIZE); - if(nop_f) - noconvert(stdin); - else - kanji_convert(stdin); - } else { - while (argc--) { - if((fin = fopen(*argv++, "r")) == NULL) { - perror(*--argv); - return(-1); - } else { -/* reopen file for stdout */ - if(file_out == TRUE){ - if(argc == 1 ) { - if(freopen(*argv++, "w", stdout) == NULL) { - perror(*--argv); - return (-1); - } - argc--; - } else { - if(freopen("nkf.out", "w", stdout) == NULL) { - perror(*--argv); - return (-1); - } - } - if(binmode_f == TRUE) { -#ifdef __OS2__ - if(freopen("","wb",stdout) == NULL) - return (-1); -#else - setbinmode(stdout); -#endif - } - } - if(binmode_f == TRUE) -#ifdef __OS2__ - if(freopen("","rb",fin) == NULL) - return (-1); -#else - setbinmode(fin); -#endif - setvbuffer(fin, stdibuf, IOBUF_SIZE); - if(nop_f) - noconvert(fin); - else - kanji_convert(fin); - fclose(fin); - } - } - } -#ifdef EASYWIN /*Easy Win */ - if(file_out == FALSE) - scanf("%d",&end_check); - else - fclose(stdout); -#else /* for Other OS */ - if(file_out == TRUE) - fclose(stdout); -#endif - return (0); -} -#endif - -static void -arguments(cp) - char *cp; -{ - while (*cp) { - switch (*cp++) { - case 'b': /* buffered mode */ - unbuf_f = FALSE; - continue; - case 'u': /* non bufferd mode */ - unbuf_f = TRUE; - continue; - case 't': /* transparent mode */ - nop_f = TRUE; - continue; - case 'j': /* JIS output */ - case 'n': - oconv = j_oconv; - continue; - case 'e': /* AT&T EUC output */ - oconv = e_oconv; - continue; - case 's': /* SJIS output */ - oconv = s_oconv; - continue; - case 'l': /* ISO8859 Latin-1 support, no conversion */ - iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ - input_f = LATIN1_INPUT; - continue; - case 'i': /* Kanji IN ESC-$-@/B */ - if(*cp=='@'||*cp=='B') - kanji_intro = *cp++; - continue; - case 'o': /* ASCII IN ESC-(-J/B */ - if(*cp=='J'||*cp=='B'||*cp=='H') - ascii_intro = *cp++; - continue; - case 'r': - rot_f = TRUE; - continue; -#if defined(MSDOS) || defined(__OS2__) - case 'T': - binmode_f = FALSE; - continue; -#endif -#ifndef PERL_XS - case 'v': - usage(); - exit(1); - break; -#endif - /* Input code assumption */ - case 'J': /* JIS input */ - case 'E': /* AT&T EUC input */ - input_f = JIS_INPUT; - continue; - case 'S': /* MS Kanji input */ - input_f = SJIS_INPUT; - if(x0201_f==NO_X0201) x0201_f=TRUE; - continue; - case 'Z': /* Convert X0208 alphabet to asii */ - /* bit:0 Convert X0208 - bit:1 Convert Kankaku to one space - bit:2 Convert Kankaku to two spaces - */ - if('9'>= *cp && *cp>='0') - alpha_f |= 1<<(*cp++ -'0'); - else - alpha_f |= TRUE; - continue; - case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */ - x0201_f = FALSE; /* No X0201->X0208 conversion */ - /* accept X0201 - ESC-(-I in JIS, EUC, MS Kanji - SI/SO in JIS, EUC, MS Kanji - SSO in EUC, JIS, not in MS Kanji - MS Kanji (0xa0-0xdf) - output X0201 - ESC-(-I in JIS (0x20-0x5f) - SSO in EUC (0xa0-0xdf) - 0xa0-0xd in MS Kanji (0xa0-0xdf) - */ - continue; - case 'X': /* Assume X0201 kana */ - /* Default value is NO_X0201 for EUC/MS-Kanji mix */ - x0201_f = TRUE; - continue; - case 'f': /* folding -f60 or -f */ - fold_f = TRUE; - fold_len = atoi(cp); - if(!(0<fold_len && fold_len<BUFSIZ)) - fold_len = DEFAULT_FOLD; - while('0'<= *cp && *cp <='9') cp++; - continue; - case 'm': /* MIME support */ - mime_f = TRUE; - if(*cp=='B'||*cp=='Q') { - mime_mode = *cp++; - mimebuf_f = FIXED_MIME; - } else if (*cp=='0') { - mime_f = FALSE; - } - continue; - case 'M': /* MIME output */ - oconv = j_oconv; /* sorry... not yet done.. */ - continue; - case 'B': /* Broken JIS support */ - /* bit:0 no ESC JIS - bit:1 allow any x on ESC-(-x or ESC-$-x - bit:2 reset to ascii on NL - */ - if('9'>= *cp && *cp>='0') - broken_f |= 1<<(*cp++ -'0'); - else - broken_f |= TRUE; - continue; -#ifndef PERL_XS - case 'O':/* for Output file */ - file_out = TRUE; - continue; -#endif - case 'c':/* add cr code */ - add_cr = TRUE; - continue; - case 'd':/* delete cr code */ - del_cr = TRUE; - continue; - default: - /* bogus option but ignored */ - continue; - } - } -} - -#ifndef PERL_XS -static void -noconvert(f) - FILE *f; -{ - int c; - - while ((c = getc(f)) != EOF) - putchar(c); -} -#endif - - -static void -kanji_convert(f) - FILE *f; -{ - int c1, c2; - - c2 = 0; - - if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) { - estab_f = TRUE; iconv = oconv; - } else if(input_f == SJIS_INPUT) { - estab_f = TRUE; iconv = s_iconv; - } else { - estab_f = FALSE; iconv = oconv; - } - input_mode = ASCII; - output_mode = ASCII; - shift_mode = FALSE; - -#define NEXT continue /* no output, get next */ -#define SEND ; /* output c1 and c2, get next */ -#define LAST break /* end of loop, go closing */ - - while ((c1 = GETC(f)) != EOF) { - if(c2) { - /* second byte */ - if(c2 > DEL) { - /* in case of 8th bit is on */ - if(!estab_f) { - /* in case of not established yet */ - if(c1 > SSP) { - /* It is still ambiguious */ - h_conv(f, c2, c1); - c2 = 0; - NEXT; - } else if(c1 < AT) { - /* ignore bogus code */ - c2 = 0; - NEXT; - } else { - /* established */ - /* it seems to be MS Kanji */ - estab_f = TRUE; - iconv = s_iconv; - SEND; - } - } else - /* in case of already established */ - if(c1 < AT) { - /* ignore bogus code */ - c2 = 0; - NEXT; - } else - SEND; - } else - /* 7 bit code */ - /* it might be kanji shitfted */ - if((c1 == DEL) || (c1 <= SPACE)) { - /* ignore bogus first code */ - c2 = 0; - NEXT; - } else - SEND; - } else { - /* first byte */ - if(c1 > DEL) { - /* 8 bit code */ - if(!estab_f && !iso8859_f) { - /* not established yet */ - if(c1 < SSP) { - /* it seems to be MS Kanji */ - estab_f = TRUE; - iconv = s_iconv; - } else if(c1 < 0xe0) { - /* it seems to be EUC */ - estab_f = TRUE; - iconv = oconv; - } else { - /* still ambiguious */ - } - c2 = c1; - NEXT; - } else { /* estab_f==TRUE */ - if(iso8859_f) { - SEND; - } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) { - /* SJIS X0201 Case... */ - /* This is too arrogant, but ... */ - if(x0201_f==NO_X0201) { - iconv = oconv; - c2 = c1; - NEXT; - } else - if(x0201_f) { - if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { - /* look ahead for X0201/X0208conversion */ - if((c2 = GETC(f)) == EOF) { - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - LAST; - } else if(c2==(0xde)) { /* $BByE@(B */ - (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); - c2=0; - NEXT; - } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) { - /* $BH>ByE@(B */ - (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); - c2=0; - NEXT; - } - UNGETC(c2,f); c2 = 0; - } - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - NEXT; - } else - SEND; - } else if(c1==SSO && iconv != s_iconv) { - /* EUC X0201 Case */ - /* This is too arrogant - if(x0201_f == NO_X0201) { - estab_f = FALSE; - c2 = 0; - NEXT; - } */ - c1 = GETC(f); /* skip SSO */ - euc_1byte_check: - if(x0201_f && SSP<=c1 && c1<0xe0) { - if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { - if((c2 = GETC(f)) == EOF) { - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - LAST; - } - /* forward lookup $BByE@(B/$BH>ByE@(B */ - if(c2 != SSO) { - UNGETC(c2,f); c2 = 0; - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - NEXT; - } else if((c2 = GETC(f)) == EOF) { - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - (*oconv)(0,SSO); - LAST; - } else if(c2==(0xde)) { /* $BByE@(B */ - (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); - c2=0; - NEXT; - } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) { - /* $BH>ByE@(B */ - (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); - c2=0; - NEXT; - } else { - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - /* we have to check this c2 */ - /* and no way to push back SSO */ - c1 = c2; c2 = 0; - goto euc_1byte_check; - } - } - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - NEXT; - } else - SEND; - } else if(c1 < SSP && iconv != s_iconv) { - /* strange code in EUC */ - iconv = s_iconv; /* try SJIS */ - c2 = c1; - NEXT; - } else { - /* already established */ - c2 = c1; - NEXT; - } - } - } else if((c1 > SPACE) && (c1 != DEL)) { - /* in case of Roman characters */ - if(shift_mode) { - c1 |= 0x80; - /* output 1 shifted byte */ - if(x0201_f && (!iso8859_f||input_mode==X0201) && - SSP<=c1 && c1<0xe0 ) { - if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { - if((c2 = GETC(f)) == EOF) { - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - LAST; - } else if(c2==(0xde&0x7f)) { /* $BByE@(B */ - (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); - c2=0; - NEXT; - } else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) { - /* $BH>ByE@(B */ - (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); - c2=0; - NEXT; - } - UNGETC(c2,f); c2 = 0; - } - (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); - NEXT; - } else - SEND; - } else if(c1 == '(' && broken_f && input_mode == X0208 - && !mime_mode ) { - /* Try to recover missing escape */ - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, '('); - LAST; - } else { - if(c1 == 'B' || c1 == 'J' || c1 == 'H') { - input_mode = ASCII; shift_mode = FALSE; - NEXT; - } else { - (*oconv)(0, '('); - /* do not modify various input_mode */ - /* It can be vt100 sequence */ - SEND; - } - } - } else if(input_mode == X0208) { - /* in case of Kanji shifted */ - c2 = c1; - NEXT; - /* goto next_byte */ - } else if(c1 == '=' && mime_f && !mime_mode ) { - if((c1 = getc(f)) == EOF) { - (*oconv)(0, '='); - LAST; - } else if(c1 == '?') { - /* =? is mime conversiooon start sequence */ - if(mime_begin(f) == EOF) /* check in detail */ - LAST; - else - NEXT; - } else { - (*oconv)(0, '='); - ungetc(c1,f); - NEXT; - } - } else if(c1 == '$' && broken_f && !mime_mode) { - /* try to recover missing escape */ - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, '$'); - LAST; - } else if(c1 == '@'|| c1 == 'B') { - /* in case of Kanji in ESC sequence */ - input_mode = X0208; - shift_mode = FALSE; - NEXT; - } else { - /* sorry */ - (*oconv)(0, '$'); - (*oconv)(0, c1); - NEXT; - } - } else - SEND; - } else if(c1 == SI) { - shift_mode = FALSE; - NEXT; - } else if(c1 == SO) { - shift_mode = TRUE; - NEXT; - } else if(c1 == ESC) { - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, ESC); - LAST; - } else if(c1 == '$') { - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, ESC); - (*oconv)(0, '$'); - LAST; - } else if(c1 == '@'|| c1 == 'B') { - /* This is kanji introduction */ - input_mode = X0208; - shift_mode = FALSE; - NEXT; - } else if(c1 == '(') { - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, '('); - LAST; - } else if(c1 == '@'|| c1 == 'B') { - /* This is kanji introduction */ - input_mode = X0208; - shift_mode = FALSE; - NEXT; - } else { - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, '('); - (*oconv)(0, c1); - NEXT; - } - } else if(broken_f&0x2) { - input_mode = X0208; - shift_mode = FALSE; - NEXT; - } else { - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, c1); - NEXT; - } - } else if(c1 == '(') { - if((c1 = GETC(f)) == EOF) { - (*oconv)(0, ESC); - (*oconv)(0, '('); - LAST; - } else { - if(c1 == 'I') { - /* This is X0201 kana introduction */ - input_mode = X0201; shift_mode = X0201; - NEXT; - } else if(c1 == 'B' || c1 == 'J' || c1 == 'H') { - /* This is X0208 kanji introduction */ - input_mode = ASCII; shift_mode = FALSE; - NEXT; - } else if(broken_f&0x2) { - input_mode = ASCII; shift_mode = FALSE; - NEXT; - } else { - (*oconv)(0, ESC); - (*oconv)(0, '('); - /* maintain various input_mode here */ - SEND; - } - } - } else { - /* lonely ESC */ - (*oconv)(0, ESC); - SEND; - } - } else if(c1 == NL && broken_f&4) { - input_mode = ASCII; - SEND; - } else - SEND; - } - /* send: */ - if(input_mode == X0208) - (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ - else - (*iconv)(c2, c1); /* can be EUC/SJIS */ - c2 = 0; - continue; - /* goto next_word */ - } - - /* epilogue */ - (*iconv)(EOF, 0); -} - - - - -static void -h_conv(f, c2, c1) - FILE *f; - int c1, c2; -{ - int wc; - - - /** it must NOT be in the kanji shifte sequence */ - /** it must NOT be written in JIS7 */ - /** and it must be after 2 byte 8bit code */ - - hold_count = 0; - push_hold_buf(c2, c1); - c2 = 0; - - while ((c1 = GETC(f)) != EOF) { - if(c2) { - /* second byte */ - if(!estab_f) { - /* not established */ - if(c1 > SSP) { - /* it is still ambiguious yet */ - SEND; - } else if(c1 < AT) { - /* ignore bogus first byte */ - c2 = 0; - SEND; - } else { - /* now established */ - /* it seems to be MS Kanji */ - estab_f = TRUE; - iconv = s_iconv; - SEND; - } - } else - SEND; - } else { - /* First byte */ - if(c1 > DEL) { - /* 8th bit is on */ - if(c1 < SSP) { - /* it seems to be MS Kanji */ - estab_f = TRUE; - iconv = s_iconv; - } else if(c1 < 0xe0) { - /* it seems to be EUC */ - estab_f = TRUE; - iconv = oconv; - } else { - /* still ambiguious */ - } - c2 = c1; - NEXT; - } else - /* 7 bit code , then send without any process */ - SEND; - } - /* send: */ - if((push_hold_buf(c2, c1) == EOF) || estab_f) - break; - c2 = 0; - continue; - } - - /** now, - ** 1) EOF is detected, or - ** 2) Code is established, or - ** 3) Buffer is FULL (but last word is pushed) - ** - ** in 1) and 3) cases, we continue to use - ** Kanji codes by oconv and leave estab_f unchanged. - **/ - - for (wc = 0; wc < hold_count; wc += 2) { - c2 = hold_buf[wc]; - c1 = hold_buf[wc+1]; - (*iconv)(c2, c1); - } - return; -} - - - -static int -push_hold_buf(c2, c1) - int c2, c1; -{ - if(hold_count >= HOLD_SIZE*2) - return (EOF); - hold_buf[hold_count++] = c2; - hold_buf[hold_count++] = c1; - return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count); -} - - -static void -s_iconv(c2, c1) - int c2, - c1; -{ - if((c2 == EOF) || (c2 == 0)) { - /* NOP */ - } else { - c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394); - if(c1 < 0x9f) - c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f); - else { - c1 = c1 - 0x7e; - c2++; - } - } - (*oconv)(c2, c1); -} - - -static void -e_oconv(c2, c1) - int c2, c1; -{ - c2 = pre_convert(c1,c2); c1 = c1_return; - if(fold_f) { - switch(line_fold(c2,c1)) { - case '\n': - if(add_cr == TRUE) { - putchar('\r'); - c1 = '\n'; - } - putchar('\n'); - break; - case 0: return; - case '\r': - c1 = '\n'; c2 = 0; - break; - case '\t': - case ' ': - c1 = ' '; c2 = 0; - break; - } - } - if(c2==DOUBLE_SPACE) { - putchar(' '); putchar(' '); - return; - } - if(c2 == EOF) - return; - else if(c2 == 0 && (c1&0x80)) { - putchar(SSO); putchar(c1); - } else if(c2 == 0) { - if(c1 == '\n' && add_cr == TRUE) - putchar('\r'); - if(c1 != '\r') - putchar(c1); - else if(del_cr == FALSE) - putchar(c1); - } else { - if((c1<0x20 || 0x7e<c1) || - (c2<0x20 || 0x7e<c2)) { - estab_f = FALSE; - return; /* too late to rescue this char */ - } - putchar(c2 | 0x080); - putchar(c1 | 0x080); - } - return; -} - - -static void -s_oconv(c2, c1) - int c2, c1; -{ - c2 = pre_convert(c1,c2); c1 = c1_return; - if(fold_f) { - switch(line_fold(c2,c1)) { - case '\n': - if(add_cr == TRUE) { - putchar('\r'); - c1 = '\n'; - } - putchar('\n'); - break; - case '\r': - c1 = '\n'; c2 = 0; - break; - case 0: return; - case '\t': - case ' ': - c1 = ' '; c2 = 0; - break; - } - } - if(c2==DOUBLE_SPACE) { - putchar(' '); putchar(' '); - return; - } - if(c2 == EOF) - return; - else if(c2 == 0) { - if(c1 == '\n' && add_cr == TRUE) - putchar('\r'); - if(c1 != '\r') - putchar(c1); - else if(del_cr == FALSE) - putchar(c1); - } else { - if((c1<0x20 || 0x7e<c1) || - (c2<0x20 || 0x7e<c2)) { - estab_f = FALSE; - return; /* too late to rescue this char */ - } - putchar((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1))); - putchar((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e))); - } - return; -} - - -static void -j_oconv(c2, c1) - int c2, c1; -{ - c2 = pre_convert(c1,c2); c1 = c1_return; - if(fold_f) { - switch(line_fold(c2,c1)) { - case '\n': - if(output_mode) { - putchar(ESC); - putchar('('); - putchar(ascii_intro); - } - if(add_cr == TRUE) { - putchar('\r'); - c1 = '\n'; - } - putchar('\n'); - output_mode = ASCII; - break; - case '\r': - c1 = '\n'; c2 = 0; - break; - case '\t': - case ' ': - c1 = ' '; c2 = 0; - break; - case 0: return; - } - } - if(c2 == EOF) { - if(output_mode) { - putchar(ESC); - putchar('('); - putchar(ascii_intro); - } - } else if(c2 == 0 && (c1 & 0x80)) { - if(input_mode==X0201 || !iso8859_f) { - if(output_mode!=X0201) { - putchar(ESC); - putchar('('); - putchar('I'); - output_mode = X0201; - } - c1 &= 0x7f; - } else { - /* iso8859 introduction, or 8th bit on */ - /* Can we convert in 7bit form using ESC-'-'-A ? - Is this popular? */ - } - putchar(c1); - } else if(c2 == 0) { - if(output_mode) { - putchar(ESC); - putchar('('); - putchar(ascii_intro); - output_mode = ASCII; - } - if(c1 == '\n' && add_cr == TRUE) - putchar('\r'); - if(c1 != '\r') - putchar(c1); - else if(del_cr == FALSE) - putchar(c1); - } else if(c2 == DOUBLE_SPACE) { - if(output_mode) { - putchar(ESC); - putchar('('); - putchar(ascii_intro); - output_mode = ASCII; - } - putchar(' '); - if(c1 == '\n' && add_cr == TRUE) - putchar('\r'); - if(c1 != '\r') - putchar(c1); - else if(del_cr == FALSE) - putchar(c1); - } else { - if(output_mode != X0208) { - putchar(ESC); - putchar('$'); - putchar(kanji_intro); - output_mode = X0208; - } - if(c1<0x20 || 0x7e<c1) - return; - if(c2<0x20 || 0x7e<c2) - return; - putchar(c2); - if(c1 == '\n' && add_cr == TRUE) - putchar('\r'); - if(c1 != '\r') - putchar(c1); - else if(del_cr == FALSE) - putchar(c1); - } - return; -} - - - -#define rot13(c) ( \ - ( c < 'A' ) ? c: \ - (c <= 'M') ? (c + 13): \ - (c <= 'Z') ? (c - 13): \ - (c < 'a') ? (c): \ - (c <= 'm') ? (c + 13): \ - (c <= 'z') ? (c - 13): \ - (c) \ -) - -#define rot47(c) ( \ - ( c < '!' ) ? c: \ - ( c <= 'O' ) ? (c + 47) : \ - ( c <= '~' ) ? (c - 47) : \ - c \ -) - - -/* - Return value of line_fold() - - \n add newline and output char - \r add newline and output nothing - ' ' space - 0 skip - 1 (or else) normal output - - fold state in prev (previous character) - - >0x80 Japanese (X0208/X0201) - <0x80 ASCII - \n new line - ' ' space - - This fold algorthm does not preserve heading space in a line. - This is the main difference from fmt. -*/ - -static int -line_fold(c2,c1) -int c2,c1; -{ - int prev0; - if(c1=='\r') - return 0; /* ignore cr */ - if(c1== 8) { - if(line>0) line--; - return 1; - } - if(c2==EOF && line != 0) /* close open last line */ - return '\n'; - /* new line */ - if(c1=='\n') { - if(prev == c1) { /* duplicate newline */ - if(line) { - line = 0; - return '\n'; /* output two newline */ - } else { - line = 0; - return 1; - } - } else { - if(prev&0x80) { /* Japanese? */ - prev = c1; - return 0; /* ignore given single newline */ - } else if(prev==' ') { - return 0; - } else { - prev = c1; - if(++line<=fold_len) - return ' '; - else { - line = 0; - return '\r'; /* fold and output nothing */ - } - } - } - } - if(c1=='\f') { - prev = '\n'; - if(line==0) - return 1; - line = 0; - return '\n'; /* output newline and clear */ - } - /* X0208 kankaku or ascii space */ - if( (c2==0&&c1==' ')|| - (c2==0&&c1=='\t')|| - (c2==DOUBLE_SPACE)|| - (c2=='!'&& c1=='!')) { - if(prev == ' ') { - return 0; /* remove duplicate spaces */ - } - prev = ' '; - if(++line<=fold_len) - return ' '; /* output ASCII space only */ - else { - prev = ' '; line = 0; - return '\r'; /* fold and output nothing */ - } - } - prev0 = prev; /* we still need this one... , but almost done */ - prev = c1; - if(c2 || (SSP<=c1 && c1<=0xdf)) - prev |= 0x80; /* this is Japanese */ - line += (c2==0)?1:2; - if(line<=fold_len) { /* normal case */ - return 1; - } - if(line>=fold_len+FOLD_MARGIN) { /* too many kinsou suspension */ - line = (c2==0)?1:2; - return '\n'; /* We can't wait, do fold now */ - } - /* simple kinsoku rules return 1 means no folding */ - if(c2==0) { - if(c1==0xde) return 1; /* $B!+(B*/ - if(c1==0xdf) return 1; /* $B!,(B*/ - if(c1==0xa4) return 1; /* $B!#(B*/ - if(c1==0xa3) return 1; /* $B!$(B*/ - if(c1==0xa1) return 1; /* $B!W(B*/ - if(c1==0xb0) return 1; /* - */ - if(SSP<=c1 && c1<=0xdf) { /* X0201 */ - line = 1; - return '\n';/* add one new line before this character */ - } - /* fold point in ASCII { [ ( */ - if(( c1!=')'&& - c1!=']'&& - c1!='}'&& - c1!='.'&& - c1!=','&& - c1!='!'&& - c1!='?'&& - c1!='/'&& - c1!=':'&& - c1!=';')&& - ((prev0=='\n')|| (prev0==' ')|| /* ignored new line */ - (prev0&0x80)) /* X0208 - ASCII */ - ) { - line = 1; - return '\n';/* add one new line before this character */ - } - return 1; /* default no fold in ASCII */ - } else { - if(c2=='!') { - if(c1=='"') return 1; /* $B!"(B */ - if(c1=='#') return 1; /* $B!#(B */ - if(c1=='$') return 1; /* $B!$(B */ - if(c1=='%') return 1; /* $B!%(B */ - if(c1=='\'') return 1; /* $B!\(B */ - if(c1=='(') return 1; /* $B!((B */ - if(c1==')') return 1; /* $B!)(B */ - if(c1=='*') return 1; /* $B!*(B */ - if(c1=='+') return 1; /* $B!+(B */ - if(c1==',') return 1; /* $B!,(B */ - } - line = 2; - return '\n'; /* add one new line before this character */ - } -} - -static int -pre_convert(c1,c2) - int c1,c2; -{ - if(c2) c1 &= 0x7f; - c1_return = c1; - if(c2==EOF) return c2; - c2 &= 0x7f; - if(rot_f) { - if(c2) { - c1 = rot47(c1); - c2 = rot47(c2); - } else { - if(!(c1 & 0x80)) - c1 = rot13(c1); - } - c1_return = c1; - } - /* JISX0208 Alphabet */ - if(alpha_f && c2 == 0x23 ) return 0; - /* JISX0208 Kigou */ - if(alpha_f && c2 == 0x21 ) { - if(0x21==c1) { - if(alpha_f&0x2) { - c1_return = ' '; - return 0; - } else if(alpha_f&0x4) { - c1_return = ' '; - return DOUBLE_SPACE; - } else { - return c2; - } - } else if(0x20<c1 && c1<0x7f && fv[c1-0x20]) { - c1_return = fv[c1-0x20]; - return 0; - } - } - return c2; -} - - -#ifdef STRICT_MIME -/* This converts */ - -unsigned char *mime_pattern[] = { - (unsigned char *)"\075?ISO-8859-1?Q?", - (unsigned char *)"\075?ISO-2022-JP?B?", - (unsigned char *)"\075?ISO-2022-JP?Q?", - (unsigned char *)"\075?JAPANESE_EUC?B?", - (unsigned char *)"\075?SHIFT_JIS?B?", - NULL -}; - -int mime_encode[] = { - 'Q', 'B', 'Q', - 0 -}; -#endif - -#define MAXRECOVER 20 -int iso8859_f_save; - -#ifdef STRICT_MIME - -#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) -/* I don't trust portablity of toupper */ - -static int -mime_begin(f) - FILE *f; -{ - int c1; - int i,j,k; - unsigned char *p,*q; - int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */ - - mime_mode = FALSE; - /* =? has been checked */ - j = 0; - p = mime_pattern[j]; - r[0]='='; r[1]='?'; - - for(i=2;p[i]>' ';i++) { /* start at =? */ - if( ((((r[i] = c1 = getc(f))==EOF) || nkf_toupper(c1) != p[i] ) { - /* pattern fails, try next one */ - q = p; - while (p = mime_pattern[++j]) { - for(k=2;k<i;k++) /* assume length(p) > i */ - if(p[k]!=q[k]) break; - if(k==i && nkf_toupper(c1)==p[k]) break; - } - if(p) continue; /* found next one, continue */ - /* all fails, output from recovery buffer */ - ungetc(c1,f); - for(j=0;j<i;j++) { - (*oconv)(0,r[j]); - } - return c1; - } - } - mime_mode = mime_encode[j]; - iso8859_f_save = iso8859_f; - if(j==0) { - iso8859_f = TRUE; - } - if(mime_mode=='B') { - mimebuf_f = unbuf_f; - if(!unbuf_f) { - /* do MIME integrity check */ - return mime_integrity(f,mime_pattern[j]); - } - } - mimebuf_f = TRUE; - return c1; -} - -#define mime_getc0(f) (mimebuf_f?getc(f):Fifo(mime_input++)) -#define mime_ungetc0(c,f) (mimebuf_f?ungetc(c,f):mime_input--) - -#else -static int -mime_begin(f) -FILE *f; -{ - int c1; - int i,j; - int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */ - - mime_mode = FALSE; - /* =? has been checked */ - j = 0; - r[0]='='; r[1]='?'; - for(i=2;i<MAXRECOVER;i++) { /* start at =? */ - /* We accept any charcter type even if it is breaked by new lines */ - if( (r[i] = c1 = getc(f))==EOF) break; - if(c1=='=') break; - if(c1<' '&& c1!='\r' && c1!='\n') break; - if(c1=='?') { - i++; - if(!(i<MAXRECOVER) || (r[i] = c1 = getc(f))==EOF) break; - if(c1=='b'||c1=='B') { - mime_mode = 'B'; - } else if(c1=='q'||c1=='Q') { - mime_mode = 'Q'; - } else { - break; - } - i++; - if(!(i<MAXRECOVER) || (r[i] = c1 = getc(f))==EOF) break; - if(c1=='?') { - break; - } else { - mime_mode = FALSE; - } - break; - } - } - if(!mime_mode || c1==EOF || i==MAXRECOVER) { - ungetc(c1,f); - if (i == MAXRECOVER) - i--; - for(j=0;j<i;j++) { - (*oconv)(0,r[j]); - } - return c1; - } - iso8859_f_save = iso8859_f; - /* do no MIME integrity check */ - return c1; /* used only for checking EOF */ -} - -#define mime_getc0(f) getc(f) -#define mime_ungetc0(c,f) ungetc(c,f) - -#endif - -static int -mime_getc(f) - FILE *f; -{ - int c1, c2, c3, c4, cc; - int t1, t2, t3, t4, mode, exit_mode; - - if(mime_top != mime_last) { /* Something is in FIFO */ - return Fifo(mime_top++); - } - - if(mimebuf_f == FIXED_MIME) - exit_mode = mime_mode; - else - exit_mode = FALSE; - if(mime_mode == 'Q') { - if((c1 = mime_getc0(f)) == EOF) return (EOF); - if(c1=='_') return ' '; - if(c1!='=' && c1!='?') - return c1; - mime_mode = exit_mode; /* prepare for quit */ - if(c1<=' ') return c1; - if((c2 = mime_getc0(f)) == EOF) return (EOF); - if(c2<=' ') return c2; - if(c1=='?'&&c2=='=') { - /* end Q encoding */ - input_mode = exit_mode; - iso8859_f = iso8859_f_save; - return getc(f); - } - if(c1=='?') { - mime_mode = 'Q'; /* still in MIME */ - mime_ungetc0(c2,f); - return c1; - } - if((c3 = mime_getc0(f)) == EOF) return (EOF); - if(c2<=' ') return c2; - mime_mode = 'Q'; /* still in MIME */ -#define hex(c) (('0'<=c&&c<='9')?(c-'0'):\ - ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0) - return ((hex(c2)<<4) + hex(c3)); - } - - if(mime_mode != 'B') { - mime_mode = FALSE; - return getc(f); - } - - - /* Base64 encoding */ - /* - MIME allows line break in the middle of - Base64, but we are very pessimistic in decoding - in unbuf mode because MIME encoded code may broken by - less or editor's control sequence (such as ESC-[-K in unbuffered - mode. ignore incomplete MIME. - */ - mode = mime_mode; - mime_mode = exit_mode; /* prepare for quit */ - - while ((c1 = mime_getc0(f))<=' ') { - if(c1==EOF) - return (EOF); - } - if((c2 = mime_getc0(f))<=' ') { - if(c2==EOF) - return (EOF); - if(mimebuf_f!=FIXED_MIME) input_mode = ASCII; - return c2; - } - if((c1 == '?') && (c2 == '=')) { - input_mode = ASCII; - while((c1 = getc(f))==' ' /* || c1=='\n' || c1=='\r' */); - return c1; - } - if((c3 = mime_getc0(f))<=' ') { - if(c3==EOF) - return (EOF); - if(mimebuf_f!=FIXED_MIME) input_mode = ASCII; - return c3; - } - if((c4 = mime_getc0(f))<=' ') { - if(c4==EOF) - return (EOF); - if(mimebuf_f!=FIXED_MIME) input_mode = ASCII; - return c4; - } - - mime_mode = mode; /* still in MIME sigh... */ - - /* BASE 64 decoding */ - - t1 = 0x3f & base64decode(c1); - t2 = 0x3f & base64decode(c2); - t3 = 0x3f & base64decode(c3); - t4 = 0x3f & base64decode(c4); - cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03); - if(c2 != '=') { - Fifo(mime_last++) = cc; - cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f); - if(c3 != '=') { - Fifo(mime_last++) = cc; - cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f); - if(c4 != '=') - Fifo(mime_last++) = cc; - } - } else { - return c1; - } - return Fifo(mime_top++); -} - -static int -mime_ungetc(c) - unsigned int c; -{ - Fifo(mime_last++) = c; - return c; -} - -#ifdef STRICT_MIME -int -mime_integrity(f,p) - FILE *f; - unsigned char *p; -{ - int c,d; - unsigned int q; - /* In buffered mode, read until =? or NL or buffer fffull - */ - mime_input = mime_top; - mime_last = mime_top; - while(*p) Fifo(mime_input++) = *p++; - d = 0; - q = mime_input; - while((c=getc(f))!=EOF) { - if(((mime_input-mime_top)&MIME_BUF_MASK)==0) break; - if(c=='=' && d=='?') { - /* checked. skip header, start decode */ - Fifo(mime_input++) = c; - mime_input = q; - return 1; - } - if(!( (c=='+'||c=='/'|| c=='=' || c=='?' || - ('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')))) - break; - /* Should we check length mod 4? */ - Fifo(mime_input++) = c; - d=c; - } - /* In case of Incomplete MIME, no MIME decode */ - Fifo(mime_input++) = c; - mime_last = mime_input; /* point undecoded buffer */ - mime_mode = 1; /* no decode on Fifo last in mime_getc */ - return 1; -} -#endif - -static int -base64decode(c) - int c; -{ - int i; - if(c > '@') - if(c < '[') - i = c - 'A'; /* A..Z 0-25 */ - else - i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */ - else if(c > '/') - i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */ - else if(c == '+') - i = '>' /* 62 */ ; /* + 62 */ - else - i = '?' /* 63 */ ; /* / 63 */ - return (i); -} - -static void -reinit() -{ - unbuf_f = FALSE; - estab_f = FALSE; - nop_f = FALSE; - binmode_f = TRUE; - rot_f = FALSE; - input_f = FALSE; - alpha_f = FALSE; - mime_f = TRUE; - mimebuf_f = FALSE; - broken_f = FALSE; - iso8859_f = FALSE; - x0201_f = TRUE; - x0201_f = NO_X0201; - fold_f = FALSE; - kanji_intro = DEFAULT_J; - ascii_intro = DEFAULT_R; - oconv = DEFAULT_CONV; - output_mode = ASCII; - input_mode = ASCII; - shift_mode = FALSE; - mime_mode = FALSE; - file_out = FALSE; - add_cr = FALSE; - del_cr = FALSE; - line = 0; -} - -#ifndef PERL_XS -int -usage() -{ - fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"); - fprintf(stderr,"Flags:\n"); - fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n"); -#ifdef DEFAULT_CODE_SJIS - fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC)\n"); -#endif -#ifdef DEFAULT_CODE_JIS - fprintf(stderr,"j,s,e Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC)\n"); -#endif -#ifdef DEFAULT_CODE_EUC - fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT)\n"); -#endif - fprintf(stderr,"J,S,E Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC)\n"); - fprintf(stderr,"t no conversion\n"); - fprintf(stderr,"i_ Output sequence to designate JIS-kanji (DEFAULT B)\n"); - fprintf(stderr,"o_ Output sequence to designate ASCII (DEFAULT B)\n"); - fprintf(stderr,"r {de/en}crypt ROT13/47\n"); - fprintf(stderr,"v Show this usage\n"); - fprintf(stderr,"m[BQ0] MIME decode [B:base64,Q:quoted,0:no decode]\n"); - fprintf(stderr,"l ISO8859-1 (Latin-1) support\n"); - fprintf(stderr,"f Folding: -f60 or -f\n"); - fprintf(stderr,"Z[0-2] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces\n"); - fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); - fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); -#ifdef MSDOS - fprintf(stderr,"T Text mode output\n"); -#endif - fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n"); - fprintf(stderr,"d,c Delete \\r in line feed, Add \\r in line feed\n"); - fprintf(stderr,"Network Kanji Filter Version %s (%s) " -#if defined(MSDOS) && !defined(_Windows) - "for DOS" -#endif -#if !defined(__WIN32__) && defined(_Windows) - "for Win16" -#endif -#if defined(__WIN32__) && defined(_Windows) - "for Win32" -#endif -#ifdef __OS2__ - "for OS/2" -#endif - ,Version,Patchlevel); - fprintf(stderr,"\n%s\n",CopyRight); - return 0; -} -#endif - -/** - ** $B%Q%C%A@):n<T(B - ** void@merope.pleiades.or.jp (Kusakabe Youichi) - ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp> - ** ohta@src.ricoh.co.jp (Junn Ohta) - ** inouet@strl.nhk.or.jp (Tomoyuki Inoue) - ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama) - ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp> - ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe) - ** kono@ie.u-ryukyu.ac.jp (Shinji Kono) - ** GHG00637@nifty-serve.or.jp (COW) - ** - ** $B:G=*99?7F|(B - ** 1998.11.7 - **/ - -/* end */ diff --git a/ext/nkf/test.rb b/ext/nkf/test.rb deleted file mode 100644 index 4519f8ba7e..0000000000 --- a/ext/nkf/test.rb +++ /dev/null @@ -1,318 +0,0 @@ -$counter = 0 -def result(result, message = nil) - $counter += 1 - printf("%s %d%s\n", - result ? 'ok' : 'no', - $counter, - message ? ' ... ' + message : '') -end - -begin - require 'nkf' - include NKF -rescue LoadError - result(false) -end -result(true) - -if nkf('-me', '1') - result(true); -else - result(false); -end - -output = nkf('-e', "\033\$@#1#3#2%B") -if output - # print output, "\n" - result(true, output) -else - result(false) -end - -output = nkf('-Zj', "\033\$@#1#3#2%B") -if output - # print output, "\n" - result(true, output) -else - result(false) -end - -output = "\244\306 " * 1024 -old = output.length -output = nkf("-j", output) -if output - # print output, "\n" - result(true, "#{old} #{output.length}") -else - result(false) -end - - -$detail = false -def test(opt, input, expect) - print "\nINPUT:\n", input if $detail - print "\nEXPECT:\n", expect if $detail - result = nkf(opt, input) - print "\nGOT:\n", result if $detail - - print result == expect ? "Ok\n" : "Fail\n" - return result -end - -# Basic Conversion -print "\nBasic Conversion test\n\n" - -example = {} -example['jis'] = <<'eofeof'.unpack('u')[0] -M1FER<W0@4W1A9V4@&R1"(3DQ(3%^2R%+?D]3&RA"(%-E8V]N9"!3=&%G92`; -M)$)0)TU:&RA"($AI<F%G86YA(!LD0B0B)"0D)B0H)"HD;R1R)',;*$(*2V%T -M86MA;F$@&R1")2(E)"4F)2@E*B5O)7(E<QLH0B!+:6=O=2`;)$(A)B%G(S`C -/029!)E@G(B=!*$`;*$(* -eofeof -#' - -example['sjis'] = <<'eofeof'.unpack('u')[0] -M1FER<W0@4W1A9V4@@5B)0(F>ED"6GIAR(%-E8V]N9"!3=&%G92"8I9=Y($AI -M<F%G86YA((*@@J*"I(*F@JB"[8+P@O$*2V%T86MA;F$@@T�X-%@T>#28./ ->@Y*#DR!+:6=O=2"!18&'@D^"8(._@]:$081@A+X* -eofeof -#' - -example['euc'] = <<'eofeof'.unpack('u')[0] -M1FER<W0@4W1A9V4@H;FQH;'^RZ'+_L_3(%-E8V]N9"!3=&%G92#0I\W:($AI -M<F%G86YA(*2BI*2DIJ2HI*JD[Z3RI/,*2V%T86MA;F$@I:*EI*6FI:BEJJ7O ->I?*E\R!+:6=O=2"AIJ'GH["CP:;!IMBGHJ?!J,`* -eofeof -#' - -example['amb'] = <<'eofeof'.unpack('u')[0] -MI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&E -MPK"QI<*PL:7"L+&EPK"QI<(*I<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*P -ML:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<(*I<*PL:7"L+&E -MPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7" -ML+&EPK"QI<(*I<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"Q -MI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<(*I<*PL:7"L+&EPK"QI<*PL:7" -ML+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<*PL:7"L+&EPK"QI<(* -eofeof - -example['amb.euc'] = <<'eofeof'.unpack('u')[0] -M&R1")4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25" -M,#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(*&R1")4(P,25",#$E0C`Q)4(P,25" -M,#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(; -M*$(*&R1")4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P -M,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(*&R1")4(P,25",#$E0C`Q)4(P -M,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q -M)4(;*$(*&R1")4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q ->)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(* -eofeof - -example['amb.sjis'] = <<'eofeof'.unpack('u')[0] -M&RA))4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25" -M,#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(*&RA))4(P,25",#$E0C`Q)4(P,25" -M,#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(; -M*$(*&RA))4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P -M,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(*&RA))4(P,25",#$E0C`Q)4(P -M,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q -M)4(;*$(*&RA))4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q ->)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(* -eofeof - -example['x0201.sjis'] = <<'eofeof'.unpack('u')[0] -MD5.*<(-*@TR#3H-0@U*#2X--@T^#48-3"I%3B7""8()A@F*"8X)D@F6"9H*! -M@H*"@X*$@H6"AH*'"I%3BTR-AH%)@9>!E(&0@9.!3X&5@9:!:8%J@7R!>X&! -M@6V!;H%O@7"!CPJ4O(IPMK>X/;FZMMZWWKC>N=ZZWH+&"I2\BG#*W\O?S-_- -MW\[?M]^QW@K*W\O?S`IH86YK86MU(,K?R]_,I`K*W\O?S-VA"I2\BG""S(SC -!"@!" -eofeof -#' - -example['x0201.euc'] = <<'eofeof'.unpack('u')[0] -MP;2ST:6KI:VEKZ6QI;.EK*6NI;"ELJ6T"L&TL=&CP:/"H\.CQ*/%H\:CQZ/A -MH^*CXZ/DH^6CYJ/G"L&TM:VYYJ&JH?>A]*'PH?.AL*'UH?:ARJ'+H=VAW*'A -MH<ZASZ'0H=&A[PK(OK/1CK:.MXZX/8ZYCKJ.MH[>CK>.WHZXCMZ.N8[>CKJ. -MWJ3("LB^L]&.RH[?CLN.WX[,CM^.S8[?CLZ.WXZWCM^.L8[>"H[*CM^.RX[? -MCLP*:&%N:V%K=2".RH[?CLN.WX[,CJ0*CLJ.WX[+CM^.S([=CJ$*R+ZST:3. -#N.4* -eofeof -#' - -example['x0201.jis'] = <<'eofeof'.unpack('u')[0] -M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA""ALD0D$T,5$C02-"(T,C -M1"-%(T8C1R-A(V(C8R-D(V4C9B-G&RA""ALD0D$T-2TY9B$J(7<A="%P(7,A -M,"%U(78A2B%+(5TA7"%A(4XA3R%0(5$A;QLH0@H;)$)(/C-1&RA)-C<X&RA" -M/1LH23DZ-EXW7CA>.5XZ7ALD0B1(&RA""ALD0D@^,U$;*$E*7TM?3%]-7TY? -M-U\Q7ALH0@H;*$E*7TM?3!LH0@IH86YK86MU(!LH24I?2U],)!LH0@H;*$E* -97TM?3%TA&RA""ALD0D@^,U$D3CAE&RA""@`` -eofeof -#` - -example['x0201.sosi'] = <<'eofeof'.unpack('u')[0] -M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA*"ALD0D$T,5$C02-"(T,C -M1"-%(T8C1R-A(V(C8R-D(V4C9B-G&RA*"ALD0D$T-2TY9B$J(7<A="%P(7,A -M,"%U(78A2B%+(5TA7"%A(4XA3R%0(5$A;QLH2@H;)$)(/C-1&RA*#C8W.`\; -M*$H]#CDZ-EXW7CA>.5XZ7@\;)$(D2!LH2@H;)$)(/C-1&RA*#DI?2U],7TU? -M3E\W7S%>#PH.2E]+7TP/&RA*"FAA;FMA:W4@#DI?2U],)`\;*$H*#DI?2U], -672$/&RA*"ALD0D@^,U$D3CAE&RA""@`` -eofeof -#" - -example['x0201.x0208'] = <<'eofeof'.unpack('u')[0] -M&R1"030S424K)2TE+R4Q)3,E+"4N)3`E,B4T&RA""ALD0D$T,5$;*$)!0D-$ -M149'86)C9&5F9PH;)$)!-#4M.68;*$(A0",D)5XF*B@I+2L]6UU[?1LD0B%O -M&RA""ALD0D@^,U$E*R4M)2\;*$(]&R1")3$E,R4L)2XE,"4R)30D2!LH0@H; -M)$)(/C-1)5$E5"57)5HE724M(2PE(B$K&RA""ALD0B51)50E51LH0@IH86YK -M86MU(!LD0B51)50E52$B&RA""ALD0B51)50E525S(2,;*$(*&R1"2#XS421. -&.&4;*$(* -eofeof -#` - -example['mime.iso2022'] = <<'eofeof'.unpack('u')[0] -M/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W96E23TI566Q/4U9)1WEH2S\]"CT_ -M:7-O+3(P,C(M2E`_0C]'>5)!3D5%-V5I4D]*55EL3U-624=Y:$L_/0H]/VES -M;RTR,#(R+4I0/U$_/3%")$(D1B11/3%"*$)?96YD/ST*&R1`)#TD)B0K)$H; -M*$H@/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W96E23U!Y:S=D:'-O4V<]/3\] -M(&5N9"!O9B!L:6YE"CT_25-/+3(P,C(M2E`_0C]'>5)!3D5%-V5I4D]0>6LW -M9&AS;U-G/3T_/2`]/TE33RTR,#(R+4I0/T(_1WE204Y%13=E:5)/4'EK-V1H -M<V]39ST]/ST*0G)O:V5N(&-A<V4*/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W -M96E23U!Y:S=D"FAS;U-G/3T_/2`]/TE33RTR,`HR,BU*4#]"/T=Y4D%.144W -M96E23U!Y:S=D:'-O4V<]/3\]"CT_25-/+3(P,C(M2E`_0C]'>5)!3D5%-V5I -44D]*55EL3QM;2U-624=Y:$L_/0H_ -eofeof -#' - -example['mime.ans.strict'] = <<'eofeof'.unpack('u')[0] -M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11 -M&RA"(&5N9`H;)$(D/20F)"LD2ALH0B`;)$(T03MZ)$X_*3MV&RA"96YD(&]F -M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4* -M/3])4T\M,C`R,BU*4#]"/T=Y4D%.144W96E23U!Y:S=D"FAS;U-G/3T_/2`] -M/TE33RTR,`HR,BU*4#]"/T=Y4D%.144W96E23U!Y:S=D:'-O4V<]/3\]"CT_ -L25-/+3(P,C(M2E`_0C]'>5)!3D5%-V5I4D]*55EL3QM;2U-624=Y:$L_/0H_ -eofeof -#' - -example['mime.unbuf.strict'] = <<'eofeof'.unpack('u')[0] -M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11 -M&RA"(&5N9`H;)$(D/20F)"LD2ALH0B`;)$(T03MZ)$X_*3MV&RA"96YD(&]F -M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4* -M&R1"-$$[>B1./RD;*$)H<V]39ST]/ST@/3])4T\M,C`*,C(M2E`_0C]'>5)! -M3D5%-V5I4D]0>6LW9&AS;U-G/3T_/0H;)$(T03MZ)$XE1ALH0EM+4U9)1WEH -$2S\]"F5I -eofeof - -example['mime.ans'] = <<'eofeof'.unpack('u')[0] -M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11 -M&RA"(&5N9`H;)$(D/20F)"LD2ALH0B`;)$(T03MZ)$X_*3MV&RA"96YD(&]F -M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4* -M&R1"-$$[>B1./RD;*$)H<V]39ST]/ST@&R1"-$$[>B1./RD[=ALH0@H;)$(T -603MZ)$XE1ALH0EM+4U9)1WEH2S\]"@`* -eofeof -#" - -example['mime.unbuf'] = <<'eofeof'.unpack('u')[0] -M&R1"-$$[>B1.)48E.25(&RA""ALD0C1!.WHD3B5&)3DE2!LH0@H;)$(D1B11 -M&RA"(&5N9`H;)$(D/20F)"LD2ALH0B`;)$(T03MZ)$X_*3MV&RA"96YD(&]F -M(&QI;F4*&R1"-$$[>B1./RD[=C1!.WHD3C\I.W8;*$(*0G)O:V5N(&-A<V4* -M&R1"-$$[>B1./RD;*$)H<V]39ST]/ST@&R1"-$$[>B1./RD[=ALH0@H;)$(T -603MZ)$XE1ALH0EM+4U9)1WEH2S\]"@`* -eofeof -#" - -example['mime.base64'] = <<'eofeof'.unpack('u')[0] -M9W-M5"])3&YG<FU#>$I+-&=Q=4,S24LS9W%Q0E%:3TUI-39,,S0Q-&=S5T)1 -M43!+9VUA1%9O3T@*9S)+1%1O3'=K8C)1;$E+;V=Q2T-X24MG9W5M0W%*3EEG -<<T=#>$E+9V=U;4,X64Q&9W)70S592VMG<6U""F=Q -eofeof -#" - -example['mime.base64.ans'] = <<'eofeof'.unpack('u')[0] -M&R1")$M&?B1I)#LD1D0Z)"TD7B0Y)"PA(D5L-7XV83E9)$<A(ALH0@T*&R1" -M(T<E-R5G)4,E+R1R0C\_="0J)"0D1B0B)&LD*D4Y)$,D1B0B)&LD<R1')#<D -(9R0F)"L;*$(E -eofeof -#' - -example['mime.is8859'] = <<'eofeof'.unpack('u')[0] -M/3])4T\M.#@U.2TQ/U$_*CU#-V%V83\_/2`*4&5E<B!4]G)N9W)E;@I,87-S -M92!(:6QL97+X92!0971E<G-E;B`@7"`B36EN(&MA97!H97-T(&AA<B!F86%E -M="!E="!F;V5L(2(*06%R:'5S(%5N:79E<G-I='DL($1%3DU!4DL@(%P@(DUI -<;B!KYG!H97-T(&AA<B!FY65T(&5T(&;X;"$B"@!K -eofeof - -example['mime.is8859.ans'] = <<'eofeof'.unpack('u')[0] -M*L=A=F$_(`I0965R(%3V<FYG<F5N"DQA<W-E($AI;&QE<OAE(%!E=&5R<V5N -M("!<(")-:6X@:V%E<&AE<W0@:&%R(&9A865T(&5T(&9O96PA(@I!87)H=7,@ -M56YI=F5R<VET>2P@1$5.34%22R`@7"`B36EN(&OF<&AE<W0@:&%R(&;E970@ -)970@9OAL(2(* -eofeof -#" - -print 'JIS to JIS ... '; test(' ', example['jis'], example['jis']) -print 'JIS to SJIS... '; test('-s', example['jis'], example['sjis']) -print 'JIS to EUC ... '; test('-e', example['jis'], example['euc']) - -print 'SJIS to JIS ... '; test('-j', example['sjis'], example['jis']) -print 'SJIS to SJIS... '; test('-s', example['sjis'], example['sjis']) -print 'SJIS to EUC ... '; test('-e', example['sjis'], example['euc']) - -print 'EUC to JIS ... '; test(' ', example['euc'], example['jis']) -print 'EUC to SJIS... '; test('-s', example['euc'], example['sjis']) -print 'EUC to EUC ... '; test('-e', example['euc'], example['euc']) - - -# Ambigous Case -print 'Ambiguous Case. '; test('' , example['amb'], example['amb.euc']) - -# Input assumption -print 'SJIS Input assumption ' -test('-Sx', example['amb'], example['amb.sjis']) - -# X0201 仮名 -# X0201->X0208 conversion -# X0208 aphabet -> ASCII -# X0201 相互変換 - -print "\nX0201 test\n\n" - -# -X is necessary to allow X0201 in SJIS -# -Z convert X0208 alphabet to ASCII -print 'X0201 conversion: SJIS ' -test('-XZ', example['x0201.sjis'], example['x0201.x0208']) -print 'X0201 conversion: JIS ' -test('-Z', example['x0201.jis'], example['x0201.x0208']) -print 'X0201 conversion:SI/SO ' -test('-Z', example['x0201.sosi'], example['x0201.x0208']) -print 'X0201 conversion: EUC ' -test('-Z', example['x0201.euc'], example['x0201.x0208']) -# -x means X0201 output -print 'X0201 output: SJIS ' -test('-xs', example['x0201.euc'], example['x0201.sjis']) -print 'X0201 output: JIS ' -test('-xj', example['x0201.sjis'], example['x0201.jis']) -print 'X0201 output: EUC ' -test('-xe', example['x0201.jis'], example['x0201.euc']) - -# MIME decode - -print "\nMIME test\n\n" - -# MIME ISO-2022-JP - -print "Next test is expeced to Fail.\n" - -print 'MIME decode (strict) ' -tmp = test('-m', example['mime.iso2022'], example['mime.ans.strict']) -print 'MIME decode (nonstrict)' -tmp = test('-m', example['mime.iso2022'], example['mime.ans']) -# open(OUT,'>tmp1');print OUT pack('u',$tmp);close(OUT); -# unbuf mode implies more pessimistic decode -print 'MIME decode (unbuf) ' -test('-mu', example['mime.iso2022'], example['mime.unbuf']) -print 'MIME decode (base64) ' -t = test('-mB', example['mime.base64'], example['mime.base64.ans']) - -# MIME ISO-8859-1 - -# Without -l, ISO-8859-1 was handled as X0201. - -print 'MIME ISO-8859-1 (Q) ' -test('-ml', example['mime.is8859'], example['mime.is8859.ans']) |
