/** Network Kanji Filter. (PDS Version)
** -*- coding: ISO-2022-JP -*-
************************************************************************
** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
** 連絡先: (株)富士通研究所 ソフト3研 市川 至
** (E-Mail Address: ichikawa@flab.fujitsu.co.jp)
** Copyright (C) 1996,1998
** Copyright (C) 2002
** 連絡先: 琉球大学情報工学科 河野 真治 mime/X0208 support
** (E-Mail Address: kono@ie.u-ryukyu.ac.jp)
** 連絡先: COW for DOS & Win16 & Win32 & OS/2
** (E-Mail Address: GHG00637@niftyserve.or.p)
**
** このソースのいかなる複写,改変,修正も許諾します。ただし、
** その際には、誰が貢献したを示すこの部分を残すこと。
** 再配布や雑誌の付録などの問い合わせも必要ありません。
** 営利利用も上記に反しない範囲で許可します。
** バイナリの配布の際にはversion messageを保存することを条件とします。
** このプログラムについては特に何の保証もしない、悪しからず。
**
** Everyone is permitted to do anything on this program
** including copying, modifying, improving,
** as long as you don't try to pretend that you wrote it.
** i.e., the above copyright notice has to appear in all copies.
** Binary distribution requires original version messages.
** You don't have to ask before copying, redistribution or publishing.
** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
***********************************************************************/
/***********************************************************************
* nkf is currently maintained on SourceForge.
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
/* Identification strings.  NKF_VERSION, NKF_RELEASE_DATE and COPY_RIGHT
 * are concatenated into the banner printed by version(). */
#define NKF_IDENT "$Id$"
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2008-02-08"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
#include "config.h"
#include "nkf.h"
#include "utf8tbl.h"
/* state of output_mode and input_mode
c2 0 means ASCII
JIS_X_0201_1976_K
ISO_8859_1
JIS_X_0208
EOF all termination
c1 32bit data
*/
/* MIME ENCODE */
/* NOTE(review): presumably values for a MIME mode flag; their use is not
 * visible in this part of the file -- confirm against the MIME code. */
#define FIXED_MIME 7
#define STRICT_MIME 8
/* byte order */
/* Byte-order selectors; input_endian and output_endian are both
 * initialised to ENDIAN_BIG. */
enum byte_order {
ENDIAN_BIG = 1,
ENDIAN_LITTLE = 2,
ENDIAN_2143 = 3, /* presumably the mixed order 2-1-4-3 of a 4-byte unit -- TODO confirm */
ENDIAN_3412 = 4 /* presumably the mixed order 3-4-1-2 of a 4-byte unit -- TODO confirm */
};
/* ASCII CODE */
/* Control and delimiter byte values used throughout the converters. */
#define BS 0x08 /* backspace */
#define TAB 0x09 /* horizontal tab */
#define LF 0x0a /* line feed */
#define CR 0x0d /* carriage return */
#define ESC 0x1b /* escape */
#define SP 0x20 /* space */
#define DEL 0x7f /* delete */
#define SI 0x0f /* shift in */
#define SO 0x0e /* shift out */
#define SS2 0x8e /* single shift 2 */
#define SS3 0x8f /* single shift 3; is_eucg3() tests for it in the high byte */
#define CRLF 0x0D0A /* CR and LF packed into one value (CR in the high byte) */
/* encodings */
/*
 * Encoding identifiers.
 *
 * The enumerators from ASCII up to BINARY are consecutive from 0 and are
 * used directly as indexes into nkf_encoding_table (see
 * nkf_enc_from_index()), so their order must stay in step with that table.
 * NKF_ENCODING_TABLE_SIZE is the number of such entries.
 *
 * The enumerators with explicit values (0x1013 and up) lie outside the
 * table range; the comment near the top of the file lists
 * JIS_X_0201_1976_K and JIS_X_0208 as states of output_mode/input_mode.
 */
enum nkf_encodings {
ASCII,
ISO_8859_1,
ISO_2022_JP,
CP50220,
CP50221,
CP50222,
ISO_2022_JP_1,
ISO_2022_JP_3,
ISO_2022_JP_2004,
SHIFT_JIS,
WINDOWS_31J,
CP10001,
EUC_JP,
EUCJP_NKF,
CP51932,
EUCJP_MS,
EUCJP_ASCII,
SHIFT_JISX0213,
SHIFT_JIS_2004,
EUC_JISX0213,
EUC_JIS_2004,
UTF_8,
UTF_8N,
UTF_8_BOM,
UTF8_MAC,
UTF_16,
UTF_16BE,
UTF_16BE_BOM,
UTF_16LE,
UTF_16LE_BOM,
UTF_32,
UTF_32BE,
UTF_32BE_BOM,
UTF_32LE,
UTF_32LE_BOM,
BINARY,
NKF_ENCODING_TABLE_SIZE,
JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
/* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
/* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
/* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
JIS_X_0208 = 0x1168, /* @B */
JIS_X_0212 = 0x1159, /* D */
/* JIS_X_0213_2000_1 = 0x1228, */ /* O */
JIS_X_0213_2 = 0x1229, /* P */
JIS_X_0213_1 = 0x1233, /* Q */
};
/* Forward declarations of the per-encoding converters that the
 * nkf_native_encoding descriptors point to: the *_iconv functions take
 * three nkf_char values (c2, c1, c0) and return an nkf_char, the *_oconv
 * functions take two (c2, c1) and return nothing. */
static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
static void j_oconv(nkf_char c2, nkf_char c1);
static void s_oconv(nkf_char c2, nkf_char c1);
static void e_oconv(nkf_char c2, nkf_char c1);
static void w_oconv(nkf_char c2, nkf_char c1);
static void w_oconv16(nkf_char c2, nkf_char c1);
static void w_oconv32(nkf_char c2, nkf_char c1);
/* A "native" (base) encoding: a name plus the input and output converter
 * functions shared by every encoding variant built on it. */
typedef struct {
const char *name;
nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
void (*oconv)(nkf_char c2, nkf_char c1);
} nkf_native_encoding;
/* The base encodings.  Note that ASCII reuses the EUC-JP converters and
 * ISO-2022-JP pairs the EUC-JP input converter with j_oconv. */
nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
/* One concrete encoding: its enum nkf_encodings id, its canonical name and
 * the base encoding that supplies the converter functions. */
typedef struct {
const int id;
const char *name;
const nkf_native_encoding *base_encoding;
} nkf_encoding;
/*
 * Table of all known encodings.  nkf_enc_from_index() indexes this array
 * directly with an enum nkf_encodings value, so entry N must describe the
 * encoding whose enumerator equals N.  The final {-1, NULL, NULL} entry is
 * a terminator.
 */
nkf_encoding nkf_encoding_table[] = {
{ASCII, "US-ASCII", &NkfEncodingASCII},
{ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
{ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
{CP50220, "CP50220", &NkfEncodingISO_2022_JP},
{CP50221, "CP50221", &NkfEncodingISO_2022_JP},
{CP50222, "CP50222", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
{SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
{WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
{CP10001, "CP10001", &NkfEncodingShift_JIS},
{EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
{EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
{CP51932, "CP51932", &NkfEncodingEUC_JP},
{EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
{EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
{SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
{SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
{EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
{EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
{UTF_8, "UTF-8", &NkfEncodingUTF_8},
{UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
{UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
{UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
{UTF_16, "UTF-16", &NkfEncodingUTF_16},
{UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
{UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
{UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
{UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
{UTF_32, "UTF-32", &NkfEncodingUTF_32},
{UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
{UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
{UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
{UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
{BINARY, "BINARY", &NkfEncodingASCII},
{-1, NULL, NULL}
};
/*
 * Maps encoding names and aliases to enum nkf_encodings ids.
 * nkf_enc_find_index() scans it from the top, comparing names
 * case-insensitively, and stops at the {NULL, -1} terminator (it tests
 * id >= 0).  Several names may map to the same id.
 */
struct {
const char *name;
const int id;
} encoding_name_to_id_table[] = {
{"US-ASCII", ASCII},
{"ASCII", ASCII},
{"ISO-2022-JP", ISO_2022_JP},
{"ISO2022JP-CP932", CP50220},
{"CP50220", CP50220},
{"CP50221", CP50221},
{"CSISO2022JP", CP50221},
{"CP50222", CP50222},
{"ISO-2022-JP-1", ISO_2022_JP_1},
{"ISO-2022-JP-3", ISO_2022_JP_3},
{"ISO-2022-JP-2004", ISO_2022_JP_2004},
{"SHIFT_JIS", SHIFT_JIS},
{"SJIS", SHIFT_JIS},
{"WINDOWS-31J", WINDOWS_31J},
{"CSWINDOWS31J", WINDOWS_31J},
{"CP932", WINDOWS_31J},
{"MS932", WINDOWS_31J},
{"CP10001", CP10001},
{"EUCJP", EUC_JP},
{"EUC-JP", EUC_JP},
{"EUCJP-NKF", EUCJP_NKF},
{"CP51932", CP51932},
{"EUC-JP-MS", EUCJP_MS},
{"EUCJP-MS", EUCJP_MS},
{"EUCJPMS", EUCJP_MS},
{"EUC-JP-ASCII", EUCJP_ASCII},
{"EUCJP-ASCII", EUCJP_ASCII},
{"SHIFT_JISX0213", SHIFT_JISX0213},
{"SHIFT_JIS-2004", SHIFT_JIS_2004},
{"EUC-JISX0213", EUC_JISX0213},
{"EUC-JIS-2004", EUC_JIS_2004},
{"UTF-8", UTF_8},
{"UTF-8N", UTF_8N},
{"UTF-8-BOM", UTF_8_BOM},
{"UTF8-MAC", UTF8_MAC},
{"UTF-8-MAC", UTF8_MAC},
{"UTF-16", UTF_16},
{"UTF-16BE", UTF_16BE},
{"UTF-16BE-BOM", UTF_16BE_BOM},
{"UTF-16LE", UTF_16LE},
{"UTF-16LE-BOM", UTF_16LE_BOM},
{"UTF-32", UTF_32},
{"UTF-32BE", UTF_32BE},
{"UTF-32BE-BOM", UTF_32BE_BOM},
{"UTF-32LE", UTF_32LE},
{"UTF-32LE-BOM", UTF_32LE_BOM},
{"BINARY", BINARY},
{NULL, -1}
};
/*
 * Select the compiled-in default encoding from the DEFAULT_CODE_* build
 * option.  DEFAULT_ENCIDX stays undefined when none of them is set.
 * It expands to an enum constant, not a number, so preprocessor code can
 * only test it with defined(); inside #if/#elif the enumerator name would
 * evaluate to 0.
 */
#if defined(DEFAULT_CODE_JIS)
#define DEFAULT_ENCIDX ISO_2022_JP
#elif defined(DEFAULT_CODE_SJIS)
#define DEFAULT_ENCIDX SHIFT_JIS
#elif defined(DEFAULT_CODE_EUC)
#define DEFAULT_ENCIDX EUC_JP
#elif defined(DEFAULT_CODE_UTF8)
#define DEFAULT_ENCIDX UTF_8
#endif
/*
 * ASCII-only character classification and conversion helpers.
 *
 * They are hand-written macros rather than <ctype.h> calls (the original
 * author did not trust the portability of toupper).  Every parameter is
 * parenthesized so that arguments containing operators expand correctly,
 * but arguments are still evaluated more than once: do not pass
 * expressions with side effects such as *p++.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
/* Upper-case an ASCII lower-case letter; any other value is returned unchanged. */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for a non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (after truncation to unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and the characters strictly between SP and DEL other
 * than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
    && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
/* True when SP <= c < 0x60 (0xE0 with the top bit cleared). */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) < (0xE0&0x7F))
/* Buffer sizes.  IOBUF_SIZE is the size of stdibuf/stdobuf and is reduced
 * when the build declares INT_IS_SHORT. */
#define HOLD_SIZE 1024
#if defined(INT_IS_SHORT)
#define IOBUF_SIZE 2048
#else
#define IOBUF_SIZE 16384
#endif
/* Initial values of kanji_intro and ascii_intro respectively. */
#define DEFAULT_J 'B'
#define DEFAULT_R 'B'
/* NOTE(review): presumably the two bytes of the GETA mark substituted for
 * unconvertible characters (see the "I" flag in usage()) -- confirm. */
#define GETA1 0x22
#define GETA2 0x2e
/* MIME preprocessor */
#ifdef EASYWIN /*Easy Win */
extern POINT _BufferSize;
#endif
/*
 * Per-candidate state for one input encoding; input_code_list holds one
 * entry per candidate (EUC-JP, Shift_JIS and optionally UTF-8).
 * NOTE(review): the field meanings below are inferred from their names and
 * from the initialisers in input_code_list -- confirm against the
 * *_status functions.
 */
struct input_code{
char *name; /* encoding name, e.g. "EUC-JP" */
nkf_char stat; /* initialised to 0 */
nkf_char score; /* initialised to 0 */
nkf_char index; /* initialised to 0; presumably the fill level of buf */
nkf_char buf[3];
void (*status_func)(struct input_code *, nkf_char); /* e_status, s_status or w_status */
nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* matching input converter */
int _file_stat;
};
/* Currently selected input/output encodings (NULL until chosen) and the
 * forward declaration of the main conversion routine. */
static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
static nkf_encoding *input_encoding = NULL;
static nkf_encoding *output_encoding = NULL;
static int kanji_convert(FILE *f);
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* UCS Mapping
 * 0: Shift_JIS, eucJP-ascii
 * 1: eucJP-ms
 * 2: CP932, CP51932
 * 3: CP10001
 */
#define UCS_MAP_ASCII 0
#define UCS_MAP_MS 1
#define UCS_MAP_CP932 2
#define UCS_MAP_CP10001 3
/* Selected UCS mapping; one of the UCS_MAP_* values above. */
static int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
/* no NEC special, NEC-selected IBM extended and IBM extended characters */
static int no_cp932ext_f = FALSE;
/* ignore ZERO WIDTH NO-BREAK SPACE */
/* NOTE(review): the comment above does not match the name of the flag
 * below and may be left over from a removed variable -- confirm. */
static int no_best_fit_chars_f = FALSE;
static int input_endian = ENDIAN_BIG; /* an enum byte_order value */
static nkf_char unicode_subchar = '?'; /* the regular substitution character */
static void (*encode_fallback)(nkf_char c) = NULL;
static void w_status(struct input_code *, nkf_char);
#endif
#ifdef UTF8_OUTPUT_ENABLE
static int output_bom_f = FALSE;
static int output_endian = ENDIAN_BIG; /* an enum byte_order value */
#endif
/* Forward declarations of the character I/O primitives that the i_* / o_*
 * function pointers further down are initialised with or redirected to. */
static void std_putc(nkf_char c);
static nkf_char std_getc(FILE *f);
static nkf_char std_ungetc(nkf_char c,FILE *f);
static nkf_char broken_getc(FILE *f);
static nkf_char broken_ungetc(nkf_char c,FILE *f);
static nkf_char mime_getc(FILE *f);
static void mime_putc(nkf_char c);
/* buffers */
/* Static I/O buffers of IOBUF_SIZE bytes each; not compiled for the
 * PERL_XS and WIN32DLL builds.
 * NOTE(review): presumably handed to setvbuf() for stdin/stdout -- confirm. */
#if !defined(PERL_XS) && !defined(WIN32DLL)
static unsigned char stdibuf[IOBUF_SIZE];
static unsigned char stdobuf[IOBUF_SIZE];
#endif
/* flags */
/* Option flags and the getc/ungetc hooks of the optional input filter
 * stages.  Each hook pair is initialised to std_getc/std_ungetc. */
static int unbuf_f = FALSE;
static int estab_f = FALSE;
static int nop_f = FALSE;
static int binmode_f = TRUE; /* binary mode */
static int rot_f = FALSE; /* ROT13/47 mode */
static int hira_f = FALSE; /* hiragana/katakana conversion */
static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
static int mimeout_f = FALSE; /* base64 mode */
static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
#ifdef UNICODE_NORMALIZATION
static int nfc_f = FALSE;
static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif
#ifdef INPUT_OPTION
static int cap_f = FALSE;
static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
static int url_f = FALSE;
static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif
/*
 * Layout of an nkf_char that carries more than a plain byte value:
 * the top byte (CLASS_MASK) is a class tag and the low 24 bits
 * (VALUE_MASK) are the value.  CLASS_UNICODE tags a Unicode code point;
 * PREFIX_EUCG3 (0x8F00) is OR-ed in by nkf_char_euc3_new().
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
/* Constructors: tag a value as EUC G3 or as a Unicode code point. */
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* Predicates.  Parameters are parenthesized so operator-containing
 * arguments expand correctly.  UNICODE_BMP_MAX and UNICODE_MAX are already
 * wrapped in NKF_INT32_C, so they are used directly: wrapping them a second
 * time would not compile if NKF_INT32_C pastes a suffix onto its argument. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
/* Flags and hooks for optional features; each group exists only when the
 * corresponding build option is defined. */
#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif
#ifdef CHECK_OPTION
static int noout_f = FALSE;
static void no_putc(nkf_char c);
static int debug_f = FALSE;
static void debug(const char *str);
static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
#endif
static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
static void set_input_codename(char *codename);
#ifdef EXEC_IO
static int exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
/* invert IBM extended characters to others */
static int cp51932_f = FALSE;
/* invert NEC-selected IBM extended characters to IBM extended characters */
static int cp932inv_f = TRUE;
/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
#endif /* SHIFTJIS_CP932 */
static int x0212_f = FALSE;
static int x0213_f = FALSE;
/* 256-entry table indexed by byte value; zero-initialised here. */
static unsigned char prefix_table[256];
static void e_status(struct input_code *, nkf_char);
static void s_status(struct input_code *, nkf_char);
/* Candidate input encodings, each with its status function and input
 * converter.  UTF-8 is present only with UTF8_INPUT_ENABLE.  The list ends
 * with an all-zero entry (name == 0). */
struct input_code input_code_list[] = {
{"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
{"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
#ifdef UTF8_INPUT_ENABLE
{"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
#endif
{0}
};
/* MIME output state. */
static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
static int base64_count = 0;
/* X0208 -> ASCII converter */
/* fold parameter */
static int f_line = 0; /* chars in line */
static int f_prev = 0;
static int fold_preserve_f = FALSE; /* preserve new lines */
static int fold_f = FALSE;
static int fold_len = 0;
/* options */
/* Initialised from DEFAULT_J and DEFAULT_R; usage() documents "i" and "o"
 * as choosing the escape sequences for JIS X 0208 and ASCII/Roman. */
static unsigned char kanji_intro = DEFAULT_J;
static unsigned char ascii_intro = DEFAULT_R;
/* Folding */
#define FOLD_MARGIN 10
#define DEFAULT_FOLD 60
static int fold_margin = FOLD_MARGIN;
/* process default */
/*
 * Placeholder input converter installed until a real one is connected.
 * Reaching it is an internal error: it reports the failure on stderr and
 * terminates the process with exit status 1.  The arguments are ignored.
 */
static nkf_char
no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
{
    static const char failure_msg[] =
        "nkf internal module connection failure.\n";

    fputs(failure_msg, stderr);
    exit(1);
    return 0; /* LINT */
}
/*
 * Placeholder output converter: forwards to no_connection2() (with c0 = 0),
 * which reports the internal error and exits.
 */
static void
no_connection(nkf_char c2, nkf_char c1)
{
    (void)no_connection2(c2, c1, 0);
}
/* Converter hooks.  All start out pointing at the no_connection
 * placeholders, which abort the program if called before real converters
 * have been installed. */
static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
/* static redirections */
/* Character I/O hooks; all default to the std_* primitives. */
static void (*o_putc)(nkf_char c) = std_putc;
static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
/* for strict mime */
static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
/* Global states */
static int output_mode = ASCII; /* output kanji mode */
static int input_mode = ASCII; /* input kanji mode */
static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
/* X0201 / X0208 conversion tables */
/* X0201 kana conversion table */
/* 90-9F A0-DF */
/* 130 bytes arranged as 65 two-byte entries; the last entry is 0x00,0x00.
 * NOTE(review): each pair is presumably the JIS X 0208 two-byte code for
 * one JIS X 0201 kana byte -- confirm against the code that indexes it. */
static const unsigned char cv[]= {
0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
0x00,0x00};
/* X0201 kana conversion table for dakuten (voiced sound mark) */
/* 90-9F A0-DF */
/* Same size and layout as cv (65 two-byte entries).  Most entries are
 * 0x00,0x00.
 * NOTE(review): a zero pair presumably means the kana has no voiced form
 * -- confirm against the code that indexes it. */
static const unsigned char dv[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00};
/* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
/* 90-9F A0-DF */
/* Same size and layout as cv (65 two-byte entries).  Only five entries
 * are non-zero.
 * NOTE(review): a zero pair presumably means the kana has no semi-voiced
 * form -- confirm against the code that indexes it. */
static const unsigned char ev[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00};
/* X0208 kigou (symbol) conversion table */
/* 0x8140 - 0x819e */
/* 96 single-byte entries.
 * NOTE(review): non-zero entries look like the ASCII replacement for a
 * JIS X 0208 symbol and 0x00 like "no replacement" -- confirm against the
 * code that indexes it. */
static const unsigned char fv[] = {
0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
} ;
/* Miscellaneous program state. */
static int option_mode = 0;
static int file_out_f = FALSE;
#ifdef OVERWRITE
static int overwrite_f = FALSE;
static int preserve_time_f = FALSE;
static int backup_f = FALSE;
static char *backup_suffix = "";
#endif
static int eolmode_f = 0; /* CR, LF, CRLF */
static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
static nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
static int end_check;
#endif /*Easy Win */
/* Buffer of STD_GC_BUFSIZE characters plus an index into it.
 * NOTE(review): presumably the push-back stack used by std_getc() and
 * std_ungetc() -- confirm.  Both objects have external linkage. */
#define STD_GC_BUFSIZE (256)
nkf_char std_gc_buf[STD_GC_BUFSIZE];
nkf_char std_gc_ndx;
/*
 * Compare two NUL-terminated strings, ignoring the case of ASCII letters.
 * Returns TRUE when they have the same length and every character pair
 * matches after nkf_toupper(); FALSE otherwise.
 */
static int
nkf_str_caseeql(const char *src, const char *target)
{
    const char *s = src;
    const char *t = target;

    while (*s && *t) {
        if (nkf_toupper(*s) != nkf_toupper(*t)) {
            return FALSE;
        }
        s++;
        t++;
    }
    /* equal only if both strings ended at the same position */
    return (*s || *t) ? FALSE : TRUE;
}
/*
 * Return the nkf_encoding_table entry for an enum nkf_encodings index,
 * or 0 when idx is outside [0, NKF_ENCODING_TABLE_SIZE).
 */
static nkf_encoding*
nkf_enc_from_index(int idx)
{
    if (0 <= idx && idx < NKF_ENCODING_TABLE_SIZE) {
        return nkf_encoding_table + idx;
    }
    return 0;
}
/*
 * Look up an encoding name (case-insensitively) in
 * encoding_name_to_id_table and return its id, or -1 if it is not listed.
 * A leading "X-" (upper-case 'X' only) is skipped before the comparison.
 */
static int
nkf_enc_find_index(const char *name)
{
    const char *key = name;
    int pos = 0;

    if (key[0] == 'X' && key[1] == '-') {
        key += 2;
    }
    /* the table ends with an entry whose id is negative */
    while (encoding_name_to_id_table[pos].id >= 0) {
        if (nkf_str_caseeql(encoding_name_to_id_table[pos].name, key)) {
            return encoding_name_to_id_table[pos].id;
        }
        pos++;
    }
    return -1;
}
/*
 * Find an encoding by name.  Returns the matching nkf_encoding_table
 * entry, or 0 when the name is unknown.
 */
static nkf_encoding*
nkf_enc_find(const char *name)
{
    const int found = nkf_enc_find_index(name);

    return (found < 0) ? 0 : nkf_enc_from_index(found);
}
/* Accessors for nkf_encoding pointers.  enc must not be null: every macro
 * dereferences it, and some evaluate it more than once. */
#define nkf_enc_name(enc) (enc)->name
#define nkf_enc_to_index(enc) (enc)->id
#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is ASCII or ISO-2022-JP. */
#define nkf_enc_asciicompat(enc) (\
nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is UTF-8, UTF-16 or UTF-32. */
#define nkf_enc_unicode_p(enc) (\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
#define nkf_enc_cp5022x_p(enc) (\
nkf_enc_to_index(enc) == CP50220 ||\
nkf_enc_to_index(enc) == CP50221 ||\
nkf_enc_to_index(enc) == CP50222)
#ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the current locale's character encoding, or NULL
 * when it cannot be determined on this platform.
 *
 * With HAVE_LANGINFO_H the string comes from nl_langinfo(CODESET).  On
 * __WIN32__ the name is "CP<n>" built from GetACP(); it lives in a static
 * buffer, so it is overwritten by the next call and must not be freed.
 */
static char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    /* "CP" + up to 10 decimal digits (enough for a 32-bit value) + NUL */
    static char buf[16];
    sprintf(buf, "CP%u", (unsigned int)GetACP());
    return buf;
#else
    return NULL;
#endif
}

/*
 * Return the nkf_encoding that matches the locale's character encoding,
 * or 0 when the locale encoding is unavailable or not a known name.
 */
static nkf_encoding*
nkf_locale_encoding(void)
{
    char *encname = nkf_locale_charmap();

    if (!encname) {
        return 0;
    }
    /* nkf_enc_find() already returns 0 for names it does not know */
    return nkf_enc_find(encname);
}
#endif /* DEFAULT_CODE_LOCALE */
/*
 * Return the default encoding chosen at build time, or 0 when none is
 * configured (or the locale lookup finds nothing).
 *
 * With DEFAULT_CODE_LOCALE the locale's encoding is used; otherwise, when
 * one of the DEFAULT_CODE_* options defined DEFAULT_ENCIDX, that table
 * entry is returned.
 */
static nkf_encoding*
nkf_default_encoding(void)
{
    nkf_encoding *enc = 0;
#ifdef DEFAULT_CODE_LOCALE
    enc = nkf_locale_encoding();
#elif defined(DEFAULT_ENCIDX)
    /* DEFAULT_ENCIDX expands to an enum constant, which the preprocessor
     * would evaluate as 0 in a plain "#elif DEFAULT_ENCIDX"; it has to be
     * tested with defined(). */
    enc = nkf_enc_from_index(DEFAULT_ENCIDX);
#endif
    return enc;
}
#ifndef PERL_XS
#ifdef WIN32DLL
#define fprintf dllprintf
#endif
/* Print the program name, version, release date and copyright notice
 * to HELP_OUTPUT. */
static void
version(void)
{
    fprintf(HELP_OUTPUT,
            "Network Kanji Filter Version " NKF_VERSION
            " (" NKF_RELEASE_DATE ") \n"
            COPY_RIGHT "\n");
}
static void
usage(void)
{
fprintf(HELP_OUTPUT,
"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
"Flags:\n"
"b,u Output is buffered (DEFAULT),Output is unbuffered\n"
"j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
#ifdef UTF8_OUTPUT_ENABLE
" After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
#endif
"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
#ifdef UTF8_INPUT_ENABLE
" After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
#endif
"t no conversion\n"
"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
"r {de/en}crypt ROT13/47\n"
"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
"m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
"M[BQ] MIME encode [B:base64 Q:quoted]\n"
"l ISO8859-1 (Latin-1) support\n"
"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
" 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
" 4: JISX0208 Katakana to JISX0201 Katakana\n"
"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
#ifdef MSDOS
"T Text mode output\n"
#endif
"O Output to File (DEFAULT 'nkf.out')\n"
"I Convert non ISO-2022-JP charactor to GETA\n"
"d,c Convert line breaks -d: LF -c: CRLF\n"
"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
"v, V Show this usage. V: show configuration\n"
"\n"
"Long name options\n"
" --ic= --oc=