From 03a1e25542345e6e4b40864a27d62d11df700047 Mon Sep 17 00:00:00 2001
From: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Sun, 23 Dec 2007 09:37:51 +0000
Subject: * ext/nkf/nkf.c (rb_nkf_enc_get): use rb_define_dummy_encoding.

* ext/nkf/nkf.c (Init_nkf): use rb_nkf_enc_get("ASCII").

* ext/nkf/nkf-utf8/nkf.c: Update 1.161.

* ext/nkf/nkf-utf8/config.h: default output encoding is now UTF-8.

* ext/nkf/lib/kconv.rb (Kconv.kconv): replace Encoding#name by
  Encoding#to_s.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14520 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                 |  13 ++++
 ext/nkf/lib/kconv.rb      |  11 +---
 ext/nkf/nkf-utf8/config.h |   4 +-
 ext/nkf/nkf-utf8/nkf.c    | 160 +++++++++++++++++++++++++---------------------
 ext/nkf/nkf.c             |   4 +-
 5 files changed, 107 insertions(+), 85 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 3e52e885cb..2db04cbb67 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+Sun Dec 23 18:31:49 2007  NARUSE, Yui <naruse@ruby-lang.org>
+
+	* ext/nkf/nkf.c (rb_nkf_enc_get): use rb_define_dummy_encoding.
+
+	* ext/nkf/nkf.c (Init_nkf): use rb_nkf_enc_get("ASCII").
+
+	* ext/nkf/nkf-utf8/nkf.c: Update 1.161.
+
+	* ext/nkf/nkf-utf8/config.h: default output encoding is now UTF-8.
+
+	* ext/nkf/lib/kconv.rb (Kconv.kconv): replace Encoding#name by
+	  Encoding#to_s.
+
 Sun Dec 23 18:02:52 2007  Eric Hodel  <drbrain@segment7.net>
 
 	* lib/rubygems/gem_open_uri.rb:  Fix version check.
diff --git a/ext/nkf/lib/kconv.rb b/ext/nkf/lib/kconv.rb
index f67f19a860..2f22e32aad 100644
--- a/ext/nkf/lib/kconv.rb
+++ b/ext/nkf/lib/kconv.rb
@@ -87,8 +87,8 @@ module Kconv
   # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
   def kconv(str, to_enc, from_enc=nil)
     opt = ''
-    opt += ' --ic=' + from_enc.name if from_enc
-    opt += ' --oc=' + to_enc.name if to_enc
+    opt += ' --ic=' + from_enc.to_s if from_enc
+    opt += ' --oc=' + to_enc.to_s if to_enc
 
     ::NKF::nkf(opt, str)
   end
@@ -210,13 +210,8 @@ class String
   #
   # Convert <code>self</code> to out_code.
   # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
-  #
-  # *Note*
-  # This method decode MIME encoded string and
-  # convert halfwidth katakana to fullwidth katakana.
-  # If you don't want to decode them, use NKF.nkf.
   def kconv(to_enc, from_enc=nil)
-    form_enc = self.encoding.name if !from_enc && self.encoding != Encoding.list[0]
+    from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
     Kconv::kconv(self, to_enc, from_enc)
   end
   
diff --git a/ext/nkf/nkf-utf8/config.h b/ext/nkf/nkf-utf8/config.h
index 1d3d46a71e..1fa74afd40 100644
--- a/ext/nkf/nkf-utf8/config.h
+++ b/ext/nkf/nkf-utf8/config.h
@@ -59,10 +59,10 @@ typedef int nkf_nfchar;
 /******************************/
 /* デフォルトの出力コード選択 */
 /* Select DEFAULT_CODE */
-#define DEFAULT_CODE_JIS
+/* #define DEFAULT_CODE_JIS */
 /* #define DEFAULT_CODE_SJIS */
 /* #define DEFAULT_CODE_EUC */
-/* #define DEFAULT_CODE_UTF8 */
+#define DEFAULT_CODE_UTF8
 /******************************/
 #else
 #define DEFAULT_CODE_SJIS
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c
index 8a47e643ec..66f01896d6 100644
--- a/ext/nkf/nkf-utf8/nkf.c
+++ b/ext/nkf/nkf-utf8/nkf.c
@@ -32,7 +32,7 @@
 ***********************************************************************/
 /* $Id$ */
 #define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-12-22"
+#define NKF_RELEASE_DATE "2007-12-23"
 #define COPY_RIGHT \
     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
@@ -72,6 +72,11 @@
 #define PUT_NEWLINE(func) func(0x0A)
 #define OCONV_NEWLINE(func) func(0, 0x0A)
 #endif
+#ifdef HELP_OUTPUT_STDERR
+#define HELP_OUTPUT stderr
+#else
+#define HELP_OUTPUT stdout
+#endif
 
 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
 #define MSDOS
@@ -905,13 +910,13 @@ char* nkf_strcpy(const char *str)
     return result;
 }
 
-static void nkf_str_upcase(const char *str, char *res, size_t length)
+static void nkf_str_upcase(const char *src, char *dest, size_t length)
 {
     int i = 0;
-    for (; i < length && str[i]; i++) {
-	res[i] = nkf_toupper(str[i]);
+    for (; (size_t)i + 1 < length && src[i]; i++) { /* length is dest's size: keep room for NUL */
+	dest[i] = nkf_toupper(src[i]);
     }
-    res[i] = 0;
+    dest[i] = 0;
 }
 
 static nkf_encoding *nkf_enc_from_index(int idx)
@@ -1056,8 +1061,7 @@ int main(int argc, char **argv)
 	    iconv_for_check = 0;
 #endif
           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
-              perror(*--argv);
-		*argv++;
+		perror(*(argv-1));
 		is_argument_error = TRUE;
 		continue;
           } else {
@@ -1354,6 +1358,7 @@ void options(unsigned char *cp)
     char codeset[32];
     nkf_encoding *enc;
 
+    if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
     if (option_mode==1)
 	return;
     while(*cp && *cp++!='-');
@@ -1389,7 +1394,7 @@ void options(unsigned char *cp)
 		cp = (unsigned char *)long_option[i].alias;
 	    }else{
                 if (strcmp(long_option[i].name, "ic=") == 0){
-		    nkf_str_upcase(p, codeset, 32);
+		    nkf_str_upcase((char *)p, codeset, 32);
 		    enc = nkf_enc_find(codeset);
 		    switch (nkf_enc_to_index(enc)) {
 		    case ISO_2022_JP:
@@ -1529,8 +1534,10 @@ void options(unsigned char *cp)
 		}
                 if (strcmp(long_option[i].name, "oc=") == 0){
 		    x0201_f = FALSE;
-		    nkf_str_upcase(p, codeset, 32);
-		    output_encoding = nkf_enc_find(codeset);
+		    nkf_str_upcase((char *)p, codeset, 32);
+		    enc = nkf_enc_find(codeset);
+		    if (!enc) continue; /* unknown codeset name: keep the current output encoding */
+		    output_encoding = enc;
 		    switch (nkf_enc_to_index(output_encoding)) {
 		    case ISO_2022_JP:
 			output_conv = j_oconv;
@@ -1889,8 +1896,8 @@ void options(unsigned char *cp)
         case 't':           /* transparent mode */
             if (*cp=='1') {
 		/* alias of -t */
+		cp++;
 		nop_f = TRUE;
-		*cp++;
 	    } else if (*cp=='2') {
 		/*
 		 * -t with put/get
@@ -1898,8 +1905,8 @@ void options(unsigned char *cp)
 		 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
 		 *
 		 */
+		cp++;
 		nop_f = 2;
-		*cp++;
             } else
 		nop_f = TRUE;
             continue;
@@ -6370,87 +6377,87 @@ nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
 #endif
 void usage(void)
 {
-    fprintf(stderr,"USAGE:  nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
-    fprintf(stderr,"Flags:\n");
-    fprintf(stderr,"b,u      Output is buffered (DEFAULT),Output is unbuffered\n");
+    fprintf(HELP_OUTPUT,"USAGE:  nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
+    fprintf(HELP_OUTPUT,"Flags:\n");
+    fprintf(HELP_OUTPUT,"b,u      Output is buffered (DEFAULT),Output is unbuffered\n");
 #ifdef DEFAULT_CODE_SJIS
-    fprintf(stderr,"j,s,e,w  Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
+    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
 #endif
 #ifdef DEFAULT_CODE_JIS
-    fprintf(stderr,"j,s,e,w  Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
+    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
 #endif
 #ifdef DEFAULT_CODE_EUC
-    fprintf(stderr,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
+    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
 #endif
 #ifdef DEFAULT_CODE_UTF8
-    fprintf(stderr,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
+    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-    fprintf(stderr,"         After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
+    fprintf(HELP_OUTPUT,"         After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
 #endif
-    fprintf(stderr,"J,S,E,W  Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
+    fprintf(HELP_OUTPUT,"J,S,E,W  Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
 #ifdef UTF8_INPUT_ENABLE
-    fprintf(stderr,"         After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
-#endif
-    fprintf(stderr,"t        no conversion\n");
-    fprintf(stderr,"i[@B]    Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
-    fprintf(stderr,"o[BJH]   Specify the Esc Seq for ASCII/Roman        (DEFAULT B)\n");
-    fprintf(stderr,"r        {de/en}crypt ROT13/47\n");
-    fprintf(stderr,"h        1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
-    fprintf(stderr,"m[BQN0]  MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
-    fprintf(stderr,"M[BQ]    MIME encode [B:base64 Q:quoted]\n");
-    fprintf(stderr,"l        ISO8859-1 (Latin-1) support\n");
-    fprintf(stderr,"f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
-    fprintf(stderr,"Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n");
-    fprintf(stderr,"         1: Kankaku to one space  2: to two spaces  3: HTML Entity\n");
-    fprintf(stderr,"         4: JISX0208 Katakana to JISX0201 Katakana\n");
-    fprintf(stderr,"X,x      Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
-    fprintf(stderr,"B[0-2]   Broken input  0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
+    fprintf(HELP_OUTPUT,"         After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
+#endif
+    fprintf(HELP_OUTPUT,"t        no conversion\n");
+    fprintf(HELP_OUTPUT,"i[@B]    Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
+    fprintf(HELP_OUTPUT,"o[BJH]   Specify the Esc Seq for ASCII/Roman        (DEFAULT B)\n");
+    fprintf(HELP_OUTPUT,"r        {de/en}crypt ROT13/47\n");
+    fprintf(HELP_OUTPUT,"h        1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
+    fprintf(HELP_OUTPUT,"m[BQN0]  MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
+    fprintf(HELP_OUTPUT,"M[BQ]    MIME encode [B:base64 Q:quoted]\n");
+    fprintf(HELP_OUTPUT,"l        ISO8859-1 (Latin-1) support\n");
+    fprintf(HELP_OUTPUT,"f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
+    fprintf(HELP_OUTPUT,"Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n");
+    fprintf(HELP_OUTPUT,"         1: Kankaku to one space  2: to two spaces  3: HTML Entity\n");
+    fprintf(HELP_OUTPUT,"         4: JISX0208 Katakana to JISX0201 Katakana\n");
+    fprintf(HELP_OUTPUT,"X,x      Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
+    fprintf(HELP_OUTPUT,"B[0-2]   Broken input  0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
 #ifdef MSDOS
-    fprintf(stderr,"T        Text mode output\n");
-#endif
-    fprintf(stderr,"O        Output to File (DEFAULT 'nkf.out')\n");
-    fprintf(stderr,"I        Convert non ISO-2022-JP charactor to GETA\n");
-    fprintf(stderr,"d,c      Convert line breaks  -d: LF  -c: CRLF\n");
-    fprintf(stderr,"-L[uwm]  line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
-    fprintf(stderr,"v, V     Show this usage. V: show configuration\n");
-    fprintf(stderr,"\n");
-    fprintf(stderr,"Long name options\n");
-    fprintf(stderr," --ic=<input codeset>  --oc=<output codeset>\n");
-    fprintf(stderr,"                   Specify the input or output codeset\n");
-    fprintf(stderr," --fj  --unix --mac  --windows\n");
-    fprintf(stderr," --jis  --euc  --sjis  --utf8  --utf16  --mime  --base64\n");
-    fprintf(stderr,"                   Convert for the system or code\n");
-    fprintf(stderr," --hiragana  --katakana  --katakana-hiragana\n");
-    fprintf(stderr,"                   To Hiragana/Katakana Conversion\n");
-    fprintf(stderr," --prefix=         Insert escape before troublesome characters of Shift_JIS\n");
+    fprintf(HELP_OUTPUT,"T        Text mode output\n");
+#endif
+    fprintf(HELP_OUTPUT,"O        Output to File (DEFAULT 'nkf.out')\n");
+    fprintf(HELP_OUTPUT,"I        Convert non ISO-2022-JP charactor to GETA\n");
+    fprintf(HELP_OUTPUT,"d,c      Convert line breaks  -d: LF  -c: CRLF\n");
+    fprintf(HELP_OUTPUT,"-L[uwm]  line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
+    fprintf(HELP_OUTPUT,"v, V     Show this usage. V: show configuration\n");
+    fprintf(HELP_OUTPUT,"\n");
+    fprintf(HELP_OUTPUT,"Long name options\n");
+    fprintf(HELP_OUTPUT," --ic=<input codeset>  --oc=<output codeset>\n");
+    fprintf(HELP_OUTPUT,"                   Specify the input or output codeset\n");
+    fprintf(HELP_OUTPUT," --fj  --unix --mac  --windows\n");
+    fprintf(HELP_OUTPUT," --jis  --euc  --sjis  --utf8  --utf16  --mime  --base64\n");
+    fprintf(HELP_OUTPUT,"                   Convert for the system or code\n");
+    fprintf(HELP_OUTPUT," --hiragana  --katakana  --katakana-hiragana\n");
+    fprintf(HELP_OUTPUT,"                   To Hiragana/Katakana Conversion\n");
+    fprintf(HELP_OUTPUT," --prefix=         Insert escape before troublesome characters of Shift_JIS\n");
 #ifdef INPUT_OPTION
-    fprintf(stderr," --cap-input, --url-input  Convert hex after ':' or '%%'\n");
+    fprintf(HELP_OUTPUT," --cap-input, --url-input  Convert hex after ':' or '%%'\n");
 #endif
 #ifdef NUMCHAR_OPTION
-    fprintf(stderr," --numchar-input   Convert Unicode Character Reference\n");
+    fprintf(HELP_OUTPUT," --numchar-input   Convert Unicode Character Reference\n");
 #endif
 #ifdef UTF8_INPUT_ENABLE
-    fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
-    fprintf(stderr,"                   Specify how nkf handles unassigned characters\n");
+    fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n");
+    fprintf(HELP_OUTPUT,"                   Specify how nkf handles unassigned characters\n");
 #endif
 #ifdef OVERWRITE
-    fprintf(stderr," --in-place[=SUFFIX]  --overwrite[=SUFFIX]\n");
-    fprintf(stderr,"                   Overwrite original listed files by filtered result\n");
-    fprintf(stderr,"                   --overwrite preserves timestamp of original files\n");
-#endif
-    fprintf(stderr," -g  --guess       Guess the input code\n");
-    fprintf(stderr," --help  --version Show this help/the version\n");
-    fprintf(stderr,"                   For more information, see also man nkf\n");
-    fprintf(stderr,"\n");
+    fprintf(HELP_OUTPUT," --in-place[=SUFFIX]  --overwrite[=SUFFIX]\n");
+    fprintf(HELP_OUTPUT,"                   Overwrite original listed files by filtered result\n");
+    fprintf(HELP_OUTPUT,"                   --overwrite preserves timestamp of original files\n");
+#endif
+    fprintf(HELP_OUTPUT," -g  --guess       Guess the input code\n");
+    fprintf(HELP_OUTPUT," --help  --version Show this help/the version\n");
+    fprintf(HELP_OUTPUT,"                   For more information, see also man nkf\n");
+    fprintf(HELP_OUTPUT,"\n");
     version();
 }
 
 void show_configuration(void)
 {
-    fprintf(stderr, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
-    fprintf(stderr, "  Compile-time options:\n");
-    fprintf(stderr, "    Default output encoding:     "
+    fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
+    fprintf(HELP_OUTPUT, "  Compile-time options:\n");
+    fprintf(HELP_OUTPUT, "    Default output encoding:     "
 #if defined(DEFAULT_CODE_JIS)
 	    "ISO-2022-JP"
 #elif defined(DEFAULT_CODE_SJIS)
@@ -6461,7 +6468,7 @@ void show_configuration(void)
 	    "UTF-8"
 #endif
 	    "\n");
-    fprintf(stderr, "    Default output newline:      "
+    fprintf(HELP_OUTPUT, "    Default output newline:      "
 #if DEFAULT_NEWLINE == CR
 	    "CR"
 #elif DEFAULT_NEWLINE == CRLF
@@ -6470,24 +6477,31 @@ void show_configuration(void)
 	    "LF"
 #endif
 	    "\n");
-    fprintf(stderr, "    Decode MIME encoded string:  "
+    fprintf(HELP_OUTPUT, "    Decode MIME encoded string:  "
 #if MIME_DECODE_DEFAULT
 	    "ON"
 #else
 	    "OFF"
 #endif
 	    "\n");
-    fprintf(stderr, "    Convert JIS X 0201 Katakana: "
+    fprintf(HELP_OUTPUT, "    Convert JIS X 0201 Katakana: "
 #if X0201_DEFAULT
 	    "ON"
 #else
 	    "OFF"
 #endif
 	    "\n");
+    fprintf(HELP_OUTPUT, " --help, --version output: "
+#ifdef HELP_OUTPUT_STDERR
+	    "STDERR"
+#else
+	    "STDOUT"
+#endif
+	    "\n");
 }
 
 void version(void)
 {
-    fprintf(stderr,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
+    fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
 }
 #endif /*PERL_XS*/
diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c
index d4afb84a38..703315834e 100644
--- a/ext/nkf/nkf.c
+++ b/ext/nkf/nkf.c
@@ -69,7 +69,7 @@ rb_encoding* rb_nkf_enc_get(const char *name)
 	nkf_native_encoding * nkf_base_enc = nkf_enc_to_base_encoding(nkf_enc);
 	idx = rb_enc_find_index(nkf_enc_name(nkf_base_enc));
 	if (idx < 0) {
-	    idx = rb_enc_replicate(name, rb_ascii8bit_encoding());
+	    idx = rb_define_dummy_encoding(name);
 	} else {
 	    rb_encoding *rb_enc = rb_enc_from_index(idx);
 	    idx = rb_enc_replicate(name, rb_enc);
@@ -485,7 +485,7 @@ Init_nkf()
     rb_define_const(mNKF, "NOCONV",	Qnil);
     rb_define_const(mNKF, "UNKNOWN",	Qnil);
     rb_define_const(mNKF, "BINARY",	rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
-    rb_define_const(mNKF, "ASCII",	rb_enc_from_encoding(rb_ascii8bit_encoding()));
+    rb_define_const(mNKF, "ASCII",	rb_enc_from_encoding(rb_nkf_enc_get("ASCII")));
     rb_define_const(mNKF, "JIS",	rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
     rb_define_const(mNKF, "EUC",	rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
     rb_define_const(mNKF, "SJIS",	rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
-- 
cgit v1.2.3