summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog | 7
-rw-r--r--  ext/nkf/lib/kconv.rb | 50
-rw-r--r--  ext/nkf/nkf-utf8/nkf.c | 25
-rw-r--r--  ext/nkf/nkf-utf8/utf8tbl.c | 18
4 files changed, 47 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 060d78d56a..23f1a673a8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Tue Jun 28 01:52:00 2005 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * ext/nkf/lib/kconv.rb: add Kconv::VERSION
+ * ext/nkf/lib/kconv.rb (conv): can process arrayed options
+ * ext/nkf/nkf-utf8/nkf.c: imported Revision 1.69
+ * ext/nkf/nkf-utf8/utf8tbl.c: imported Revision 1.9
+
Sat Jun 25 17:12:20 2005 GOTOU Yuuzou <gotoyuzo@notwork.org>
* lib/webrick/httputils.rb (WEBrick::HTTPUtils.parse_query): should
diff --git a/ext/nkf/lib/kconv.rb b/ext/nkf/lib/kconv.rb
index fecddee978..8d57c2a5d2 100644
--- a/ext/nkf/lib/kconv.rb
+++ b/ext/nkf/lib/kconv.rb
@@ -11,6 +11,8 @@ module Kconv
# Public Constants
#
+ VERSION = '1.8'
+
#Constant of Encoding
AUTO = ::NKF::AUTO
JIS = ::NKF::JIS
@@ -28,6 +30,8 @@ module Kconv
# Private Constants
#
+ REVISON = %q$Revison$
+
#Regexp of Encoding
RegexpShiftjis = /\A(?:
[\x00-\x7f\xa1-\xdf] |
@@ -100,30 +104,6 @@ module Kconv
UNKNOWN => :unknown
}
- SYMBOL_TO_CONSTANT = {
- :auto => AUTO,
- :unknown => UNKNOWN,
- :binary => BINARY,
- :ascii => ASCII,
- :ascii => ASCII,
- :shiftjis => SJIS,
- :sjis => SJIS,
- :cp932 => SJIS,
- :eucjp => EUC,
- :euc => EUC,
- :eucjpms => EUC,
- :iso2022jp => JIS,
- :jis => JIS,
- :utf8 => UTF8,
- :utf8n => UTF8,
- :utf16 => UTF16,
- :utf16be => UTF16,
- :utf16ben => UTF16,
- :utf16le => UTF16,
- :utf16len => UTF16,
- :noconv => NOCONV
- }
-
#
# Public Methods
#
@@ -186,14 +166,9 @@ module Kconv
to = symbol_to_option(option[0])
from = symbol_to_option(option[1]).to_s.sub(/(-[jesw])/o){$1.upcase}
- opt = Array.new
- if option[2].is_a? Array
- opt << option[2].map{|x|symbol_to_option(x)}.compact.join('')
- elsif option[2].is_a? String
- opt << option[2]
- end
+ opt = option[2..-1].to_a.map{|x|symbol_to_option(x)}.compact.join('')
- nkf_opt = ('-x -m0 %s %s %s' % [to, from, opt.join(' ')])
+ nkf_opt = ('-x -m0 %s %s %s' % [to, from, opt])
result = ::NKF::nkf( nkf_opt, str)
end
module_function :conv
@@ -300,12 +275,12 @@ module Kconv
#
# Private Methods
#
-
+ private
def symbol_to_option(symbol)
- if symbol.to_s[0] == ?-
- return symbol.to_s
- elsif symbol.is_a? Integer
+ if symbol.is_a? Integer
symbol = CONSTANT_TO_SYMBOL[symbol]
+ elsif symbol.to_s[0] == ?-
+ return symbol.to_s
end
begin
SYMBOL_TO_OPTION[ symbol.to_s.downcase.delete('-_').to_sym ]
@@ -313,13 +288,12 @@ module Kconv
return nil
end
end
-private :symbol_to_option
module_function :symbol_to_option
end
class String
- def kconv(out_code, in_code=Kconv::AUTO)
- Kconv::kconv(self, out_code, in_code)
+ def kconv(*args)
+ Kconv::kconv(self, *args)
end
def conv(*args)
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c
index 2973799fb2..9fd1436239 100644
--- a/ext/nkf/nkf-utf8/nkf.c
+++ b/ext/nkf/nkf-utf8/nkf.c
@@ -41,7 +41,7 @@
***********************************************************************/
/* $Id$ */
#define NKF_VERSION "2.0.5"
-#define NKF_RELEASE_DATE "2005-04-10"
+#define NKF_RELEASE_DATE "2005-06-28"
#include "config.h"
static char *CopyRight =
@@ -970,13 +970,9 @@ options(cp)
if (option_mode==1)
return;
- if (*cp++ != '-')
- return;
+ while(*cp && *cp++!='-');
while (*cp) {
- if (p && !*cp) {
- cp = p;
- p = 0;
- }
+ p = 0;
switch (*cp++) {
case '-': /* literal options */
if (!*cp) { /* ignore the rest of arguments */
@@ -986,8 +982,8 @@ options(cp)
for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
int j;
p = (unsigned char *)long_option[i].name;
- for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
- if (*p == cp[j]){
+ for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
+ if (*p == cp[j] || cp[j] == ' '){
p = &cp[j];
break;
}
@@ -996,6 +992,7 @@ options(cp)
if (p == 0) return;
cp = (unsigned char *)long_option[i].alias;
if (!*cp){
+ cp = p;
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
file_out = TRUE;
@@ -1122,8 +1119,8 @@ options(cp)
continue;
case 'h':
/*
- bit:1 hira -> kata
- bit:2 kata -> hira
+ bit:1 katakana->hiragana
+ bit:2 hiragana->katakana
*/
if ('9'>= *cp && *cp>='0')
hira_f |= (*cp++ -'0');
@@ -1320,8 +1317,7 @@ options(cp)
continue;
case ' ':
/* module muliple options in a string are allowed for Perl moudle */
- while(*cp && *cp!='-') cp++;
- if(*cp=='-') cp++;
+ while(*cp && *cp++!='-');
continue;
default:
/* bogus option but ignored */
@@ -4732,7 +4728,7 @@ usage()
fprintf(stderr,"t no conversion\n");
fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
- fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
+ fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(stderr,"v Show this usage. V: show version\n");
fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
@@ -4755,6 +4751,7 @@ usage()
fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
fprintf(stderr," --x0212 Convert JISX0212\n");
fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
+ fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c
index 12e2403cf8..5b6c09ad23 100644
--- a/ext/nkf/nkf-utf8/utf8tbl.c
+++ b/ext/nkf/nkf-utf8/utf8tbl.c
@@ -1277,6 +1277,22 @@ unsigned short euc_to_utf8_FC[] = {
0x999E, 0x9A4E, 0x9AD9, 0x9ADC, 0x9B75, 0x9B72, 0x9B8F, 0x9BB1,
0x9BBB, 0x9C00, 0x9D70, 0x9D6B, 0xFA2D, 0x9E19, 0x9ED1, 0,
0, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176,
+ 0x2177, 0x2178, 0x2179, 0xFFE2, 0x00A6, 0xFF07, 0xFF02,
+};
+
+/* Microsoft UCS Mapping Compatible */
+unsigned short euc_to_utf8_FC_ms[] = {
+ 0x91D7, 0x91DE, 0x91ED, 0x91EE, 0x91E4, 0x91E5, 0x9206,
+ 0x9210, 0x920A, 0x923A, 0x9240, 0x923C, 0x924E, 0x9259, 0x9251,
+ 0x9239, 0x9267, 0x92A7, 0x9277, 0x9278, 0x92E7, 0x92D7, 0x92D9,
+ 0x92D0, 0xFA27, 0x92D5, 0x92E0, 0x92D3, 0x9325, 0x9321, 0x92FB,
+ 0xFA28, 0x931E, 0x92FF, 0x931D, 0x9302, 0x9370, 0x9357, 0x93A4,
+ 0x93C6, 0x93DE, 0x93F8, 0x9431, 0x9445, 0x9448, 0x9592, 0xF9DC,
+ 0xFA29, 0x969D, 0x96AF, 0x9733, 0x973B, 0x9743, 0x974D, 0x974F,
+ 0x9751, 0x9755, 0x9857, 0x9865, 0xFA2A, 0xFA2B, 0x9927, 0xFA2C,
+ 0x999E, 0x9A4E, 0x9AD9, 0x9ADC, 0x9B75, 0x9B72, 0x9B8F, 0x9BB1,
+ 0x9BBB, 0x9C00, 0x9D70, 0x9D6B, 0xFA2D, 0x9E19, 0x9ED1, 0,
+ 0, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176,
0x2177, 0x2178, 0x2179, 0xFFE2, 0xFFE4, 0xFF07, 0xFF02,
};
@@ -2300,7 +2316,7 @@ unsigned short * euc_to_utf8_2bytes_ms[] = {
euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3,
euc_to_utf8_F4, euc_to_utf8_F5, 0, 0,
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
- euc_to_utf8_FC, 0, 0,
+ euc_to_utf8_FC_ms, 0, 0,
};
#ifdef X0212_ENABLE