diff options
Diffstat (limited to 'ext/nkf/nkf.c')
| -rw-r--r-- | ext/nkf/nkf.c | 198 |
1 files changed, 0 insertions, 198 deletions
diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c deleted file mode 100644 index ca6de73e10..0000000000 --- a/ext/nkf/nkf.c +++ /dev/null @@ -1,198 +0,0 @@ -#include "ruby.h" - -#define _AUTO 0 -#define _JIS 1 -#define _EUC 2 -#define _SJIS 3 -#define _BINARY 4 -#define _NOCONV 4 -#define _UNKNOWN _AUTO - -#undef getc -#undef ungetc -#define getc(f) (input_ctr<i_len?input[input_ctr++]:-1) -#define ungetc(c,f) input_ctr-- - -#undef putchar -#define putchar(c) rb_nkf_putchar(c) - -#define INCSIZE 32 -static int incsize; - -static unsigned char *input, *output; -static int input_ctr, i_len; -static int output_ctr, o_len; - -static VALUE dst; - -static int -rb_nkf_putchar(c) - unsigned int c; -{ - if (output_ctr >= o_len) { - o_len += incsize; - rb_str_resize(dst, o_len); - output = RSTRING(dst)->ptr; - incsize *= 2; - } - output[output_ctr++] = c; - - return c; -} - -#define PERL_XS 1 -#include "nkf1.7/nkf.c" - -static VALUE -rb_nkf_kconv(obj, opt, src) - VALUE obj, opt, src; -{ - char *opt_ptr, *opt_end; - volatile VALUE v; - - reinit(); - StringValue(opt); - opt_ptr = RSTRING(opt)->ptr; - opt_end = opt_ptr + RSTRING(opt)->len; - for (; opt_ptr < opt_end; opt_ptr++) { - if (*opt_ptr != '-') { - continue; - } - arguments(opt_ptr); - } - - incsize = INCSIZE; - - input_ctr = 0; - StringValue(src); - input = RSTRING(src)->ptr; - i_len = RSTRING(src)->len; - dst = rb_str_new(0, i_len*3 + 10); - v = dst; - - output_ctr = 0; - output = RSTRING(dst)->ptr; - o_len = RSTRING(dst)->len; - *output = '\0'; - - if(iso8859_f && (oconv != j_oconv || !x0201_f )) { - iso8859_f = FALSE; - } - - kanji_convert(NULL); - RSTRING(dst)->ptr[output_ctr] = '\0'; - RSTRING(dst)->len = output_ctr; - OBJ_INFECT(dst, src); - - return dst; -} - -/* - * Character code detection - Algorithm described in: - * Ken Lunde. `Understanding Japanese Information Processing' - * Sebastopol, CA: O'Reilly & Associates. - */ - -static VALUE -rb_nkf_guess(obj, src) - VALUE obj, src; -{ - unsigned char *p; - unsigned char *pend; - int sequence_counter = 0; - - StringValue(src); - p = RSTRING(src)->ptr; - pend = p + RSTRING(src)->len; - if (p == pend) return INT2FIX(_UNKNOWN); - -#define INCR do {\ - p++;\ - if (p==pend) return INT2FIX(_UNKNOWN);\ - sequence_counter++;\ - if (sequence_counter % 2 == 1 && *p != 0xa4)\ - sequence_counter = 0;\ - if (6 <= sequence_counter) {\ - sequence_counter = 0;\ - return INT2FIX(_EUC);\ - }\ -} while (0) - - if (*p == 0xa4) - sequence_counter = 1; - - while (p<pend) { - if (*p == '\033') { - return INT2FIX(_JIS); - } - if (*p < '\006' || *p == 0x7f || *p == 0xff) { - return INT2FIX(_BINARY); - } - if (0x81 <= *p && *p <= 0x8d) { - return INT2FIX(_SJIS); - } - if (0x8f <= *p && *p <= 0x9f) { - return INT2FIX(_SJIS); - } - if (*p == 0x8e) { /* SS2 */ - INCR; - if ((0x40 <= *p && *p <= 0x7e) || - (0x80 <= *p && *p <= 0xa0) || - (0xe0 <= *p && *p <= 0xfc)) - return INT2FIX(_SJIS); - } - else if (0xa1 <= *p && *p <= 0xdf) { - INCR; - if (0xf0 <= *p && *p <= 0xfe) - return INT2FIX(_EUC); - if (0xe0 <= *p && *p <= 0xef) { - while (p < pend && *p >= 0x40) { - if (*p >= 0x81) { - if (*p <= 0x8d || (0x8f <= *p && *p <= 0x9f)) { - return INT2FIX(_SJIS); - } - else if (0xfd <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - } - INCR; - } - } - else if (*p <= 0x9f) { - return INT2FIX(_SJIS); - } - } - else if (0xf0 <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - else if (0xe0 <= *p && *p <= 0xef) { - INCR; - if ((0x40 <= *p && *p <= 0x7e) || - (0x80 <= *p && *p <= 0xa0)) { - return INT2FIX(_SJIS); - } - if (0xfd <= *p && *p <= 0xfe) { - return INT2FIX(_EUC); - } - } - INCR; - } - return INT2FIX(_UNKNOWN); -} - -void -Init_nkf() -{ - VALUE mKconv = rb_define_module("NKF"); - - rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2); - rb_define_module_function(mKconv, "guess", rb_nkf_guess, 1); - - rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO)); - rb_define_const(mKconv, "JIS", INT2FIX(_JIS)); - rb_define_const(mKconv, "EUC", INT2FIX(_EUC)); - rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS)); - rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY)); - rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV)); - rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN)); -} |
