From 81ef4309a244b78223832895d714fcaec85ed5bf Mon Sep 17 00:00:00 2001 From: matz Date: Wed, 10 Jan 2001 07:52:46 +0000 Subject: * m17n baseline. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@1107 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- Makefile.in | 4 +- array.c | 2 +- eval.c | 4 +- file.c | 2 +- gc.c | 4 +- m17n.c | 692 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ m17n.h | 109 ++++++++++ process.c | 2 +- re.h | 5 - util.c | 13 -- variable.c | 2 - 11 files changed, 809 insertions(+), 30 deletions(-) create mode 100644 m17n.c create mode 100644 m17n.h diff --git a/Makefile.in b/Makefile.in index af29b78daa..b886c1022b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -58,6 +58,7 @@ OBJS = array.@OBJEXT@ \ hash.@OBJEXT@ \ inits.@OBJEXT@ \ io.@OBJEXT@ \ + m17n.@OBJEXT@ \ marshal.@OBJEXT@ \ math.@OBJEXT@ \ numeric.@OBJEXT@ \ @@ -252,7 +253,8 @@ file.@OBJEXT@: file.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h dln. gc.@OBJEXT@: gc.c ruby.h config.h defines.h intern.h rubysig.h st.h node.h env.h re.h regex.h hash.@OBJEXT@: hash.c ruby.h config.h defines.h intern.h st.h rubysig.h util.h inits.@OBJEXT@: inits.c ruby.h config.h defines.h intern.h -io.@OBJEXT@: io.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h env.h util.h +io.@OBJEXT@: io.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h env.h util.h m17n.h +m17n.@OBJEXT@: m17n.c m17n.h config.h main.@OBJEXT@: main.c ruby.h config.h defines.h intern.h marshal.@OBJEXT@: marshal.c ruby.h config.h defines.h intern.h rubyio.h st.h prec.@OBJEXT@: prec.c ruby.h config.h defines.h intern.h diff --git a/array.c b/array.c index 9872c460e0..1dc1627fb8 100644 --- a/array.c +++ b/array.c @@ -1440,7 +1440,7 @@ ary_make_hash(ary1, ary2) VALUE ary1, ary2; { VALUE hash = rb_hash_new(); - int i, n; + int i; for (i=0; ilen; i++) { rb_hash_aset(hash, RARRAY(ary1)->ptr[i], Qtrue); diff --git a/eval.c b/eval.c index 2fc5070a4b..8609f89b86 100644 --- a/eval.c +++ b/eval.c @@ -6625,7 +6625,7 @@ 
rb_mod_define_method(argc, argv, mod) VALUE mod; { ID id; - VALUE name, body; + VALUE body; if (argc == 1) { id = rb_to_id(argv[0]); @@ -8037,8 +8037,6 @@ static VALUE rb_thread_start(klass, args) VALUE klass, args; { - rb_thread_t th; - if (!rb_block_given_p()) { rb_raise(rb_eThreadError, "must be called with a block"); } diff --git a/file.c b/file.c index db200c6f7e..3e8ec3073f 100644 --- a/file.c +++ b/file.c @@ -282,7 +282,6 @@ rb_stat_inspect(self) for (i = 0; i < sizeof(member)/sizeof(member[0]); i++) { VALUE str2; - char *p; if (i > 0) { rb_str_cat2(str, ", "); @@ -2163,6 +2162,7 @@ define_filetest_function(name, func, argc) rb_define_singleton_method(rb_cFile, name, func, argc); } +void Init_File() { rb_mFileTest = rb_define_module("FileTest"); diff --git a/gc.c b/gc.c index e4320e0a4f..807fe9366c 100644 --- a/gc.c +++ b/gc.c @@ -775,7 +775,7 @@ obj_free(obj) } break; case T_STRING: -#define STR_NO_ORIG FL_USER2 /* copied from string.c */ +#define STR_NO_ORIG FL_USER0 /* copied from string.c */ if (!RANY(obj)->as.string.orig || FL_TEST(obj, STR_NO_ORIG)) { RUBY_CRITICAL(free(RANY(obj)->as.string.ptr)); } @@ -1152,8 +1152,6 @@ static VALUE undefine_final(os, obj) VALUE os, obj; { - VALUE table; - if (finalizer_table) { st_delete(finalizer_table, &obj, 0); } diff --git a/m17n.c b/m17n.c new file mode 100644 index 0000000000..0b66e6d403 --- /dev/null +++ b/m17n.c @@ -0,0 +1,692 @@ +/********************************************************************** + + m17n.c - + + $Author$ + $Date$ + created at: Thu Dec 28 16:29:43 JST 2000 + + Copyright (C) 1993-2001 Yukihiro Matsumoto + +**********************************************************************/ + +#include "config.h" +#include +#ifdef HAVE_STRING_H +# include +#else +# include +#endif +#include "m17n.h" + +#define uchar unsigned char + +static int num_encodings = 1; +m17n_encoding **m17n_encoding_table = 0; + +static int any_strlen _((const uchar *p, const uchar *e, const m17n_encoding* enc)); +static 
uchar *any_nth _((uchar *p, const uchar *e, int n, const m17n_encoding* enc)); + +m17n_encoding* +m17n_define_encoding(name) + const char *name; +{ + m17n_encoding *enc; + + if (!m17n_encoding_table) { + m17n_init(); + } + num_encodings++; + m17n_encoding_table = realloc(m17n_encoding_table, + sizeof(m17n_encoding*)*num_encodings); + enc = malloc(sizeof(m17n_encoding)); + + /* copy ASCII table */ + memcpy(enc, m17n_encoding_table[0], sizeof(m17n_encoding)); + /* ..but strlen() */ + m17n_encoding_func_strlen(enc, any_strlen); + /* ..and nth() */ + m17n_encoding_func_nth(enc, any_nth); + /* mbmaxlen is unknown (i.e. 0) */ + m17n_encoding_mbmaxlen(enc, 0); + + enc->name = name; + enc->index = num_encodings-1; + m17n_encoding_table[enc->index] = enc; + return enc; +} + +m17n_encoding * +m17n_find_encoding(name) + const char *name; +{ + int i; /* index into m17n_encoding_table */ + + for (i=0; i<num_encodings; i++) { /* linear scan; names must match exactly */ + if (strcmp(name, m17n_encoding_table[i]->name) == 0) { + return m17n_encoding_table[i]; + } + } + return 0; /* no encoding registered under this name */ +} + +static int +asc_strlen(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + return e - p; +} + +static int +asc_mbclen(c, enc) + int c; + const m17n_encoding *enc; +{ + return 1; +} + +static int +asc_codelen(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + return 1; +} + +static int +asc_mbcspan(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + return 1; +} + +static int +asc_islead(c, enc) + int c; + const m17n_encoding *enc; +{ + return 1; +} + +static uchar* +asc_nth(p, e, n, enc) + uchar *p; + const uchar *e; + int n; + const m17n_encoding *enc; +{ + p += n; + if (p <= e) return p; + return 0; +} + +static uchar asc_ctype_table [] = { + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 
16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static int +asc_ctype(c, code, enc) + unsigned int c, code; + const m17n_encoding *enc; +{ + if (c > 0xff) return 0; + return asc_ctype_table[c] & code; +} + +static uchar asc_toupper_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, +}; + +static unsigned int +asc_toupper(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + if (c > 127) return c; + return asc_toupper_table[c]; +} + +static uchar asc_tolower_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, +}; + +static unsigned int +asc_tolower(c, enc) 
+ unsigned int c; + const m17n_encoding *enc; +{ + if (c > 127) return c; + return asc_tolower_table[c]; +} + +static unsigned int +asc_codepoint(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + return *p; +} + +static int +asc_firstbyte(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + return c; +} + +static void +asc_mbcput(c, p, enc) + unsigned int c; + uchar *p; + const m17n_encoding *enc; +{ + *p = (uchar)c; +} + +static int +any_strlen(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + int c; + + if (m17n_mbmaxlen(enc) == 1) { + return e - p; + } + + for (c=0; p 0xff) return 0; + return latin1_ctype_table[c] & code; +} + +uchar latin1_toupper_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255 +}; + +static unsigned int +latin1_toupper(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + if (c > 0xff) return c; + return latin1_toupper_table[c]; +} + +uchar latin1_tolower_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + +static unsigned int +latin1_tolower(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + if (c > 0xff) return c; + return latin1_tolower_table[c]; +} + +static int +eucjp_mbclen(c, enc) + int c; + const m17n_encoding *enc; +{ + return c < 0x80 ? 1 : + 0xa1 <= c && c <= 0xfe ? 2 : + c == 0x8e ? 2 : + c == 0x8f ? 3 : + 1; +} + +static int +eucjp_mbcspan(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + return *p < 0x80 ? 1 : + 0xa1 <= *p && *p <= 0xfe && e-p >= 2 ? 2 : + *p == 0x8e && e-p >= 2 ? 2 : + *p == 0x8f && e-p >= 3 ? 
3 : + 0; +} + +static int +eucjp_islead(c, enc) + int c; + const m17n_encoding *enc; +{ + if (0xa1 <= c && c <= 0xfe) return 0; + return 1; +} + +static unsigned int +jis_codepoint(p, e, enc) + uchar *p, *e; + const m17n_encoding *enc; +{ + int n; + unsigned int c; + + if (p == e) return 0; + if (*p < 0x80) return *p; + n = m17n_mbcspan(enc, p, e); + if (e-p < n) return *p; + c = 0; + while (n--) { + c <<= 8; + c |= *p++; + } + return c; +} + +static int +jis_codelen(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + int n = 1; + for (;;) { + if (c < 0x100) return n; + c >>= 8; + n++; + } +} + +static int +jis_firstbyte(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + for (;;) { + if (c < 0x100) return c; + c >>= 8; + } +} + +static void +jis_mbcput(c, p, enc) + unsigned int c; + uchar *p; + const m17n_encoding *enc; +{ + unsigned int n; + + if (n = (c & 0xff0000)) { + *p++ = (n >> 16) & 0xff; + } + if (n = (c & 0xff00)) { + *p++ = (n >> 8) & 0xff; + } + if (n = (c & 0xff)) { + *p++ = n & 0xff; + } +} + +#define issjis1(c) ((0x81<=(c) && (c)<=0x9f) || (0xe0<=(c) && (c)<=0xfc)) +#define issjis2(c) ((0x40<=(c) && (c)<=0x7e) || (0x80<=(c) && (c)<=0xfc)) + +static int +sjis_mbclen(c, enc) + int c; + const m17n_encoding *enc; +{ + return issjis1(c) ? 2: 1; +} + +static int +sjis_mbcspan(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + return issjis1(p[0]) && e-p >= 2 && + issjis2(p[1]) ? 2 : + (p[0] & 0x80) ? 0 : + 1; +} + +static int +sjis_islead(c, enc) + int c; + const m17n_encoding *enc; +{ + return issjis2(c) ? 
0: 1; +} + +static int +sjis_ctype(c, code, enc) + unsigned int c, code; + const m17n_encoding *enc; +{ + if (0x9f < c && c < 0xe0) return 0020 & code; /* 0xa0..0xdf: reported as M17N_P only */ + return (c > 0xff) ? 0 : (asc_ctype_table[c] & code); /* table has 256 entries; wider codes have no class */ +} + +static const uchar utf8_mbctab[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + +static int +utf8_mbclen(c, enc) + int c; + const m17n_encoding *enc; +{ + return utf8_mbctab[c]; +} + +static int +utf8_codelen(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + if (c <= 0x7f) return 1; + if (c <= 0x7ff) return 2; + if (c <= 0xffff) return 3; + if (c <= 0x1fffff) return 4; + if (c <= 0x3ffffff) return 5; + if (c <= 0x7fffffff) return 6; + return 1; +} + +static int +utf8_mbcspan(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + if (e-p < utf8_mbctab[*p]) return 0; + return utf8_mbctab[*p]; +} + +static int +utf8_islead(c, enc) + int c; + const m17n_encoding *enc; +{ + if (c < 0x80 || (c & 0xc0) == 0xc0) return 1; + return 0; +} + +static unsigned int +utf8_codepoint(p, e, enc) + const uchar *p, *e; + const m17n_encoding *enc; +{ + int n; + unsigned int c; + + if (p == e) return 0; + if (*p < 0x80) return *p; + c = *p++; + n = utf8_mbctab[c]; + if (e-p < n-1) return 0; /* p is already past the lead byte: only n-1 trail bytes are needed */ + n--; + c &= (1<<(6-n))-1; + while (n--) 
{ + c = c << 6 | (*p++ & ((1<<6)-1)); + } + return c; +} + +static int +utf8_firstbyte(c, enc) + unsigned int c; + const m17n_encoding *enc; +{ + if (c < 0x80) return c; + if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0; + if (c <= 0xffff) return ((c>>12)&0xff)|0xe0; + if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0; + if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8; + if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc; + return 0; +} + +static void +utf8_mbcput(c, p, enc) + unsigned int c; + uchar *p; + const m17n_encoding *enc; +{ + if (c < 0x80) *p = c; /* writes utf8_codelen(c) bytes at p; only the first byte carries the length prefix */ + else if (c <= 0x7ff) { + *p++ = ((c>>6)&0xff)|0xc0; + *p = (c & 0x3f) |0x80; + } + else if (c <= 0xffff) { + *p++ = ((c>>12)&0xff)|0xe0; + *p++ = ((c>>6) &0x3f)|0x80; + *p = (c &0x3f)|0x80; + } + else if (c <= 0x1fffff) { + *p++ = ((c>>18)&0xff)|0xf0; + *p++ = ((c>>12)&0x3f)|0x80; /* trail bytes are always 10xxxxxx */ + *p++ = ((c>>6) &0x3f)|0x80; + *p = (c &0x3f)|0x80; + } + else if (c <= 0x3ffffff) { + *p++ = ((c>>24)&0xff)|0xf8; + *p++ = ((c>>18)&0x3f)|0x80; + *p++ = ((c>>12)&0x3f)|0x80; + *p++ = ((c>>6) &0x3f)|0x80; + *p = (c &0x3f)|0x80; + } + else if (c <= 0x7fffffff) { + *p++ = ((c>>30)&0xff)|0xfc; + *p++ = ((c>>24)&0x3f)|0x80; + *p++ = ((c>>18)&0x3f)|0x80; + *p++ = ((c>>12)&0x3f)|0x80; + *p++ = ((c>>6) &0x3f)|0x80; + *p = (c &0x3f)|0x80; + } +} + +void +m17n_init() +{ + m17n_encoding *enc; + + if (m17n_encoding_table) return; /* already initialized */ + + m17n_encoding_table = malloc(sizeof(m17n_encoding*)*4); + enc = malloc(sizeof(m17n_encoding)); + m17n_encoding_table[0] = enc; + enc->name = "ascii"; enc->index = 0; /* stored in slot 0; malloc leaves index indeterminate */ + + m17n_encoding_mbmaxlen(enc, 1); + m17n_encoding_func_strlen(enc, asc_strlen); + m17n_encoding_func_mbclen(enc, asc_mbclen); + m17n_encoding_func_codelen(enc, asc_codelen); + m17n_encoding_func_mbcspan(enc, asc_mbcspan); + m17n_encoding_func_islead(enc, asc_islead); + m17n_encoding_func_nth(enc, asc_nth); + m17n_encoding_func_ctype(enc, asc_ctype); + m17n_encoding_func_toupper(enc, asc_toupper); + m17n_encoding_func_tolower(enc, asc_tolower); + 
m17n_encoding_func_codepoint(enc, asc_codepoint); + m17n_encoding_func_firstbyte(enc, asc_firstbyte); + m17n_encoding_func_mbcput(enc, asc_mbcput); + + enc = m17n_define_encoding("latin1"); + m17n_encoding_mbmaxlen(enc, 1); + m17n_encoding_func_ctype(enc, latin1_ctype); + m17n_encoding_func_toupper(enc, latin1_toupper); + m17n_encoding_func_tolower(enc, latin1_tolower); + + enc = m17n_define_encoding("euc-jp"); + m17n_encoding_mbmaxlen(enc, 3); + m17n_encoding_func_mbclen(enc, eucjp_mbclen); + m17n_encoding_func_codelen(enc, jis_codelen); + m17n_encoding_func_mbcspan(enc, eucjp_mbcspan); + m17n_encoding_func_islead(enc, eucjp_islead); + m17n_encoding_func_codepoint(enc, jis_codepoint); + m17n_encoding_func_firstbyte(enc, jis_firstbyte); + m17n_encoding_func_mbcput(enc, jis_mbcput); + + enc = m17n_define_encoding("sjis"); + m17n_encoding_mbmaxlen(enc, 2); + m17n_encoding_func_mbclen(enc, sjis_mbclen); + m17n_encoding_func_codelen(enc, jis_codelen); + m17n_encoding_func_mbcspan(enc, sjis_mbcspan); + m17n_encoding_func_islead(enc, sjis_islead); + m17n_encoding_func_ctype(enc, sjis_ctype); + m17n_encoding_func_codepoint(enc, jis_codepoint); + m17n_encoding_func_firstbyte(enc, jis_firstbyte); + m17n_encoding_func_mbcput(enc, jis_mbcput); + + enc = m17n_define_encoding("utf-8"); + m17n_encoding_mbmaxlen(enc, 6); + m17n_encoding_func_mbclen(enc, utf8_mbclen); + m17n_encoding_func_codelen(enc, utf8_codelen); + m17n_encoding_func_mbcspan(enc, utf8_mbcspan); + m17n_encoding_func_islead(enc, utf8_islead); + m17n_encoding_func_codepoint(enc, utf8_codepoint); + m17n_encoding_func_firstbyte(enc, utf8_firstbyte); + m17n_encoding_func_mbcput(enc, utf8_mbcput); +} + +int +m17n_memcmp(p1, p2, len, enc) + const char *p1, *p2; + long len; + const m17n_encoding *enc; +{ + int tmp; /* difference of the first pair of upper-cased bytes that differ */ + + while (len--) { /* byte-wise compare after toupper; the uchar cast keeps bytes >= 0x80 in 0..255 even where char is signed */ + if (tmp = m17n_toupper(enc, (unsigned)(uchar)*p1++) - + m17n_toupper(enc, (unsigned)(uchar)*p2++)) + return tmp; + } + return 0; /* all len bytes equal ignoring case */ +} diff --git a/m17n.h b/m17n.h new file mode 100644 index 
0000000000..d76c5a9125 --- /dev/null +++ b/m17n.h @@ -0,0 +1,109 @@ +/********************************************************************** + + m17n.h - + + $Author$ + $Date$ + created at: Thu Dec 28 16:19:43 JST 2000 + + Copyright (C) 1993-2001 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef M17N_H +#define M17N_H + +#ifndef _ +#ifdef HAVE_PROTOTYPES +# define _(args) args +#else +# define _(args) () +#endif +#ifdef HAVE_STDARG_PROTOTYPES +# define __(args) args +#else +# define __(args) () +#endif +#endif + +void m17n_init _((void)); + +typedef struct m17n_encoding { + const char *name; + int index; + + int mbmaxlen; + + int (*strlen) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc)); + int (*mbclen) _((int c, const struct m17n_encoding* enc)); + int (*codelen) _((unsigned int c, const struct m17n_encoding* enc)); + int (*mbcspan) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc)); + int (*islead) _((int c, struct m17n_encoding* enc)); + unsigned char *(*nth) _((unsigned char *p, const unsigned char *e, int n, const struct m17n_encoding* enc)); + int (*ctype) _((const unsigned int c, const unsigned int code, const struct m17n_encoding* enc)); + unsigned int (*toupper) _((unsigned int c, const struct m17n_encoding* enc)); + unsigned int (*tolower) _((unsigned int c, const struct m17n_encoding* enc)); + unsigned int (*codepoint) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc)); + int (*firstbyte) _((unsigned int c, const struct m17n_encoding* enc)); + void (*mbcput) _((unsigned int c, unsigned char *p, const struct m17n_encoding* enc)); +} m17n_encoding; + +extern m17n_encoding **m17n_encoding_table; + +m17n_encoding *m17n_define_encoding _((const char *name)); + +#define m17n_encoding_to_index(enc) (enc)->index +#define m17n_index_to_encoding(index) m17n_encoding_table[index] +m17n_encoding 
*m17n_find_encoding _((const char *name)); + +#define m17n_encoding_mbmaxlen(enc,n) (enc)->mbmaxlen = (n) +#define m17n_encoding_func_strlen(enc,func) (enc)->strlen = (func) +#define m17n_encoding_func_mbclen(enc,func) (enc)->mbclen = (func) +#define m17n_encoding_func_codelen(enc,func) (enc)->codelen = (func) +#define m17n_encoding_func_mbcspan(enc,func) (enc)->mbcspan = (func) +#define m17n_encoding_func_islead(enc,func) (enc)->islead = (func) +#define m17n_encoding_func_nth(enc,func) (enc)->nth = (func) +#define m17n_encoding_func_ctype(enc,func) (enc)->ctype = (func) +#define m17n_encoding_func_toupper(enc,func) (enc)->toupper = (func) +#define m17n_encoding_func_tolower(enc,func) (enc)->tolower = (func) +#define m17n_encoding_func_codepoint(enc,func) (enc)->codepoint = (func) +#define m17n_encoding_func_firstbyte(enc,func) (enc)->firstbyte = (func) +#define m17n_encoding_func_mbcput(enc,func) (enc)->mbcput = (func) + +#define m17n_mbmaxlen(enc) (enc)->mbmaxlen +#define m17n_strlen(enc,p,e) (*(enc)->strlen)((p),(e),(enc)) +#define m17n_mbclen(enc,c) (*(enc)->mbclen)((c)&0xff,(enc)) +#define m17n_codelen(enc,c) (*(enc)->codelen)((c),(enc)) +#define m17n_mbcspan(enc,p,e) (*(enc)->mbcspan)((p),(e),(enc)) +#define m17n_islead(enc,c) (*(enc)->islead)((c)&0xff,(enc)) +#define m17n_nth(enc,p,e,n) (*(enc)->nth)((p),(e),(n),(enc)) +#define m17n_codepoint(enc,p,e) (*(enc)->codepoint)((p),(e),(enc)) +#define m17n_firstbyte(enc,c) (*(enc)->firstbyte)((c),(enc)) +#define m17n_mbcput(enc,c,p) (*(enc)->mbcput)((c),(p),(enc)) + +#define M17N_U 01 /* Upper case */ +#define M17N_L 02 /* Lower case */ +#define M17N_N 04 /* Numeral (digit) */ +#define M17N_S 010 /* Spacing character */ +#define M17N_P 020 /* Punctuation */ +#define M17N_C 040 /* Control character */ +#define M17N_B 0100 /* Blank */ +#define M17N_X 0200 /* heXadecimal digit */ + +#define m17n_isprint(enc,c) (*(enc)->ctype)((c),(M17N_P|M17N_U|M17N_L|M17N_N|M17N_B),(enc)) +#define m17n_isspace(enc,c) 
(*(enc)->ctype)((c),(M17N_S),(enc)) +#define m17n_isblank(enc,c) (*(enc)->ctype)((c),(M17N_B),(enc)) +#define m17n_ispunct(enc,c) (*(enc)->ctype)((c),(M17N_P),(enc)) +#define m17n_isupper(enc,c) (*(enc)->ctype)((c),(M17N_U),(enc)) +#define m17n_islower(enc,c) (*(enc)->ctype)((c),(M17N_L),(enc)) +#define m17n_isalnum(enc,c) (*(enc)->ctype)((c),(M17N_U|M17N_L|M17N_N),(enc)) +#define m17n_isalpha(enc,c) (*(enc)->ctype)((c),(M17N_U|M17N_L),(enc)) +#define m17n_isdigit(enc,c) (*(enc)->ctype)((c),(M17N_N),(enc)) +#define m17n_isxdigit(enc,c) (*(enc)->ctype)((c),(M17N_X),(enc)) +#define m17n_iscntrl(enc,c) (*(enc)->ctype)((c),(M17N_C),(enc)) +#define m17n_toupper(enc,c) (*(enc)->toupper)((c),(enc)) +#define m17n_tolower(enc,c) (*(enc)->tolower)((c),(enc)) + +int m17n_memcmp _((const char *p1, const char *p2, long len, const m17n_encoding *enc)); + +#endif /* M17N_H */ diff --git a/process.c b/process.c index ebc76d5076..32c7d57874 100644 --- a/process.c +++ b/process.c @@ -6,7 +6,7 @@ $Date$ created at: Tue Aug 10 14:30:50 JST 1993 - Copyright (C) 1993-2000 Yukihiro Matsumoto + Copyright (C) 1993-2001 Yukihiro Matsumoto Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
Copyright (C) 2000 Information-technology Promotion Agency, Japan diff --git a/re.h b/re.h index d69520a8f4..88a67705d5 100644 --- a/re.h +++ b/re.h @@ -28,17 +28,12 @@ struct RMatch { #define RMATCH(obj) (R_CAST(RMatch)(obj)) -int rb_str_cicmp _((VALUE, VALUE)); VALUE rb_reg_regcomp _((VALUE)); int rb_reg_search _((VALUE, VALUE, int, int)); VALUE rb_reg_regsub _((VALUE, VALUE, struct re_registers *)); int rb_reg_adjust_startpos _((VALUE, VALUE, int, int)); - -int rb_kcode _((void)); void rb_match_busy _((VALUE)); EXTERN int ruby_ignorecase; -int rb_reg_mbclen2 _((unsigned int, VALUE)); -#define mbclen2(c,re) rb_reg_mbclen2((c),(re)) #endif diff --git a/util.c b/util.c index 25c00ae2d6..46fcf0ba79 100644 --- a/util.c +++ b/util.c @@ -78,19 +78,6 @@ int *retlen; #include "missing/file.h" #endif -static char * -check_dir(dir) - char *dir; -{ - struct stat st; - - if (dir == NULL) return NULL; - if (stat(dir, &st) < 0) return NULL; - if (!S_ISDIR(st.st_mode)) return NULL; - if (eaccess(dir, W_OK) < 0) return NULL; - return dir; -} - #if defined(MSDOS) || defined(__CYGWIN32__) || defined(NT) /* * Copyright (c) 1993, Intergraph Corporation diff --git a/variable.c b/variable.c index 69791ab26d..fae2255908 100644 --- a/variable.c +++ b/variable.c @@ -1008,8 +1008,6 @@ top_const_get(id, klassp) ID id; VALUE *klassp; { - VALUE value; - /* pre-defined class */ if (st_lookup(rb_class_tbl, id, klassp)) return Qtrue; -- cgit v1.2.3