summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile.in 4
-rw-r--r-- array.c 2
-rw-r--r-- eval.c 4
-rw-r--r-- file.c 2
-rw-r--r-- gc.c 4
-rw-r--r-- m17n.c 692
-rw-r--r-- m17n.h 109
-rw-r--r-- process.c 2
-rw-r--r-- re.h 5
-rw-r--r-- util.c 13
-rw-r--r-- variable.c 2
11 files changed, 809 insertions, 30 deletions
diff --git a/Makefile.in b/Makefile.in
index af29b78daa..b886c1022b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -58,6 +58,7 @@ OBJS = array.@OBJEXT@ \
hash.@OBJEXT@ \
inits.@OBJEXT@ \
io.@OBJEXT@ \
+ m17n.@OBJEXT@ \
marshal.@OBJEXT@ \
math.@OBJEXT@ \
numeric.@OBJEXT@ \
@@ -252,7 +253,8 @@ file.@OBJEXT@: file.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h dln.
gc.@OBJEXT@: gc.c ruby.h config.h defines.h intern.h rubysig.h st.h node.h env.h re.h regex.h
hash.@OBJEXT@: hash.c ruby.h config.h defines.h intern.h st.h rubysig.h util.h
inits.@OBJEXT@: inits.c ruby.h config.h defines.h intern.h
-io.@OBJEXT@: io.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h env.h util.h
+io.@OBJEXT@: io.c ruby.h config.h defines.h intern.h rubyio.h rubysig.h env.h util.h m17n.h
+m17n.@OBJEXT@: m17n.c m17n.h config.h
main.@OBJEXT@: main.c ruby.h config.h defines.h intern.h
marshal.@OBJEXT@: marshal.c ruby.h config.h defines.h intern.h rubyio.h st.h
prec.@OBJEXT@: prec.c ruby.h config.h defines.h intern.h
diff --git a/array.c b/array.c
index 9872c460e0..1dc1627fb8 100644
--- a/array.c
+++ b/array.c
@@ -1440,7 +1440,7 @@ ary_make_hash(ary1, ary2)
VALUE ary1, ary2;
{
VALUE hash = rb_hash_new();
- int i, n;
+ int i;
for (i=0; i<RARRAY(ary1)->len; i++) {
rb_hash_aset(hash, RARRAY(ary1)->ptr[i], Qtrue);
diff --git a/eval.c b/eval.c
index 2fc5070a4b..8609f89b86 100644
--- a/eval.c
+++ b/eval.c
@@ -6625,7 +6625,7 @@ rb_mod_define_method(argc, argv, mod)
VALUE mod;
{
ID id;
- VALUE name, body;
+ VALUE body;
if (argc == 1) {
id = rb_to_id(argv[0]);
@@ -8037,8 +8037,6 @@ static VALUE
rb_thread_start(klass, args)
VALUE klass, args;
{
- rb_thread_t th;
-
if (!rb_block_given_p()) {
rb_raise(rb_eThreadError, "must be called with a block");
}
diff --git a/file.c b/file.c
index db200c6f7e..3e8ec3073f 100644
--- a/file.c
+++ b/file.c
@@ -282,7 +282,6 @@ rb_stat_inspect(self)
for (i = 0; i < sizeof(member)/sizeof(member[0]); i++) {
VALUE str2;
- char *p;
if (i > 0) {
rb_str_cat2(str, ", ");
@@ -2163,6 +2162,7 @@ define_filetest_function(name, func, argc)
rb_define_singleton_method(rb_cFile, name, func, argc);
}
+void
Init_File()
{
rb_mFileTest = rb_define_module("FileTest");
diff --git a/gc.c b/gc.c
index e4320e0a4f..807fe9366c 100644
--- a/gc.c
+++ b/gc.c
@@ -775,7 +775,7 @@ obj_free(obj)
}
break;
case T_STRING:
-#define STR_NO_ORIG FL_USER2 /* copied from string.c */
+#define STR_NO_ORIG FL_USER0 /* copied from string.c */
if (!RANY(obj)->as.string.orig || FL_TEST(obj, STR_NO_ORIG)) {
RUBY_CRITICAL(free(RANY(obj)->as.string.ptr));
}
@@ -1152,8 +1152,6 @@ static VALUE
undefine_final(os, obj)
VALUE os, obj;
{
- VALUE table;
-
if (finalizer_table) {
st_delete(finalizer_table, &obj, 0);
}
diff --git a/m17n.c b/m17n.c
new file mode 100644
index 0000000000..0b66e6d403
--- /dev/null
+++ b/m17n.c
@@ -0,0 +1,692 @@
+/**********************************************************************
+
+ m17n.c -
+
+ $Author$
+ $Date$
+ created at: Thu Dec 28 16:29:43 JST 2000
+
+ Copyright (C) 1993-2001 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "config.h"
+#include <stdlib.h>
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+#include "m17n.h"
+
+#define uchar unsigned char
+
+static int num_encodings = 1;
+m17n_encoding **m17n_encoding_table = 0;
+
+static int any_strlen _((const uchar *p, const uchar *e, const m17n_encoding* enc));
+static uchar *any_nth _((uchar *p, const uchar *e, int n, const m17n_encoding* enc));
+
+/*
+ * Register a new encoding called NAME and return its descriptor.
+ *
+ * The descriptor starts as a copy of table slot 0 (the ASCII entry), with
+ * strlen/nth replaced by the generic mbcspan-driven versions and mbmaxlen
+ * set to 0 (unknown); the caller then overrides individual callbacks.
+ * NAME is stored by pointer, not copied.
+ * Returns 0 when memory cannot be obtained; the table is then unchanged.
+ */
+m17n_encoding*
+m17n_define_encoding(name)
+    const char *name;
+{
+    m17n_encoding *enc;
+    m17n_encoding **table;
+
+    if (!m17n_encoding_table) {
+        m17n_init();
+        if (!m17n_encoding_table) return 0;
+    }
+
+    enc = malloc(sizeof(m17n_encoding));
+    if (!enc) return 0;
+
+    /* grow through a temporary so the old table survives a failed realloc */
+    table = realloc(m17n_encoding_table,
+                    sizeof(m17n_encoding*)*(num_encodings+1));
+    if (!table) {
+        free(enc);
+        return 0;
+    }
+    m17n_encoding_table = table;
+
+    /* copy ASCII table */
+    memcpy(enc, m17n_encoding_table[0], sizeof(m17n_encoding));
+    /* ..but strlen() */
+    m17n_encoding_func_strlen(enc, any_strlen);
+    /* ..and nth() */
+    m17n_encoding_func_nth(enc, any_nth);
+    /* mbmaxlen is unknown (i.e. 0) */
+    m17n_encoding_mbmaxlen(enc, 0);
+
+    enc->name = name;
+    enc->index = num_encodings;
+    m17n_encoding_table[num_encodings++] = enc;
+    return enc;
+}
+
+/*
+ * Look up a registered encoding by exact (strcmp) name match.
+ * Returns the descriptor, or 0 when NAME is null or not registered.
+ */
+m17n_encoding *
+m17n_find_encoding(name)
+    const char *name;
+{
+    int i;
+
+    if (!name) return 0;
+    if (!m17n_encoding_table) {
+        /* the table is built lazily; searching a null table would crash */
+        m17n_init();
+        if (!m17n_encoding_table) return 0;
+    }
+
+    for (i=0; i<num_encodings; i++) {
+        if (strcmp(name, m17n_encoding_table[i]->name) == 0) {
+            return m17n_encoding_table[i];
+        }
+    }
+    return 0;
+}
+
+/* Character count of the range [p, e): the byte distance e - p. ENC is unused. */
+static int
+asc_strlen(p, e, enc)
+ const uchar *p, *e;
+ const m17n_encoding *enc;
+{
+ return e - p;
+}
+
+/* Byte length of the character starting with byte C: always 1. Both arguments are unused. */
+static int
+asc_mbclen(c, enc)
+ int c;
+ const m17n_encoding *enc;
+{
+ return 1;
+}
+
+/* Number of bytes needed to store code C: always 1. Both arguments are unused. */
+static int
+asc_codelen(c, enc)
+ unsigned int c;
+ const m17n_encoding *enc;
+{
+ return 1;
+}
+
+/* Span in bytes of the character at P: always 1; neither P nor E is inspected. */
+static int
+asc_mbcspan(p, e, enc)
+ const uchar *p, *e;
+ const m17n_encoding *enc;
+{
+ return 1;
+}
+
+/* Reports every byte as a leading byte: always returns 1. */
+static int
+asc_islead(c, enc)
+ int c;
+ const m17n_encoding *enc;
+{
+ return 1;
+}
+
+/*
+ * Return the position N bytes after P, or 0 when that position would lie
+ * beyond E (E itself is still accepted). ENC is unused.
+ */
+static uchar*
+asc_nth(p, e, n, enc)
+    uchar *p;
+    const uchar *e;
+    int n;
+    const m17n_encoding *enc;
+{
+    /* compare distances rather than forming p+n first: a pointer past the
+       end of the buffer is undefined even if it is never dereferenced */
+    if (n > e - p) return 0;
+    return p + n;
+}
+
+/*
+ * Character-class bits for byte values 0..255, tested by asc_ctype() and
+ * sjis_ctype(). Each entry is an OR of the M17N_* masks from m17n.h
+ * (e.g. 132 = M17N_X|M17N_N for '0'..'9'); entries 128..255 are 0.
+ */
+static uchar asc_ctype_table [] = {
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Return the class bits of C selected by the mask CODE (non-zero when C
+ * belongs to at least one requested class). Codes above 0xff have none.
+ */
+static int
+asc_ctype(c, code, enc)
+    unsigned int c, code;
+    const m17n_encoding *enc;
+{
+    return (c <= 0xff) ? (asc_ctype_table[c] & code) : 0;
+}
+
+/* Upper-case mapping for codes 0..127: 97..122 ('a'..'z') map to 65..90, everything else to itself. */
+static uchar asc_toupper_table[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+};
+
+/* Upper-case C through asc_toupper_table; codes of 128 and above are returned unchanged. */
+static unsigned int
+asc_toupper(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    return (c <= 127) ? asc_toupper_table[c] : c;
+}
+
+/* Lower-case mapping for codes 0..127: 65..90 ('A'..'Z') map to 97..122, everything else to itself. */
+static uchar asc_tolower_table[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+};
+
+/* Lower-case C through asc_tolower_table; codes of 128 and above are returned unchanged. */
+static unsigned int
+asc_tolower(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    return (c <= 127) ? asc_tolower_table[c] : c;
+}
+
+/*
+ * Code of the character at P: the byte itself. An empty range (P at or
+ * past E) yields 0, as jis_codepoint() and utf8_codepoint() do, instead
+ * of reading a byte outside the range.
+ */
+static unsigned int
+asc_codepoint(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    if (p >= e) return 0;
+    return *p;
+}
+
+/* First byte of the encoded form of C: C itself, returned unchanged. */
+static int
+asc_firstbyte(c, enc)
+ unsigned int c;
+ const m17n_encoding *enc;
+{
+ return c;
+}
+
+/* Store C at P as a single byte; C is truncated by the cast to unsigned char. */
+static void
+asc_mbcput(c, p, enc)
+ unsigned int c;
+ uchar *p;
+ const m17n_encoding *enc;
+{
+ *p = (uchar)c;
+}
+
+/*
+ * Generic character count for [p, e), driven by the encoding's mbcspan
+ * callback. Returns -1 as soon as mbcspan reports a span of 0.
+ * Encodings whose mbmaxlen is 1 are counted as bytes.
+ */
+static int
+any_strlen(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    int count = 0;
+
+    if (m17n_mbmaxlen(enc) == 1) return e - p;
+
+    while (p < e) {
+        int span = m17n_mbcspan(enc, p, e);
+
+        if (span == 0) return -1;
+        p += span;
+        count++;
+    }
+    return count;
+}
+
+/*
+ * Generic "advance N characters from P" for any encoding.
+ *
+ * Returns the resulting position (at most E), 0 when the position would
+ * lie beyond E, or (uchar*)-1 when the mbcspan callback reports a span
+ * of 0. The multi-byte walk stops early at E, so a range holding fewer
+ * than N characters yields E.
+ */
+static uchar*
+any_nth(p, e, n, enc)
+    uchar *p;
+    const uchar *e;
+    int n;
+    const m17n_encoding *enc;
+{
+    if (m17n_mbmaxlen(enc) == 1) {
+        /* single-byte encoding: plain offset, checked without forming a
+           pointer beyond the end of the buffer */
+        if (n > e - p) return 0;
+        return p + n;
+    }
+
+    while (p < e && n--) {
+        /* named "span" so it no longer shadows the parameter n */
+        int span = m17n_mbcspan(enc, p, e);
+
+        if (span == 0) return (uchar*)-1;
+        p += span;
+    }
+    if (p <= e) return p;
+    return 0;
+}
+
+/*
+ * Character-class bits for byte values 0..255, tested by latin1_ctype().
+ * The first 128 entries equal asc_ctype_table; entries 160..255 carry
+ * class bits as well, while 128..159 are 0.
+ */
+static uchar latin1_ctype_table[] = {
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/*
+ * Return the class bits of C selected by the mask CODE, taken from
+ * latin1_ctype_table. Codes above 0xff have no classes.
+ */
+static int
+latin1_ctype(c, code, enc)
+    unsigned int c, code;
+    const m17n_encoding *enc;
+{
+    return (c <= 0xff) ? (latin1_ctype_table[c] & code) : 0;
+}
+
+/*
+ * Upper-case mapping for byte values 0..255, used by latin1_toupper().
+ * 97..122 map to 65..90 and 224..254 map to 192..222, except 247 which
+ * maps to itself; 255 also maps to itself.
+ * NOTE(review): unlike the ASCII tables this one is not static, so the
+ * symbol is exported -- confirm whether anything outside this file needs it.
+ */
+uchar latin1_toupper_table[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
+};
+
+/* Upper-case C through latin1_toupper_table; codes above 0xff are returned unchanged. */
+static unsigned int
+latin1_toupper(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    return (c <= 0xff) ? latin1_toupper_table[c] : c;
+}
+
+/*
+ * Lower-case mapping for byte values 0..255, used by latin1_tolower().
+ * 65..90 map to 97..122 and 192..222 map to 224..254, except 215 which
+ * maps to itself; 223 also maps to itself.
+ * NOTE(review): unlike the ASCII tables this one is not static, so the
+ * symbol is exported -- confirm whether anything outside this file needs it.
+ */
+uchar latin1_tolower_table[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+/* Lower-case C through latin1_tolower_table; codes above 0xff are returned unchanged. */
+static unsigned int
+latin1_tolower(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    return (c <= 0xff) ? latin1_tolower_table[c] : c;
+}
+
+/*
+ * Byte length of an EUC-JP character judged from its first byte C:
+ * 3 after 0x8f, 2 after 0x8e or 0xa1..0xfe, otherwise 1.
+ */
+static int
+eucjp_mbclen(c, enc)
+    int c;
+    const m17n_encoding *enc;
+{
+    if (c == 0x8f) return 3;
+    if (c == 0x8e) return 2;
+    if (0xa1 <= c && c <= 0xfe) return 2;
+    return 1;
+}
+
+/*
+ * Byte span of the EUC-JP character at P, bounded by E.
+ * Returns 0 when the lead byte is not a recognised lead or when fewer
+ * bytes remain than the lead byte calls for. Trail bytes are not checked.
+ */
+static int
+eucjp_mbcspan(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    uchar lead = *p;
+
+    if (lead < 0x80) return 1;
+    if (lead == 0x8f) {
+        return (e-p >= 3) ? 3 : 0;
+    }
+    if (lead == 0x8e || (0xa1 <= lead && lead <= 0xfe)) {
+        return (e-p >= 2) ? 2 : 0;
+    }
+    return 0;
+}
+
+/* Returns 0 for bytes in 0xa1..0xfe and 1 for every other byte. */
+static int
+eucjp_islead(c, enc)
+    int c;
+    const m17n_encoding *enc;
+{
+    return (c < 0xa1 || 0xfe < c) ? 1 : 0;
+}
+
+/*
+ * Code of the character at P for the JIS-family encodings (EUC-JP, SJIS):
+ * the character's bytes packed big-endian into one integer.
+ *
+ * Returns 0 for an empty range, the byte itself for values below 0x80,
+ * and 0 when the encoding's mbcspan callback reports a span of 0.
+ * P and E are const-qualified so the function matches the "codepoint"
+ * slot of m17n_encoding.
+ */
+static unsigned int
+jis_codepoint(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    int n;
+    unsigned int c;
+
+    if (p == e) return 0;
+    if (*p < 0x80) return *p;
+    n = m17n_mbcspan(enc, p, e);
+    if (e-p < n) return *p;
+    c = 0;
+    while (n--) {
+        c <<= 8;
+        c |= *p++;
+    }
+    return c;
+}
+
+/* Number of bytes needed to hold code C: one, plus one per further non-empty high byte. */
+static int
+jis_codelen(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    int len;
+
+    for (len = 1; c >= 0x100; len++) {
+        c >>= 8;
+    }
+    return len;
+}
+
+/* Most significant non-zero byte of code C (C itself when it is below 0x100). */
+static int
+jis_firstbyte(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    while (c >= 0x100) {
+        c >>= 8;
+    }
+    return c;
+}
+
+/*
+ * Store code C at P as big-endian bytes (at most three).
+ *
+ * The number of bytes written follows the magnitude of C, matching
+ * jis_codelen() for codes up to 0xffffff. The previous version skipped
+ * every zero byte, so it wrote nothing at all for C == 0 and emitted
+ * fewer bytes than jis_codelen() reported whenever a lower byte was zero.
+ */
+static void
+jis_mbcput(c, p, enc)
+    unsigned int c;
+    uchar *p;
+    const m17n_encoding *enc;
+{
+    if (c > 0xffff) {
+        *p++ = (c >> 16) & 0xff;
+    }
+    if (c > 0xff) {
+        *p++ = (c >> 8) & 0xff;
+    }
+    *p = c & 0xff;
+}
+
+/* issjis1: bytes sjis_mbclen() treats as the first of two; issjis2: bytes accepted as the second. Both expand (c) several times. */
+#define issjis1(c) ((0x81<=(c) && (c)<=0x9f) || (0xe0<=(c) && (c)<=0xfc))
+#define issjis2(c) ((0x40<=(c) && (c)<=0x7e) || (0x80<=(c) && (c)<=0xfc))
+
+/* Byte length of a Shift_JIS character judged from its first byte C: 2 for issjis1 bytes, else 1. */
+static int
+sjis_mbclen(c, enc)
+    int c;
+    const m17n_encoding *enc;
+{
+    if (issjis1(c)) return 2;
+    return 1;
+}
+
+/*
+ * Byte span of the Shift_JIS character at P, bounded by E.
+ *
+ * Returns 2 for a lead byte followed by a valid trail byte inside the
+ * range, 1 for bytes below 0x80 and for the single-byte range 0xa1..0xdf,
+ * and 0 for anything else (truncated or invalid).
+ * The 0xa1..0xdf case used to be rejected with 0 even though
+ * sjis_mbclen() and sjis_ctype() treat those bytes as one-byte characters.
+ */
+static int
+sjis_mbcspan(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    if (issjis1(p[0])) {
+        /* double-byte lead: needs a valid trail byte inside the buffer */
+        return (e-p >= 2 && issjis2(p[1])) ? 2 : 0;
+    }
+    if (p[0] < 0x80) return 1;
+    /* single-byte half-width katakana */
+    if (0xa1 <= p[0] && p[0] <= 0xdf) return 1;
+    return 0;
+}
+
+/* Returns 0 for any byte that issjis2 accepts as a second byte, 1 otherwise. */
+static int
+sjis_islead(c, enc)
+    int c;
+    const m17n_encoding *enc;
+{
+    if (issjis2(c)) return 0;
+    return 1;
+}
+
+/*
+ * Return the class bits of C selected by the mask CODE for Shift_JIS.
+ * Codes 0xa0..0xdf are classed as punctuation (M17N_P); other codes up
+ * to 0xff use asc_ctype_table. Codes above 0xff have no classes -- the
+ * table has only 256 entries, so they must not be used as an index.
+ */
+static int
+sjis_ctype(c, code, enc)
+    unsigned int c, code;
+    const m17n_encoding *enc;
+{
+    if (c > 0xff) return 0;
+    if (0x9f < c && c < 0xe0) return M17N_P & code;
+    return asc_ctype_table[c] & code;
+}
+
+/*
+ * UTF-8 sequence length indexed by first byte: 1 for 0x00..0xbf, 2 for
+ * 0xc0..0xdf, 3 for 0xe0..0xef, 4/5/6 for 0xf0..0xfd, and 1 for 0xfe/0xff.
+ */
+static const uchar utf8_mbctab[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+};
+
+/*
+ * Byte length of the UTF-8 character whose first byte is C.
+ * C indexes the 256-entry utf8_mbctab directly; the m17n_mbclen() macro
+ * in m17n.h masks its argument with 0xff before calling.
+ */
+static int
+utf8_mbclen(c, enc)
+ int c;
+ const m17n_encoding *enc;
+{
+ return utf8_mbctab[c];
+}
+
+/*
+ * Number of bytes needed to encode code point C in UTF-8 (1 to 6).
+ * Values above 0x7fffffff report 1.
+ */
+static int
+utf8_codelen(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    /* largest code point representable in 1, 2, ... 6 bytes */
+    static const unsigned int limit[] = {
+        0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff
+    };
+    int i;
+
+    for (i = 0; i < 6; i++) {
+        if (c <= limit[i]) return i + 1;
+    }
+    return 1;
+}
+
+/*
+ * Byte span of the UTF-8 character at P: the length utf8_mbctab gives
+ * for the first byte, or 0 when fewer bytes than that remain before E.
+ * Continuation bytes are not inspected.
+ */
+static int
+utf8_mbcspan(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    int need = utf8_mbctab[*p];
+
+    return (e-p < need) ? 0 : need;
+}
+
+/* Returns 1 when C is below 0x80 or has both top bits set (0xc0 mask), else 0. */
+static int
+utf8_islead(c, enc)
+    int c;
+    const m17n_encoding *enc;
+{
+    return (c < 0x80 || (c & 0xc0) == 0xc0) ? 1 : 0;
+}
+
+/*
+ * Decode the UTF-8 character at P into its code point.
+ *
+ * Returns 0 for an empty range or when the sequence announced by the
+ * first byte does not fit before E. Continuation bytes are not validated;
+ * only their low six bits are used.
+ */
+static unsigned int
+utf8_codepoint(p, e, enc)
+    const uchar *p, *e;
+    const m17n_encoding *enc;
+{
+    int n;
+    unsigned int c;
+
+    if (p == e) return 0;
+    if (*p < 0x80) return *p;
+    c = *p++;
+    /* the first byte is already consumed, so only n-1 more are needed;
+       comparing against the full length rejected a complete character
+       that ended exactly at e */
+    n = utf8_mbctab[c] - 1;
+    if (e-p < n) return 0;
+    c &= (1<<(6-n))-1;
+    while (n--) {
+        c = c << 6 | (*p++ & ((1<<6)-1));
+    }
+    return c;
+}
+
+/*
+ * First byte of the UTF-8 encoding of code point C.
+ * Returns 0 for values above 0x7fffffff.
+ */
+static int
+utf8_firstbyte(c, enc)
+    unsigned int c;
+    const m17n_encoding *enc;
+{
+    /* limit[i] is the largest code point of an (i+2)-byte sequence and
+       mark[i] the matching lead-byte prefix */
+    static const unsigned int limit[] = {
+        0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff
+    };
+    static const int mark[] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
+    int i;
+
+    if (c < 0x80) return c;
+    for (i = 0; i < 5; i++) {
+        if (c <= limit[i]) {
+            return ((c >> (6*(i+1))) & 0xff) | mark[i];
+        }
+    }
+    return 0;
+}
+
+/*
+ * Store code point C at P as a UTF-8 sequence of 1 to 6 bytes.
+ * Nothing is written for values above 0x7fffffff.
+ *
+ * Every byte after the first is a continuation byte and carries the
+ * 0x80 prefix. The 4-, 5- and 6-byte forms used to tag their middle
+ * bytes with lead-byte prefixes (0xe0/0xf0/0xf8), producing sequences
+ * that utf8_mbctab would read as new characters.
+ */
+static void
+utf8_mbcput(c, p, enc)
+    unsigned int c;
+    uchar *p;
+    const m17n_encoding *enc;
+{
+    if (c < 0x80) *p = c;
+    else if (c <= 0x7ff) {
+        *p++ = ((c>>6)&0xff)|0xc0;
+        *p = (c & 0x3f) |0x80;
+    }
+    else if (c <= 0xffff) {
+        *p++ = ((c>>12)&0xff)|0xe0;
+        *p++ = ((c>>6) &0x3f)|0x80;
+        *p = (c &0x3f)|0x80;
+    }
+    else if (c <= 0x1fffff) {
+        *p++ = ((c>>18)&0xff)|0xf0;
+        *p++ = ((c>>12)&0x3f)|0x80;
+        *p++ = ((c>>6) &0x3f)|0x80;
+        *p = (c &0x3f)|0x80;
+    }
+    else if (c <= 0x3ffffff) {
+        *p++ = ((c>>24)&0xff)|0xf8;
+        *p++ = ((c>>18)&0x3f)|0x80;
+        *p++ = ((c>>12)&0x3f)|0x80;
+        *p++ = ((c>>6) &0x3f)|0x80;
+        *p = (c &0x3f)|0x80;
+    }
+    else if (c <= 0x7fffffff) {
+        *p++ = ((c>>30)&0xff)|0xfc;
+        *p++ = ((c>>24)&0x3f)|0x80;
+        *p++ = ((c>>18)&0x3f)|0x80;
+        *p++ = ((c>>12)&0x3f)|0x80;
+        *p++ = ((c>>6) &0x3f)|0x80;
+        *p = (c &0x3f)|0x80;
+    }
+}
+
+/*
+ * Build the encoding table and register the built-in encodings
+ * "ascii" (slot 0), "latin1", "euc-jp", "sjis" and "utf-8".
+ *
+ * Calling it again once the table exists does nothing. If memory cannot
+ * be obtained the function returns early; when even the first allocation
+ * fails the table stays null.
+ */
+void
+m17n_init()
+{
+    m17n_encoding *enc;
+    m17n_encoding **table;
+
+    if (m17n_encoding_table) return;	/* already initialized */
+
+    table = malloc(sizeof(m17n_encoding*)*4);
+    enc = malloc(sizeof(m17n_encoding));
+    if (!table || !enc) {
+        free(table);
+        free(enc);
+        return;
+    }
+    table[0] = enc;
+    m17n_encoding_table = table;
+    enc->name = "ascii";
+    /* malloc does not clear memory; without this the ASCII index is garbage */
+    enc->index = 0;
+
+    m17n_encoding_mbmaxlen(enc, 1);
+    m17n_encoding_func_strlen(enc, asc_strlen);
+    m17n_encoding_func_mbclen(enc, asc_mbclen);
+    m17n_encoding_func_codelen(enc, asc_codelen);
+    m17n_encoding_func_mbcspan(enc, asc_mbcspan);
+    m17n_encoding_func_islead(enc, asc_islead);
+    m17n_encoding_func_nth(enc, asc_nth);
+    m17n_encoding_func_ctype(enc, asc_ctype);
+    m17n_encoding_func_toupper(enc, asc_toupper);
+    m17n_encoding_func_tolower(enc, asc_tolower);
+    m17n_encoding_func_codepoint(enc, asc_codepoint);
+    m17n_encoding_func_firstbyte(enc, asc_firstbyte);
+    m17n_encoding_func_mbcput(enc, asc_mbcput);
+
+    enc = m17n_define_encoding("latin1");
+    if (!enc) return;
+    m17n_encoding_mbmaxlen(enc, 1);
+    m17n_encoding_func_ctype(enc, latin1_ctype);
+    m17n_encoding_func_toupper(enc, latin1_toupper);
+    m17n_encoding_func_tolower(enc, latin1_tolower);
+
+    enc = m17n_define_encoding("euc-jp");
+    if (!enc) return;
+    m17n_encoding_mbmaxlen(enc, 3);
+    m17n_encoding_func_mbclen(enc, eucjp_mbclen);
+    m17n_encoding_func_codelen(enc, jis_codelen);
+    m17n_encoding_func_mbcspan(enc, eucjp_mbcspan);
+    m17n_encoding_func_islead(enc, eucjp_islead);
+    m17n_encoding_func_codepoint(enc, jis_codepoint);
+    m17n_encoding_func_firstbyte(enc, jis_firstbyte);
+    m17n_encoding_func_mbcput(enc, jis_mbcput);
+
+    enc = m17n_define_encoding("sjis");
+    if (!enc) return;
+    m17n_encoding_mbmaxlen(enc, 2);
+    m17n_encoding_func_mbclen(enc, sjis_mbclen);
+    m17n_encoding_func_codelen(enc, jis_codelen);
+    m17n_encoding_func_mbcspan(enc, sjis_mbcspan);
+    m17n_encoding_func_islead(enc, sjis_islead);
+    m17n_encoding_func_ctype(enc, sjis_ctype);
+    m17n_encoding_func_codepoint(enc, jis_codepoint);
+    m17n_encoding_func_firstbyte(enc, jis_firstbyte);
+    m17n_encoding_func_mbcput(enc, jis_mbcput);
+
+    enc = m17n_define_encoding("utf-8");
+    if (!enc) return;
+    m17n_encoding_mbmaxlen(enc, 6);
+    m17n_encoding_func_mbclen(enc, utf8_mbclen);
+    m17n_encoding_func_codelen(enc, utf8_codelen);
+    m17n_encoding_func_mbcspan(enc, utf8_mbcspan);
+    m17n_encoding_func_islead(enc, utf8_islead);
+    m17n_encoding_func_codepoint(enc, utf8_codepoint);
+    m17n_encoding_func_firstbyte(enc, utf8_firstbyte);
+    m17n_encoding_func_mbcput(enc, utf8_mbcput);
+}
+
+/*
+ * Compare LEN bytes of P1 and P2 after passing each byte through the
+ * encoding's toupper callback, i.e. a case-insensitive memcmp.
+ * Returns 0 when all bytes match, otherwise the difference of the first
+ * pair of upper-cased values that differ. A non-positive LEN compares
+ * nothing and returns 0.
+ *
+ * Bytes are read as unsigned char: where plain char is signed, the old
+ * "(unsigned)*p" cast sign-extended bytes above 0x7f into values above
+ * 0xff, which every toupper callback in this file returns unchanged.
+ */
+int
+m17n_memcmp(p1, p2, len, enc)
+    const char *p1, *p2;
+    long len;
+    const m17n_encoding *enc;
+{
+    const unsigned char *s1 = (const unsigned char*)p1;
+    const unsigned char *s2 = (const unsigned char*)p2;
+
+    while (len-- > 0) {
+        int tmp = (int)m17n_toupper(enc, *s1) - (int)m17n_toupper(enc, *s2);
+
+        if (tmp) return tmp;
+        s1++;
+        s2++;
+    }
+    return 0;
+}
diff --git a/m17n.h b/m17n.h
new file mode 100644
index 0000000000..d76c5a9125
--- /dev/null
+++ b/m17n.h
@@ -0,0 +1,109 @@
+/**********************************************************************
+
+ m17n.h -
+
+ $Author$
+ $Date$
+ created at: Thu Dec 28 16:19:43 JST 2000
+
+ Copyright (C) 1993-2001 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#ifndef M17N_H
+#define M17N_H
+
+/*
+ * Prototype wrappers: _(args) expands to the parameter list when
+ * HAVE_PROTOTYPES is defined and to () otherwise; __(args) does the same
+ * under HAVE_STDARG_PROTOTYPES. Both are defined only if _ is not
+ * already defined.
+ */
+#ifndef _
+#ifdef HAVE_PROTOTYPES
+# define _(args) args
+#else
+# define _(args) ()
+#endif
+#ifdef HAVE_STDARG_PROTOTYPES
+# define __(args) args
+#else
+# define __(args) ()
+#endif
+#endif
+
+void m17n_init _((void));
+
+/*
+ * Descriptor of one character encoding: its name, its slot in
+ * m17n_encoding_table, the maximum character length in bytes, and the
+ * callbacks behind the m17n_* macros below. Every callback receives the
+ * descriptor itself as its last argument; islead now takes it as const
+ * like all the others.
+ */
+typedef struct m17n_encoding {
+    const char *name;	/* name matched by m17n_find_encoding() */
+    int index;		/* position in m17n_encoding_table */
+
+    int mbmaxlen;	/* max bytes per character; 0 means unknown */
+
+    int (*strlen) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc));
+    int (*mbclen) _((int c, const struct m17n_encoding* enc));
+    int (*codelen) _((unsigned int c, const struct m17n_encoding* enc));
+    int (*mbcspan) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc));
+    int (*islead) _((int c, const struct m17n_encoding* enc));
+    unsigned char *(*nth) _((unsigned char *p, const unsigned char *e, int n, const struct m17n_encoding* enc));
+    int (*ctype) _((const unsigned int c, const unsigned int code, const struct m17n_encoding* enc));
+    unsigned int (*toupper) _((unsigned int c, const struct m17n_encoding* enc));
+    unsigned int (*tolower) _((unsigned int c, const struct m17n_encoding* enc));
+    unsigned int (*codepoint) _((const unsigned char *p, const unsigned char *e, const struct m17n_encoding* enc));
+    int (*firstbyte) _((unsigned int c, const struct m17n_encoding* enc));
+    void (*mbcput) _((unsigned int c, unsigned char *p, const struct m17n_encoding* enc));
+} m17n_encoding;
+
+extern m17n_encoding **m17n_encoding_table;
+
+m17n_encoding *m17n_define_encoding _((const char *name));
+
+/* Convert between a descriptor and its table slot. Expansions are fully
+   parenthesized so they can be used inside larger expressions. */
+#define m17n_encoding_to_index(enc) ((enc)->index)
+#define m17n_index_to_encoding(index) (m17n_encoding_table[(index)])
+m17n_encoding *m17n_find_encoding _((const char *name));
+
+/* Setters for the fields of an encoding descriptor. Each expands to a
+   single parenthesized assignment expression. */
+#define m17n_encoding_mbmaxlen(enc,n) ((enc)->mbmaxlen = (n))
+#define m17n_encoding_func_strlen(enc,func) ((enc)->strlen = (func))
+#define m17n_encoding_func_mbclen(enc,func) ((enc)->mbclen = (func))
+#define m17n_encoding_func_codelen(enc,func) ((enc)->codelen = (func))
+#define m17n_encoding_func_mbcspan(enc,func) ((enc)->mbcspan = (func))
+#define m17n_encoding_func_islead(enc,func) ((enc)->islead = (func))
+#define m17n_encoding_func_nth(enc,func) ((enc)->nth = (func))
+#define m17n_encoding_func_ctype(enc,func) ((enc)->ctype = (func))
+#define m17n_encoding_func_toupper(enc,func) ((enc)->toupper = (func))
+#define m17n_encoding_func_tolower(enc,func) ((enc)->tolower = (func))
+#define m17n_encoding_func_codepoint(enc,func) ((enc)->codepoint = (func))
+#define m17n_encoding_func_firstbyte(enc,func) ((enc)->firstbyte = (func))
+#define m17n_encoding_func_mbcput(enc,func) ((enc)->mbcput = (func))
+
+/* Accessors that dispatch through the descriptor's callbacks. enc is
+   expanded twice by the calling forms; mbclen and islead mask c to one
+   byte before calling. m17n_mbmaxlen is now parenthesized. */
+#define m17n_mbmaxlen(enc) ((enc)->mbmaxlen)
+#define m17n_strlen(enc,p,e) (*(enc)->strlen)((p),(e),(enc))
+#define m17n_mbclen(enc,c) (*(enc)->mbclen)((c)&0xff,(enc))
+#define m17n_codelen(enc,c) (*(enc)->codelen)((c),(enc))
+#define m17n_mbcspan(enc,p,e) (*(enc)->mbcspan)((p),(e),(enc))
+#define m17n_islead(enc,c) (*(enc)->islead)((c)&0xff,(enc))
+#define m17n_nth(enc,p,e,n) (*(enc)->nth)((p),(e),(n),(enc))
+#define m17n_codepoint(enc,p,e) (*(enc)->codepoint)((p),(e),(enc))
+#define m17n_firstbyte(enc,c) (*(enc)->firstbyte)((c),(enc))
+#define m17n_mbcput(enc,c,p) (*(enc)->mbcput)((c),(p),(enc))
+
+/* Character-class bit masks (octal) passed as the "code" argument of the ctype callback. */
+#define M17N_U 01 /* Upper case */
+#define M17N_L 02 /* Lower case */
+#define M17N_N 04 /* Numeral (digit) */
+#define M17N_S 010 /* Spacing character */
+#define M17N_P 020 /* Punctuation */
+#define M17N_C 040 /* Control character */
+#define M17N_B 0100 /* Blank */
+#define M17N_X 0200 /* heXadecimal digit */
+
+/*
+ * Character-class predicates and case conversion for code c in encoding
+ * enc. Each predicate calls the ctype callback with an OR of M17N_* masks
+ * and yields whatever that callback returns; toupper/tolower call the
+ * matching callbacks. enc is expanded twice in every macro.
+ */
+#define m17n_isprint(enc,c) (*(enc)->ctype)((c),(M17N_P|M17N_U|M17N_L|M17N_N|M17N_B),(enc))
+#define m17n_isspace(enc,c) (*(enc)->ctype)((c),(M17N_S),(enc))
+#define m17n_isblank(enc,c) (*(enc)->ctype)((c),(M17N_B),(enc))
+#define m17n_ispunct(enc,c) (*(enc)->ctype)((c),(M17N_P),(enc))
+#define m17n_isupper(enc,c) (*(enc)->ctype)((c),(M17N_U),(enc))
+#define m17n_islower(enc,c) (*(enc)->ctype)((c),(M17N_L),(enc))
+#define m17n_isalnum(enc,c) (*(enc)->ctype)((c),(M17N_U|M17N_L|M17N_N),(enc))
+#define m17n_isalpha(enc,c) (*(enc)->ctype)((c),(M17N_U|M17N_L),(enc))
+#define m17n_isdigit(enc,c) (*(enc)->ctype)((c),(M17N_N),(enc))
+#define m17n_isxdigit(enc,c) (*(enc)->ctype)((c),(M17N_X),(enc))
+#define m17n_iscntrl(enc,c) (*(enc)->ctype)((c),(M17N_C),(enc))
+#define m17n_toupper(enc,c) (*(enc)->toupper)((c),(enc))
+#define m17n_tolower(enc,c) (*(enc)->tolower)((c),(enc))
+
+int m17n_memcmp _((const char *p1, const char *p2, long len, const m17n_encoding *enc));
+
+#endif /* M17N_H */
diff --git a/process.c b/process.c
index ebc76d5076..32c7d57874 100644
--- a/process.c
+++ b/process.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Aug 10 14:30:50 JST 1993
- Copyright (C) 1993-2000 Yukihiro Matsumoto
+ Copyright (C) 1993-2001 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/re.h b/re.h
index d69520a8f4..88a67705d5 100644
--- a/re.h
+++ b/re.h
@@ -28,17 +28,12 @@ struct RMatch {
#define RMATCH(obj) (R_CAST(RMatch)(obj))
-int rb_str_cicmp _((VALUE, VALUE));
VALUE rb_reg_regcomp _((VALUE));
int rb_reg_search _((VALUE, VALUE, int, int));
VALUE rb_reg_regsub _((VALUE, VALUE, struct re_registers *));
int rb_reg_adjust_startpos _((VALUE, VALUE, int, int));
-
-int rb_kcode _((void));
void rb_match_busy _((VALUE));
EXTERN int ruby_ignorecase;
-int rb_reg_mbclen2 _((unsigned int, VALUE));
-#define mbclen2(c,re) rb_reg_mbclen2((c),(re))
#endif
diff --git a/util.c b/util.c
index 25c00ae2d6..46fcf0ba79 100644
--- a/util.c
+++ b/util.c
@@ -78,19 +78,6 @@ int *retlen;
#include "missing/file.h"
#endif
-static char *
-check_dir(dir)
- char *dir;
-{
- struct stat st;
-
- if (dir == NULL) return NULL;
- if (stat(dir, &st) < 0) return NULL;
- if (!S_ISDIR(st.st_mode)) return NULL;
- if (eaccess(dir, W_OK) < 0) return NULL;
- return dir;
-}
-
#if defined(MSDOS) || defined(__CYGWIN32__) || defined(NT)
/*
* Copyright (c) 1993, Intergraph Corporation
diff --git a/variable.c b/variable.c
index 69791ab26d..fae2255908 100644
--- a/variable.c
+++ b/variable.c
@@ -1008,8 +1008,6 @@ top_const_get(id, klassp)
ID id;
VALUE *klassp;
{
- VALUE value;
-
/* pre-defined class */
if (st_lookup(rb_class_tbl, id, klassp)) return Qtrue;