14 files changed, 152 insertions, 39 deletions
diff --git a/ChangeLog b/ChangeLog
index 76e8e3325d..50df92731a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,47 @@
+Tue Jan  1 21:11:33 2008  Tanaka Akira  <akr@fsij.org>
+
+	* include/ruby/encoding.h (rb_isascii): defined.
+	  (rb_isalnum): ditto.
+	  (rb_isalpha): ditto.
+	  (rb_isblank): ditto.
+	  (rb_iscntrl): ditto.
+	  (rb_isdigit): ditto.
+	  (rb_isgraph): ditto.
+	  (rb_islower): ditto.
+	  (rb_isprint): ditto.
+	  (rb_ispunct): ditto.
+	  (rb_isspace): ditto.
+	  (rb_isupper): ditto.
+	  (rb_isxdigit): ditto.
+	  (rb_tolower): ditto.
+	  (rb_toupper): ditto.
+
+	* include/ruby/st.h (st_strcasecmp): declared.
+	  (st_strncasecmp): ditto.
+
+	* st.c (type_strcasehash): use st_strcasecmp instead of strcasecmp.
+	  (st_strcasecmp): defined.
+	  (st_strncasecmp): ditto.
+
+	* include/ruby/ruby.h: include include/ruby/encoding.h.
+	  (ISASCII): use rb_isascii.
+	  (ISPRINT): use rb_isprint.
+	  (ISSPACE): use rb_isspace.
+	  (ISUPPER): use rb_isupper.
+	  (ISLOWER): use rb_islower.
+	  (ISALNUM): use rb_isalnum.
+	  (ISALPHA): use rb_isalpha.
+	  (ISDIGIT): use rb_isdigit.
+	  (ISXDIGIT): use rb_isxdigit.
+	  (TOUPPER): defined.
+	  (TOLOWER): ditto.
+	  (STRCASECMP): ditto.
+	  (STRNCASECMP): ditto.
+
+	* dir.c, encoding.c, file.c, hash.c, process.c, ruby.c, time.c,
+	  transcode.c, ext/readline/readline.c: use locale insensitive
+	  functions.  [ruby-core:14662]
+
 Tue Jan  1 17:50:47 2008  Nobuyoshi Nakada  <nobu@ruby-lang.org>
 
 	* io.c (rb_io_mode_enc): encoding spec is not allowed in binary mode.
diff --git a/dir.c b/dir.c
index bb5e9457a3..8bb8df71f3 100644
--- a/dir.c
+++ b/dir.c
@@ -88,7 +88,7 @@ char *strchr(char*,char);
 #define FNM_NOMATCH	1
 #define FNM_ERROR	2
 
-#define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c))
+#define downcase(c) (nocase && ISUPPER(c) ? TOLOWER(c) : (c))
 #define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
 
 /* caution: in case *p == '\0'
diff --git a/encoding.c b/encoding.c
index cf044b1cbe..0a8e249cd6 100644
--- a/encoding.c
+++ b/encoding.c
@@ -173,7 +173,7 @@ rb_enc_register(const char *name, rb_encoding *encoding)
 
     if (index >= 0) {
 	rb_encoding *oldenc = rb_enc_from_index(index);
-	if (strcasecmp(name, rb_enc_name(oldenc))) {
+	if (STRCASECMP(name, rb_enc_name(oldenc))) {
 	    st_data_t key = (st_data_t)name, alias;
 	    st_delete(enc_table_alias, &key, &alias);
 	    index = enc_register(name, encoding);
@@ -341,7 +341,7 @@ rb_enc_registered(const char *name)
 	    if (i < ENCODING_INLINE_MAX - 1) i = ENCODING_INLINE_MAX - 1;
 	    continue;
 	}
-	if (strcasecmp(name, enc_table[i].name) == 0) {
+	if (STRCASECMP(name, enc_table[i].name) == 0) {
 	    return i;
 	}
     }
@@ -368,7 +368,7 @@ rb_enc_find_index(const char *name)
 	char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib);
 	while (s < e) {
 	    if (!ISALNUM(*s)) *s = '_';
-	    else if (ISUPPER(*s)) *s = tolower(*s);
+	    else if (ISUPPER(*s)) *s = TOLOWER(*s);
 	    ++s;
 	}
 	OBJ_FREEZE(enclib);
diff --git a/ext/readline/readline.c b/ext/readline/readline.c
index 2e9a177c3b..53d37a06aa 100644
--- a/ext/readline/readline.c
+++ b/ext/readline/readline.c
@@ -8,7 +8,6 @@
 #include "ruby/config.h"
 #include <errno.h>
 #include <stdio.h>
-#include <ctype.h>
 #include <string.h>
 #ifdef HAVE_READLINE_READLINE_H
 #include <readline/readline.h>
@@ -30,8 +29,6 @@
 
 static VALUE mReadline;
 
-#define TOLOWER(c) (isupper(c) ? tolower(c) : c)
-
 #define COMPLETION_PROC "completion_proc"
 #define COMPLETION_CASE_FOLD "completion_case_fold"
 static ID completion_proc, completion_case_fold;
diff --git a/file.c b/file.c
index c9cf386c0d..6e04e83359 100644
--- a/file.c
+++ b/file.c
@@ -2532,10 +2532,6 @@ rb_path_end(const char *path)
     buflen = RSTRING_LEN(result),\
     pend = p + buflen)
 
-#if !defined(TOLOWER)
-#define TOLOWER(c) (ISUPPER(c) ? tolower(c) : (c))
-#endif
-
 static int is_absolute_path(const char*);
 
 static VALUE
diff --git a/hash.c b/hash.c
index bd22fef1da..4e80b56f42 100644
--- a/hash.c
+++ b/hash.c
@@ -1781,7 +1781,7 @@ env_delete(VALUE obj, VALUE name)
 
 	ruby_setenv(nam, 0);
 #ifdef ENV_IGNORECASE
-	if (strcasecmp(nam, PATH_ENV) == 0)
+	if (STRCASECMP(nam, PATH_ENV) == 0)
 #else
 	if (strcmp(nam, PATH_ENV) == 0)
 #endif
@@ -1817,7 +1817,7 @@ rb_f_getenv(VALUE obj, VALUE name)
     env = getenv(nam);
     if (env) {
 #ifdef ENV_IGNORECASE
-	if (strcasecmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
+	if (STRCASECMP(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
 #else
 	if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
 #endif
@@ -1859,7 +1859,7 @@ env_fetch(int argc, VALUE *argv)
 	return if_none;
     }
 #ifdef ENV_IGNORECASE
-    if (strcasecmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
+    if (STRCASECMP(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
 #else
     if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted())
 #endif
@@ -1893,7 +1893,7 @@ envix(const char *nam)
     for (i = 0; env[i]; i++) {
 	if (
 #ifdef ENV_IGNORECASE
-	    strncasecmp(env[i],nam,len) == 0
+	    STRNCASECMP(env[i],nam,len) == 0
 #else
 	    memcmp(env[i],nam,len) == 0
 #endif
@@ -2014,7 +2014,7 @@ env_aset(VALUE obj, VALUE nm, VALUE val)
 
     ruby_setenv(name, value);
 #ifdef ENV_IGNORECASE
-    if (strcasecmp(name, PATH_ENV) == 0) {
+    if (STRCASECMP(name, PATH_ENV) == 0) {
 #else
     if (strcmp(name, PATH_ENV) == 0) {
 #endif
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 89f3ec36b8..e10d893fb1 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -136,4 +136,20 @@ VALUE rb_enc_default_external(void);
 void rb_enc_set_default_external(VALUE encoding);
 VALUE rb_locale_charmap(VALUE klass);
 
+#define rb_isascii(c) ONIGENC_IS_CODE_ASCII(c) /* intended as locale-insensitive <ctype.h> stand-ins */
+#define rb_isalnum(c) ONIGENC_IS_CODE_ALNUM(ONIG_ENCODING_ASCII, c) /* class tests always pass the ASCII encoding */
+#define rb_isalpha(c) ONIGENC_IS_CODE_ALPHA(ONIG_ENCODING_ASCII, c)
+#define rb_isblank(c) ONIGENC_IS_CODE_BLANK(ONIG_ENCODING_ASCII, c)
+#define rb_iscntrl(c) ONIGENC_IS_CODE_CNTRL(ONIG_ENCODING_ASCII, c)
+#define rb_isdigit(c) ONIGENC_IS_CODE_DIGIT(ONIG_ENCODING_ASCII, c)
+#define rb_isgraph(c) ONIGENC_IS_CODE_GRAPH(ONIG_ENCODING_ASCII, c)
+#define rb_islower(c) ONIGENC_IS_CODE_LOWER(ONIG_ENCODING_ASCII, c)
+#define rb_isprint(c) ONIGENC_IS_CODE_PRINT(ONIG_ENCODING_ASCII, c)
+#define rb_ispunct(c) ONIGENC_IS_CODE_PUNCT(ONIG_ENCODING_ASCII, c)
+#define rb_isspace(c) ONIGENC_IS_CODE_SPACE(ONIG_ENCODING_ASCII, c)
+#define rb_isupper(c) ONIGENC_IS_CODE_UPPER(ONIG_ENCODING_ASCII, c)
+#define rb_isxdigit(c) ONIGENC_IS_CODE_XDIGIT(ONIG_ENCODING_ASCII, c)
+#define rb_tolower(c) rb_enc_tolower(c, ONIG_ENCODING_ASCII) /* case mapping, ASCII encoding only */
+#define rb_toupper(c) rb_enc_toupper(c, ONIG_ENCODING_ASCII)
+
 #endif /* RUBY_ENCODING_H */
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index 90402afeec..8ce623dfe3 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -63,20 +63,6 @@ extern "C" {
 
 #include "defines.h"
 
-/* need to include <ctype.h> to use these macros */
-#ifndef ISPRINT
-#define ISASCII(c) isascii((int)(unsigned char)(c))
-#undef ISPRINT
-#define ISPRINT(c) (ISASCII(c) && isprint((int)(unsigned char)(c)))
-#define ISSPACE(c) (ISASCII(c) && isspace((int)(unsigned char)(c)))
-#define ISUPPER(c) (ISASCII(c) && isupper((int)(unsigned char)(c)))
-#define ISLOWER(c) (ISASCII(c) && islower((int)(unsigned char)(c)))
-#define ISALNUM(c) (ISASCII(c) && isalnum((int)(unsigned char)(c)))
-#define ISALPHA(c) (ISASCII(c) && isalpha((int)(unsigned char)(c)))
-#define ISDIGIT(c) (ISASCII(c) && isdigit((int)(unsigned char)(c)))
-#define ISXDIGIT(c) (ISASCII(c) && isxdigit((int)(unsigned char)(c)))
-#endif
-
 #if defined(HAVE_ALLOCA_H)
 #include <alloca.h>
 #else
@@ -982,4 +968,22 @@ int rb_remove_event_hook(rb_event_hook_func_t func);
 }  /* extern "C" { */
 #endif
 
+#include "encoding.h" /* supplies the rb_is*() and rb_to*() macros used below */
+#ifndef ISPRINT
+#define ISASCII(c) rb_isascii((int)(unsigned char)(c))
+#undef ISPRINT /* NOTE(review): no effect, ISPRINT is undefined inside this #ifndef */
+#define ISPRINT(c) (ISASCII(c) && rb_isprint((int)(unsigned char)(c)))
+#define ISSPACE(c) (ISASCII(c) && rb_isspace((int)(unsigned char)(c)))
+#define ISUPPER(c) (ISASCII(c) && rb_isupper((int)(unsigned char)(c)))
+#define ISLOWER(c) (ISASCII(c) && rb_islower((int)(unsigned char)(c)))
+#define ISALNUM(c) (ISASCII(c) && rb_isalnum((int)(unsigned char)(c)))
+#define ISALPHA(c) (ISASCII(c) && rb_isalpha((int)(unsigned char)(c)))
+#define ISDIGIT(c) (ISASCII(c) && rb_isdigit((int)(unsigned char)(c)))
+#define ISXDIGIT(c) (ISASCII(c) && rb_isxdigit((int)(unsigned char)(c)))
+#endif
+#define TOUPPER(c) (rb_toupper((int)(unsigned char)(c))) /* argument is narrowed to unsigned char first */
+#define TOLOWER(c) (rb_tolower((int)(unsigned char)(c)))
+#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2)) /* forwards to st_strcasecmp, declared in st.h */
+#define STRNCASECMP(s1, s2, n) (st_strncasecmp(s1, s2, n)) /* forwards to st_strncasecmp, declared in st.h */
+
 #endif /* RUBY_H */
diff --git a/include/ruby/st.h b/include/ruby/st.h
index 4adf48fde4..199d67e411 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -87,6 +87,8 @@ void st_clear(st_table *);
 st_table *st_copy(st_table *);
 int st_numcmp(st_data_t, st_data_t);
 int st_numhash(st_data_t);
+int st_strcasecmp(const char *s1, const char *s2);
+int st_strncasecmp(const char *s1, const char *s2, size_t n);
 
 #if defined(__cplusplus)
 #if 0
diff --git a/process.c b/process.c
index 01f97fca34..91596ff302 100644
--- a/process.c
+++ b/process.c
@@ -990,7 +990,7 @@ proc_exec_v(char **argv, const char *prog)
 #endif
 	char *extension;
 
-	if ((extension = strrchr(prog, '.')) != NULL && strcasecmp(extension, ".bat") == 0) {
+	if ((extension = strrchr(prog, '.')) != NULL && STRCASECMP(extension, ".bat") == 0) {
 	    char **new_argv;
 	    char *p;
 	    int n;
@@ -1128,7 +1128,7 @@ proc_spawn_v(char **argv, char *prog)
 	return -1;
 
 #if defined(__human68k__)
-    if ((extension = strrchr(prog, '.')) != NULL && strcasecmp(extension, ".bat") == 0) {
+    if ((extension = strrchr(prog, '.')) != NULL && STRCASECMP(extension, ".bat") == 0) {
 	char **new_argv;
 	char *p;
 	int n;
diff --git a/ruby.c b/ruby.c
index bc0fdde4e1..ddcdb3f958 100644
--- a/ruby.c
+++ b/ruby.c
@@ -184,7 +184,7 @@ rubylib_mangled_path(const char *s, unsigned int l)
 	    notfound = 1;
 	}
     }
-    if (!newp || l < oldl || strncasecmp(oldp, s, oldl) != 0) {
+    if (!newp || l < oldl || STRNCASECMP(oldp, s, oldl) != 0) {
 	return rb_str_new(s, l);
     }
     ret = rb_str_new(0, l + newl - oldl);
@@ -345,7 +345,7 @@ ruby_init_loadpath(void)
     p = strrchr(libpath, '/');
     if (p) {
 	*p = 0;
-	if (p - libpath > 3 && !strcasecmp(p - 4, "/bin")) {
+	if (p - libpath > 3 && !STRCASECMP(p - 4, "/bin")) {
 	    p -= 4;
 	    *p = 0;
 	}
@@ -1039,7 +1039,7 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
 #if defined DOSISH || defined __CYGWIN__
 	{
 	    const char *ext = strrchr(fname, '.');
-	    if (ext && strcasecmp(ext, ".exe") == 0)
+	    if (ext && STRCASECMP(ext, ".exe") == 0)
 		mode |= O_BINARY;
 	}
 #endif
diff --git a/st.c b/st.c
index a0e669294b..4af67caf56 100644
--- a/st.c
+++ b/st.c
@@ -54,7 +54,7 @@ static const struct st_hash_type type_strhash = {
 
 static int strcasehash(const char *);
 static const struct st_hash_type type_strcasehash = {
-    strcasecmp,
+    st_strcasecmp,
     strcasehash,
 };
 
@@ -861,6 +861,60 @@ strhash(register const char *string)
     return hval;
 }
 
+/*
+ * Compare s1 and s2 ignoring the case of ASCII letters only, without
+ * consulting the C locale.  Returns 0 when the strings are equal, 1 when
+ * s1 orders after s2 and -1 when it orders before.
+ */
+int
+st_strcasecmp(const char *s1, const char *s2)
+{
+    const unsigned char *p = (const unsigned char *)s1;
+    const unsigned char *q = (const unsigned char *)s2;
+
+    for (;; p++, q++) {
+        unsigned int a = *p, b = *q;
+
+        if (a == '\0' || b == '\0') {
+            /* at least one string ended: the longer one orders last */
+            if (a != '\0') return 1;
+            return (b != '\0') ? -1 : 0;
+        }
+        /* fold 'A'..'Z' onto 'a'..'z'; unsigned wrap rejects bytes < 'A' */
+        if (a - 'A' <= (unsigned int)('Z' - 'A')) a += 'a' - 'A';
+        if (b - 'A' <= (unsigned int)('Z' - 'A')) b += 'a' - 'A';
+        if (a != b) return (a > b) ? 1 : -1;
+    }
+}
+
+/*
+ * Compare at most n bytes of s1 and s2 ignoring the case of ASCII letters
+ * only, without consulting the C locale.  Returns 0 when the compared
+ * prefixes are equal (always for n == 0), 1 when s1 orders after s2 and
+ * -1 when it orders before.
+ */
+int
+st_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+    unsigned int c1, c2;
+
+    while (n--) {
+        c1 = (unsigned char)*s1++;
+        c2 = (unsigned char)*s2++;
+        if (c1 == '\0' || c2 == '\0') {
+            /* a string ended inside the limit: resolve here, not after the loop */
+            if (c1 != '\0') return 1;
+            if (c2 != '\0') return -1;
+            return 0;
+        }
+        if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A';
+        if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A';
+        if (c1 != c2) return (c1 > c2) ? 1 : -1;
+    }
+    /* all n bytes matched; c1/c2 are not read here, so n == 0 is safe */
+    return 0;
+}
+
 static int
 strcasehash(register const char *string)
 {
diff --git a/time.c b/time.c
index f9ebc6900f..1d9186d8c3 100644
--- a/time.c
+++ b/time.c
@@ -419,7 +419,7 @@ time_arg(int argc, VALUE *argv, struct tm *tm, long *nsec)
 	    tm->tm_mon = -1;
 	    for (i=0; i<12; i++) {
 		if (RSTRING_LEN(s) == 3 &&
-		    strcasecmp(months[i], RSTRING_PTR(v[1])) == 0) {
+		    STRCASECMP(months[i], RSTRING_PTR(v[1])) == 0) {
 		    tm->tm_mon = i;
 		    break;
 		}
diff --git a/transcode.c b/transcode.c
index 4875570ef4..daac1ed566 100644
--- a/transcode.c
+++ b/transcode.c
@@ -110,7 +110,7 @@ init_transcoder_table(void)
     rb_declare_transcoder("ISO-2022-JP", "UTF-8", "japanese");
 }
 
-#define encoding_equal(enc1, enc2) (strcasecmp(enc1, enc2) == 0)
+#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
 
 static rb_transcoder *
 transcode_dispatch(const char* from_encoding, const char* to_encoding)
@@ -297,7 +297,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
 	    return to_encidx;
 	}
     }
-    if (strcasecmp(from_e, to_e) == 0) {
+    if (STRCASECMP(from_e, to_e) == 0) {
 	return -1;
     }