summary | refs | log | tree | commit | diff
path: root/dir.c
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-07-07 17:29:44 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-07-07 17:29:44 +0000
commit 0ad460dde8b91ebda10df52ba54b512b98429a4e (patch)
tree 9aaf714cc3ff12582e3753e6d9b610c44d4f8d50 /dir.c
parent f10c88827e2a63ea306527d00ef0377488806ac1 (diff)
* dir.c: preserve encoding of strings in glob and fnmatch.
* include/ruby/ruby.h: related changes.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@17950 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'dir.c')
-rw-r--r-- dir.c 245
1 files changed, 93 insertions, 152 deletions
diff --git a/dir.c b/dir.c
index 213d6cdb87..284a1ce352 100644
--- a/dir.c
+++ b/dir.c
@@ -80,96 +80,35 @@ char *strchr(char*,char);
#define FNM_NOMATCH 1
#define FNM_ERROR 2
-#define downcase(c) (nocase && ISUPPER(c) ? TOLOWER(c) : (c))
-#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
+# define Next(p, enc) (rb_enc_right_char_head(p, p+1, enc))
+# define Inc(p, enc) ((p) = Next(p, enc))
-/* caution: in case *p == '\0'
- Next(p) == p + 1 in single byte environment
- Next(p) == p in multi byte environment
-*/
-#if defined(CharNext)
-# define Next(p) CharNext(p)
-#elif defined(DJGPP)
-# define Next(p) ((p) + mblen(p, RUBY_MBCHAR_MAXSIZE))
-#elif defined(__EMX__)
-# define Next(p) ((p) + emx_mblen(p))
-static inline int
-emx_mblen(const char *p)
-{
- int n = mblen(p, RUBY_MBCHAR_MAXSIZE);
- return (n < 0) ? 1 : n;
-}
-#endif
-
-#ifndef Next /* single byte environment */
-# define Next(p) ((p) + 1)
-# define Inc(p) (++(p))
-# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2))))
-#else /* multi byte environment */
-# define Inc(p) ((p) = Next(p))
-# define Compare(p1, p2) (CompareImpl(p1, p2, nocase))
static int
-CompareImpl(const char *p1, const char *p2, int nocase)
+char_casecmp(const char *p1, const char *p2, rb_encoding *enc, const int nocase)
{
- const int len1 = Next(p1) - p1;
- const int len2 = Next(p2) - p2;
-#ifdef _WIN32
- char buf1[10], buf2[10]; /* large enough? */
-#endif
-
- if (len1 < 0 || len2 < 0) {
- rb_fatal("CompareImpl: negative len");
- }
-
- if (len1 == 0) return len2;
- if (len2 == 0) return -len1;
-
-#ifdef _WIN32
- if (nocase && rb_w32_iswinnt()) {
- if (len1 > 1) {
- if (len1 >= sizeof(buf1)) {
- rb_fatal("CompareImpl: too large len");
- }
- memcpy(buf1, p1, len1);
- buf1[len1] = '\0';
- CharLower(buf1);
- p1 = buf1; /* trick */
- }
- if (len2 > 1) {
- if (len2 >= sizeof(buf2)) {
- rb_fatal("CompareImpl: too large len");
- }
- memcpy(buf2, p2, len2);
- buf2[len2] = '\0';
- CharLower(buf2);
- p2 = buf2; /* trick */
- }
+ const char *p1end, *p2end;
+
+ if (!*p1) return *p1;
+ if (!*p2) return -*p2;
+ p1end = p1 + strlen(p1);
+ p2end = p2 + strlen(p2);
+ int c1 = rb_enc_codepoint(p1, p1end, enc);
+ int c2 = rb_enc_codepoint(p2, p2end, enc);
+
+ if (c1 == c2) return 0;
+ if (nocase) {
+ c1 = rb_enc_toupper(c1, enc);
+ c2 = rb_enc_toupper(c2, enc);
}
-#endif
- if (len1 == 1)
- if (len2 == 1)
- return compare(downcase(*p1), downcase(*p2));
- else {
- const int ret = compare(downcase(*p1), *p2);
- return ret ? ret : -1;
- }
- else
- if (len2 == 1) {
- const int ret = compare(*p1, downcase(*p2));
- return ret ? ret : 1;
- }
- else {
- const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
- return ret ? ret : len1 - len2;
- }
+ return c1 - c2;
}
-#endif /* environment */
static char *
bracket(
const char *p, /* pattern (next to '[') */
const char *s, /* string */
- int flags)
+ int flags,
+ rb_encoding *enc)
{
const int nocase = flags & FNM_CASEFOLD;
const int escape = !(flags & FNM_NOESCAPE);
@@ -187,19 +126,19 @@ bracket(
t1++;
if (!*t1)
return NULL;
- p = Next(t1);
+ p = Next(t1, enc);
if (p[0] == '-' && p[1] != ']') {
const char *t2 = p + 1;
if (escape && *t2 == '\\')
t2++;
if (!*t2)
return NULL;
- p = Next(t2);
- if (!ok && Compare(t1, s) <= 0 && Compare(s, t2) <= 0)
+ p = Next(t2, enc);
+ if (!ok && char_casecmp(t1, s, enc, nocase) <= 0 && char_casecmp(s, t2, enc, nocase) <= 0)
ok = 1;
}
else
- if (!ok && Compare(t1, s) == 0)
+ if (!ok && char_casecmp(t1, s, enc, nocase) == 0)
ok = 1;
}
@@ -219,7 +158,8 @@ static int
fnmatch_helper(
const char **pcur, /* pattern */
const char **scur, /* string */
- int flags)
+ int flags,
+ rb_encoding *enc)
{
const int period = !(flags & FNM_DOTMATCH);
const int pathname = flags & FNM_PATHNAME;
@@ -253,16 +193,16 @@ fnmatch_helper(
if (ISEND(s))
RETURN(FNM_NOMATCH);
p++;
- Inc(s);
+ Inc(s, enc);
continue;
case '[': {
const char *t;
if (ISEND(s))
RETURN(FNM_NOMATCH);
- if ((t = bracket(p + 1, s, flags)) != 0) {
+ if ((t = bracket(p + 1, s, flags, enc)) != 0) {
p = t;
- Inc(s);
+ Inc(s, enc);
continue;
}
goto failed;
@@ -275,16 +215,16 @@ fnmatch_helper(
RETURN(ISEND(p) ? 0 : FNM_NOMATCH);
if (ISEND(p))
goto failed;
- if (Compare(p, s) != 0)
+ if (char_casecmp(p, s, enc, nocase) != 0)
goto failed;
- Inc(p);
- Inc(s);
+ Inc(p, enc);
+ Inc(s, enc);
continue;
failed: /* try next '*' position */
if (ptmp && stmp) {
p = ptmp;
- Inc(stmp); /* !ISEND(*stmp) */
+ Inc(stmp, enc); /* !ISEND(*stmp) */
s = stmp;
continue;
}
@@ -294,12 +234,15 @@ fnmatch_helper(
static int
fnmatch(
- const char *p, /* pattern */
- const char *s, /* string */
+ VALUE pattern,
+ VALUE string,
int flags)
{
+ const char *p = RSTRING_PTR(pattern); /* pattern */
+ const char *s = RSTRING_PTR(string); /* string */
const int period = !(flags & FNM_DOTMATCH);
const int pathname = flags & FNM_PATHNAME;
+ rb_encoding *enc = rb_enc_get(pattern);
const char *ptmp = 0;
const char *stmp = 0;
@@ -311,8 +254,8 @@ fnmatch(
ptmp = p;
stmp = s;
}
- if (fnmatch_helper(&p, &s, flags) == 0) {
- while (*s && *s != '/') Inc(s);
+ if (fnmatch_helper(&p, &s, flags, enc) == 0) {
+ while (*s && *s != '/') Inc(s, enc);
if (*p && *s) {
p++;
s++;
@@ -323,7 +266,7 @@ fnmatch(
}
/* failed : try next recursion */
if (ptmp && stmp && !(period && *stmp == '.')) {
- while (*stmp && *stmp != '/') Inc(stmp);
+ while (*stmp && *stmp != '/') Inc(stmp, enc);
if (*stmp) {
p = ptmp;
stmp++;
@@ -335,7 +278,7 @@ fnmatch(
}
}
else
- return fnmatch_helper(&p, &s, flags);
+ return fnmatch_helper(&p, &s, flags, enc);
}
VALUE rb_cDir;
@@ -1030,7 +973,7 @@ do_opendir(const char *path, int flags)
/* Return nonzero if S has any special globbing chars in it. */
static int
-has_magic(const char *s, int flags)
+has_magic(const char *s, int flags, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
const int nocase = flags & FNM_CASEFOLD;
@@ -1055,7 +998,7 @@ has_magic(const char *s, int flags)
return 1;
}
- p = Next(p-1);
+ p = Next(p-1, enc);
}
return 0;
@@ -1063,7 +1006,7 @@ has_magic(const char *s, int flags)
/* Find separator in globbing pattern. */
static char *
-find_dirsep(const char *s, int flags)
+find_dirsep(const char *s, int flags, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
@@ -1091,7 +1034,7 @@ find_dirsep(const char *s, int flags)
continue;
}
- p = Next(p-1);
+ p = Next(p-1, enc);
}
return (char *)p-1;
@@ -1099,7 +1042,7 @@ find_dirsep(const char *s, int flags)
/* Remove escaping backslashes */
static void
-remove_backslashes(char *p)
+remove_backslashes(char *p, rb_encoding *enc)
{
char *t = p;
char *s = p;
@@ -1112,7 +1055,7 @@ remove_backslashes(char *p)
s = ++p;
if (!*p) break;
}
- Inc(p);
+ Inc(p, enc);
}
while (*p++);
@@ -1133,7 +1076,7 @@ struct glob_pattern {
static void glob_free_pattern(struct glob_pattern *list);
static struct glob_pattern *
-glob_make_pattern(const char *p, int flags)
+glob_make_pattern(const char *p, int flags, rb_encoding *enc)
{
struct glob_pattern *list, *tmp, **tail = &list;
int dirsep = 0; /* pattern is terminated with '/' */
@@ -1149,7 +1092,7 @@ glob_make_pattern(const char *p, int flags)
dirsep = 1;
}
else {
- const char *m = find_dirsep(p, flags);
+ const char *m = find_dirsep(p, flags, enc);
char *buf = GLOB_ALLOC_N(char, m-p+1);
if (!buf) {
GLOB_FREE(tmp);
@@ -1157,7 +1100,7 @@ glob_make_pattern(const char *p, int flags)
}
memcpy(buf, p, m-p);
buf[m-p] = '\0';
- tmp->type = has_magic(buf, flags) ? MAGICAL : PLAIN;
+ tmp->type = has_magic(buf, flags, enc) ? MAGICAL : PLAIN;
tmp->str = buf;
if (*m) {
dirsep = 1;
@@ -1230,8 +1173,8 @@ enum answer { YES, NO, UNKNOWN };
#endif
struct glob_args {
- void (*func)(const char *, VALUE);
- const char *path;
+ void (*func)(VALUE, VALUE);
+ VALUE path;
VALUE value;
};
@@ -1248,7 +1191,7 @@ glob_func_caller(VALUE val)
static int
glob_helper(
- const char *path,
+ VALUE vpath,
int dirsep, /* '/' should be placed before appending child entry's name to 'path'. */
enum answer exist, /* Does 'path' indicate an existing entry? */
enum answer isdir, /* Does 'path' indicate a directory or a symlink to a directory? */
@@ -1258,6 +1201,7 @@ glob_helper(
ruby_glob_func *func,
VALUE arg)
{
+ const char *path = RSTRING_PTR(vpath);
struct stat st;
int status = 0;
struct glob_pattern **cur, **new_beg, **new_end;
@@ -1310,13 +1254,13 @@ glob_helper(
}
}
if (match_all && exist == YES) {
- status = glob_call_func(func, path, arg);
+ status = glob_call_func(func, rb_tainted_str_new2(path), arg);
if (status) return status;
}
if (match_dir && isdir == YES) {
char *tmp = join_path(path, dirsep, "");
if (!tmp) return -1;
- status = glob_call_func(func, tmp, arg);
+ status = glob_call_func(func, rb_tainted_str_new2(tmp), arg);
GLOB_FREE(tmp);
if (status) return status;
}
@@ -1338,7 +1282,7 @@ glob_helper(
break;
}
if (recursive && strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0
- && fnmatch("*", dp->d_name, flags) == 0) {
+ && fnmatch(rb_usascii_str_new2("*"), rb_str_new2(dp->d_name), flags) == 0) {
#ifndef _WIN32
if (do_lstat(buf, &st, flags) == 0)
new_isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? UNKNOWN : NO;
@@ -1364,12 +1308,13 @@ glob_helper(
p = p->next; /* 0 times recursion */
}
if (p->type == PLAIN || p->type == MAGICAL) {
- if (fnmatch(p->str, dp->d_name, flags) == 0)
+ if (fnmatch(rb_str_new2(p->str), rb_str_new2(dp->d_name), flags) == 0)
*new_end++ = p->next;
}
}
- status = glob_helper(buf, 1, YES, new_isdir, new_beg, new_end, flags, func, arg);
+ status = glob_helper(rb_enc_str_new(buf, strlen(buf), rb_enc_get(vpath)), 1,
+ YES, new_isdir, new_beg, new_end, flags, func, arg);
GLOB_FREE(buf);
GLOB_FREE(new_beg);
if (status) break;
@@ -1395,7 +1340,7 @@ glob_helper(
break;
}
strcpy(name, (*cur)->str);
- if (escape) remove_backslashes(name);
+ if (escape) remove_backslashes(name, rb_enc_get(vpath));
new_beg = new_end = GLOB_ALLOC_N(struct glob_pattern *, end - beg);
if (!new_beg) {
@@ -1405,7 +1350,7 @@ glob_helper(
}
*new_end++ = (*cur)->next;
for (cur2 = cur + 1; cur2 < copy_end; ++cur2) {
- if (*cur2 && fnmatch((*cur2)->str, name, flags) == 0) {
+ if (*cur2 && fnmatch(rb_str_new2((*cur2)->str), rb_str_new2(name), flags) == 0) {
*new_end++ = (*cur2)->next;
*cur2 = 0;
}
@@ -1418,7 +1363,8 @@ glob_helper(
status = -1;
break;
}
- status = glob_helper(buf, 1, UNKNOWN, UNKNOWN, new_beg, new_end, flags, func, arg);
+ status = glob_helper(rb_enc_str_new(buf, strlen(buf), rb_enc_get(vpath)), 1,
+ UNKNOWN, UNKNOWN, new_beg, new_end, flags, func, arg);
GLOB_FREE(buf);
GLOB_FREE(new_beg);
if (status) break;
@@ -1432,15 +1378,14 @@ glob_helper(
}
static int
-ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
+ruby_glob0(VALUE path, int flags, ruby_glob_func *func, VALUE arg)
{
struct glob_pattern *list;
const char *root, *start;
- char *buf;
- int n;
int status;
+ VALUE buf;
- start = root = path;
+ start = root = RSTRING_PTR(path);
flags |= FNM_SYSCASE;
#if defined DOSISH
root = rb_path_skip_prefix(root);
@@ -1448,20 +1393,14 @@ ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
if (root && *root == '/') root++;
- n = root - start;
- buf = GLOB_ALLOC_N(char, n + 1);
- if (!buf) return -1;
- MEMCPY(buf, start, char, n);
- buf[n] = '\0';
+ buf = rb_enc_str_new(start, root - start, rb_enc_get(path));
- list = glob_make_pattern(root, flags);
+ list = glob_make_pattern(root, flags, rb_enc_get(path));
if (!list) {
- GLOB_FREE(buf);
return -1;
}
status = glob_helper(buf, 0, UNKNOWN, UNKNOWN, &list, &list + 1, flags, func, arg);
glob_free_pattern(list);
- GLOB_FREE(buf);
return status;
}
@@ -1469,11 +1408,11 @@ ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
int
ruby_glob(const char *path, int flags, ruby_glob_func *func, VALUE arg)
{
- return ruby_glob0(path, flags & ~GLOB_VERBOSE, func, arg);
+ return ruby_glob0(rb_str_new2(path), flags & ~GLOB_VERBOSE, func, arg);
}
static int
-rb_glob_caller(const char *path, VALUE a)
+rb_glob_caller(VALUE path, VALUE a)
{
int status;
struct glob_args *args = (struct glob_args *)a;
@@ -1484,7 +1423,7 @@ rb_glob_caller(const char *path, VALUE a)
}
static int
-rb_glob2(const char *path, int flags, void (*func)(const char *, VALUE), VALUE arg)
+rb_glob2(VALUE path, int flags, void (*func)(VALUE, VALUE), VALUE arg)
{
struct glob_args args;
@@ -1499,20 +1438,21 @@ rb_glob2(const char *path, int flags, void (*func)(const char *, VALUE), VALUE a
}
void
-rb_glob(const char *path, void (*func)(const char *, VALUE), VALUE arg)
+rb_glob(const char *path, void (*func)(VALUE, VALUE), VALUE arg)
{
- int status = rb_glob2(path, 0, func, arg);
+ int status = rb_glob2(rb_str_new2(path), 0, func, arg);
if (status) GLOB_JUMP_TAG(status);
}
static void
-push_pattern(const char *path, VALUE ary)
+push_pattern(VALUE path, VALUE ary)
{
- rb_ary_push(ary, rb_tainted_str_new2(path));
+ OBJ_TAINT(path);
+ rb_ary_push(ary, path);
}
-int
-ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
+static int
+ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
const char *p = str;
@@ -1531,7 +1471,7 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
if (*p == '\\' && escape) {
if (!*++p) break;
}
- Inc(p);
+ Inc(p, enc);
}
if (lbrace && rbrace) {
@@ -1551,17 +1491,17 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
if (*p == '\\' && escape) {
if (++p == rbrace) break;
}
- Inc(p);
+ Inc(p, enc);
}
memcpy(buf+shift, t, p-t);
strcpy(buf+shift+(p-t), rbrace+1);
- status = ruby_brace_expand(buf, flags, func, arg);
+ status = ruby_brace_expand(buf, flags, func, arg, enc);
if (status) break;
}
GLOB_FREE(buf);
}
else if (!lbrace && !rbrace) {
- status = (*func)(s, arg);
+ status = (*func)(rb_enc_str_new(s, strlen(s), enc), arg);
}
return status;
@@ -1574,7 +1514,7 @@ struct brace_args {
};
static int
-glob_brace(const char *path, VALUE val)
+glob_brace(VALUE path, VALUE val)
{
struct brace_args *arg = (struct brace_args *)val;
@@ -1582,24 +1522,24 @@ glob_brace(const char *path, VALUE val)
}
static int
-ruby_brace_glob0(const char *str, int flags, ruby_glob_func *func, VALUE arg)
+ruby_brace_glob0(VALUE str, int flags, ruby_glob_func *func, VALUE arg)
{
struct brace_args args;
args.func = func;
args.value = arg;
args.flags = flags;
- return ruby_brace_expand(str, flags, glob_brace, (VALUE)&args);
+ return ruby_brace_expand(RSTRING_PTR(str), flags, glob_brace, (VALUE)&args, rb_enc_get(str));
}
int
ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg)
{
- return ruby_brace_glob0(str, flags & ~GLOB_VERBOSE, func, arg);
+ return ruby_brace_glob0(rb_str_new2(str), flags & ~GLOB_VERBOSE, func, arg);
}
static int
-push_glob(VALUE ary, const char *str, int flags)
+push_glob(VALUE ary, VALUE str, int flags)
{
struct glob_args args;
@@ -1618,11 +1558,12 @@ rb_push_glob(VALUE str, int flags) /* '\0' is delimiter */
ary = rb_ary_new();
while (offset < RSTRING_LEN(str)) {
- int status = push_glob(ary, RSTRING_PTR(str) + offset, flags);
char *p, *pend;
+ p = RSTRING_PTR(str) + offset;
+ int status = push_glob(ary,
+ rb_enc_str_new(p, strlen(p), rb_enc_get(str)), flags);
if (status) GLOB_JUMP_TAG(status);
if (offset >= RSTRING_LEN(str)) break;
- p = RSTRING_PTR(str) + offset;
p += strlen(p) + 1;
pend = RSTRING_PTR(str) + RSTRING_LEN(str);
while (p < pend && !*p)
@@ -1643,7 +1584,7 @@ dir_globs(long argc, VALUE *argv, int flags)
int status;
VALUE str = argv[i];
StringValue(str);
- status = push_glob(ary, RSTRING_PTR(str), flags);
+ status = push_glob(ary, str, flags);
if (status) GLOB_JUMP_TAG(status);
}
@@ -1914,7 +1855,7 @@ file_s_fnmatch(int argc, VALUE *argv, VALUE obj)
StringValue(pattern);
FilePathStringValue(path);
- if (fnmatch(RSTRING_PTR(pattern), RSTRING_PTR(path), flags) == 0)
+ if (fnmatch(pattern, path, flags) == 0)
return Qtrue;
return Qfalse;