summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-07-03 02:44:36 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-07-03 02:44:36 +0000
commit42c34dd375c368d8e08f5243571d6983c9d02e19 (patch)
treeca65f172177fd534d6048825636d265636c8a396
parent83c7e46aa795f949122e0a8cafb3fc905992ddd2 (diff)
merge revision(s) 34372:
* file.c (rb_enc_path_next, rb_enc_path_skip_prefix) (rb_enc_path_last_separator, rb_enc_path_end) (ruby_enc_find_basename, ruby_enc_find_extname): encoding-aware path handling functions. * file.c (rb_home_dir, file_expand_path, rb_realpath_internal) (rb_file_s_basename, rb_file_dirname, rb_file_s_extname) (rb_file_join): should respect the encodings of arguments than file system encoding. [ruby-dev:45145] [Bug #5919] * dir.c (check_dirname, ruby_glob0): ditto. * ext/pathname/pathname.c (path_sub_ext): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@36279 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog16
-rw-r--r--dir.c11
-rw-r--r--ext/pathname/pathname.c7
-rw-r--r--file.c415
-rw-r--r--include/ruby/encoding.h6
-rw-r--r--include/ruby/intern.h6
-rw-r--r--test/pathname/test_pathname.rb8
-rw-r--r--test/ruby/test_file_exhaustive.rb44
-rw-r--r--version.h2
9 files changed, 335 insertions, 180 deletions
diff --git a/ChangeLog b/ChangeLog
index 60b2350065..6569001023 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Tue Jul 3 11:44:23 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * file.c (rb_enc_path_next, rb_enc_path_skip_prefix)
+ (rb_enc_path_last_separator, rb_enc_path_end)
+ (ruby_enc_find_basename, ruby_enc_find_extname): encoding-aware
+ path handling functions.
+
+ * file.c (rb_home_dir, file_expand_path, rb_realpath_internal)
+ (rb_file_s_basename, rb_file_dirname, rb_file_s_extname)
+ (rb_file_join): should respect the encodings of arguments than
+ file system encoding. [ruby-dev:45145] [Bug #5919]
+
+ * dir.c (check_dirname, ruby_glob0): ditto.
+
+ * ext/pathname/pathname.c (path_sub_ext): ditto.
+
Tue Jul 3 11:43:46 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
* dir.c (dir_chdir, check_dirname): get rid of optimization-out.
diff --git a/dir.c b/dir.c
index 2cc1226d4d..387a2490b6 100644
--- a/dir.c
+++ b/dir.c
@@ -910,11 +910,16 @@ check_dirname(volatile VALUE *dir)
{
VALUE d = *dir;
char *path, *pend;
+ long len;
+ rb_encoding *enc;
rb_secure(2);
FilePathValue(d);
- path = RSTRING_PTR(d);
- if (path && *(pend = rb_path_end(rb_path_skip_prefix(path)))) {
+ enc = rb_enc_get(d);
+ RSTRING_GETMEM(d, path, len);
+ pend = path + len;
+ pend = rb_enc_path_end(rb_enc_path_skip_prefix(path, pend, enc), pend, enc);
+ if (pend - path < len) {
d = rb_str_subseq(d, 0, pend - path);
}
*dir = rb_str_encode_ospath(d);
@@ -1494,7 +1499,7 @@ ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg, rb_enco
start = root = path;
flags |= FNM_SYSCASE;
#if defined DOSISH
- root = rb_path_skip_prefix(root);
+ root = rb_enc_path_skip_prefix(root, root + strlen(root), enc);
#endif
if (root && *root == '/') root++;
diff --git a/ext/pathname/pathname.c b/ext/pathname/pathname.c
index e1632c524b..e35c19d541 100644
--- a/ext/pathname/pathname.c
+++ b/ext/pathname/pathname.c
@@ -1,4 +1,5 @@
#include "ruby.h"
+#include "ruby/encoding.h"
static VALUE rb_cPathname;
static ID id_at_path, id_to_path;
@@ -184,15 +185,15 @@ path_sub_ext(VALUE self, VALUE repl)
StringValue(repl);
p = RSTRING_PTR(str);
- ext = ruby_find_extname(p, &extlen);
+ extlen = RSTRING_LEN(str);
+ ext = ruby_enc_find_extname(p, &extlen, rb_enc_get(str));
if (ext == NULL) {
ext = p + RSTRING_LEN(str);
}
else if (extlen <= 1) {
ext += extlen;
}
- str2 = rb_str_dup(str);
- rb_str_resize(str2, ext-p);
+ str2 = rb_str_subseq(str, 0, ext-p);
rb_str_append(str2, repl);
OBJ_INFECT(str2, str);
return rb_class_new_instance(1, &str2, rb_obj_class(self));
diff --git a/file.c b/file.c
index 67b281aba1..a8c621a99d 100644
--- a/file.c
+++ b/file.c
@@ -2425,6 +2425,8 @@ rb_file_s_symlink(VALUE klass, VALUE from, VALUE to)
#endif
#ifdef HAVE_READLINK
+static VALUE rb_readlink(VALUE path);
+
/*
* call-seq:
* File.readlink(link_name) -> file_name
@@ -2439,6 +2441,12 @@ rb_file_s_symlink(VALUE klass, VALUE from, VALUE to)
static VALUE
rb_file_s_readlink(VALUE klass, VALUE path)
{
+ return rb_readlink(path);
+}
+
+static VALUE
+rb_readlink(VALUE path)
+{
char *buf;
int size = 100;
ssize_t rv;
@@ -2604,9 +2612,8 @@ static const char file_alt_separator[] = {FILE_ALT_SEPARATOR, '\0'};
#define istrailinggarbage(x) 0
#endif
-#ifndef CharNext /* defined as CharNext[AW] on Windows. */
-# define CharNext(p) ((p) + 1)
-#endif
+#define Next(p, e, enc) ((p) + rb_enc_mbclen((p), (e), (enc)))
+#define Inc(p, e, enc) ((p) = Next((p), (e), (enc)))
#if defined(DOSISH_UNC)
#define has_unc(buf) (isdirsep((buf)[0]) && isdirsep((buf)[1]))
@@ -2668,40 +2675,40 @@ not_same_drive(VALUE path, int drive)
#endif
static inline char *
-skiproot(const char *path)
+skiproot(const char *path, const char *end, rb_encoding *enc)
{
#ifdef DOSISH_DRIVE_LETTER
- if (has_drive_letter(path)) path += 2;
+ if (path + 2 <= end && has_drive_letter(path)) path += 2;
#endif
- while (isdirsep(*path)) path++;
+ while (path < end && isdirsep(*path)) path++;
return (char *)path;
}
-#define nextdirsep rb_path_next
+#define nextdirsep rb_enc_path_next
char *
-rb_path_next(const char *s)
+rb_enc_path_next(const char *s, const char *e, rb_encoding *enc)
{
- while (*s && !isdirsep(*s)) {
- s = CharNext(s);
+ while (s < e && !isdirsep(*s)) {
+ Inc(s, e, enc);
}
return (char *)s;
}
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
-#define skipprefix rb_path_skip_prefix
+#define skipprefix rb_enc_path_skip_prefix
#else
-#define skipprefix(path) (path)
+#define skipprefix(path, end, enc) (path)
#endif
char *
-rb_path_skip_prefix(const char *path)
+rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc)
{
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
#ifdef DOSISH_UNC
- if (isdirsep(path[0]) && isdirsep(path[1])) {
+ if (path + 2 <= end && isdirsep(path[0]) && isdirsep(path[1])) {
path += 2;
- while (isdirsep(*path)) path++;
- if (*(path = nextdirsep(path)) && path[1] && !isdirsep(path[1]))
- path = nextdirsep(path + 1);
+ while (path < end && isdirsep(*path)) path++;
+ if ((path = rb_enc_path_next(path, end, enc)) < end && path[0] && path[1] && !isdirsep(path[1]))
+ path = rb_enc_path_next(path + 1, end, enc);
return (char *)path;
}
#endif
@@ -2714,78 +2721,78 @@ rb_path_skip_prefix(const char *path)
}
static inline char *
-skipprefixroot(const char *path)
+skipprefixroot(const char *path, const char *end, rb_encoding *enc)
{
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
- char *p = skipprefix(path);
+ char *p = skipprefix(path, end, enc);
while (isdirsep(*p)) p++;
return p;
#else
- return skiproot(path);
+ return skiproot(path, end, enc);
#endif
}
-#define strrdirsep rb_path_last_separator
+#define strrdirsep rb_enc_path_last_separator
char *
-rb_path_last_separator(const char *path)
+rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
{
char *last = NULL;
- while (*path) {
+ while (path < end) {
if (isdirsep(*path)) {
const char *tmp = path++;
- while (isdirsep(*path)) path++;
- if (!*path) break;
+ while (path < end && isdirsep(*path)) path++;
+ if (path >= end) break;
last = (char *)tmp;
}
else {
- path = CharNext(path);
+ Inc(path, end, enc);
}
}
return last;
}
static char *
-chompdirsep(const char *path)
+chompdirsep(const char *path, const char *end, rb_encoding *enc)
{
- while (*path) {
+ while (path < end) {
if (isdirsep(*path)) {
const char *last = path++;
- while (isdirsep(*path)) path++;
- if (!*path) return (char *)last;
+ while (path < end && isdirsep(*path)) path++;
+ if (path >= end) return (char *)last;
}
else {
- path = CharNext(path);
+ Inc(path, end, enc);
}
}
return (char *)path;
}
char *
-rb_path_end(const char *path)
+rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
{
- if (isdirsep(*path)) path++;
- return chompdirsep(path);
+ if (path < end && isdirsep(*path)) path++;
+ return chompdirsep(path, end, enc);
}
#if USE_NTFS
static char *
-ntfs_tail(const char *path)
+ntfs_tail(const char *path, const char *end, rb_encoding *enc)
{
- while (*path == '.') path++;
- while (*path && *path != ':') {
+ while (path < end && *path == '.') path++;
+ while (path < end && *path != ':') {
if (istrailinggarbage(*path)) {
const char *last = path++;
- while (istrailinggarbage(*path)) path++;
- if (!*path || *path == ':') return (char *)last;
+ while (path < end && istrailinggarbage(*path)) path++;
+ if (path >= end || *path == ':') return (char *)last;
}
else if (isdirsep(*path)) {
const char *last = path++;
- while (isdirsep(*path)) path++;
- if (!*path) return (char *)last;
+ while (path < end && isdirsep(*path)) path++;
+ if (path >= end) return (char *)last;
if (*path == ':') path++;
}
else {
- path = CharNext(path);
+ Inc(path, end, enc);
}
}
return (char *)path;
@@ -2814,9 +2821,10 @@ rb_home_dir(const char *user, VALUE result)
const char *dir;
char *buf;
#if defined DOSISH || defined __CYGWIN__
- char *p;
+ char *p, *bend;
#endif
long dirlen;
+ rb_encoding *enc;
if (!user || !*user) {
if (!(dir = getenv("HOME"))) {
@@ -2835,32 +2843,61 @@ rb_home_dir(const char *user, VALUE result)
}
dirlen = strlen(pwPtr->pw_dir);
rb_str_resize(result, dirlen);
- strcpy(buf = RSTRING_PTR(result), pwPtr->pw_dir);
+ memcpy(buf = RSTRING_PTR(result), pwPtr->pw_dir, dirlen + 1);
endpwent();
#else
return Qnil;
#endif
}
+ enc = rb_filesystem_encoding();
+ rb_enc_associate(result, enc);
#if defined DOSISH || defined __CYGWIN__
- for (p = buf; *p; p = CharNext(p)) {
+ for (bend = (p = buf) + dirlen; p < bend; Inc(p, bend, enc)) {
if (*p == '\\') {
*p = '/';
}
}
#endif
- rb_enc_associate_index(result, rb_filesystem_encindex());
return result;
}
+static char *
+append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encoding *fsenc)
+{
+ char *buf, *cwdp = dir;
+ VALUE dirname = Qnil;
+ size_t dirlen = strlen(dir), buflen = rb_str_capacity(result);
+
+ if (*enc != fsenc) {
+ rb_encoding *direnc = rb_enc_check(fname, dirname = rb_enc_str_new(dir, dirlen, fsenc));
+ if (direnc != fsenc) {
+ dirname = rb_str_conv_enc(dirname, fsenc, direnc);
+ RSTRING_GETMEM(dirname, cwdp, dirlen);
+ }
+ *enc = direnc;
+ rb_enc_associate(result, direnc);
+ }
+ do {buflen *= 2;} while (dirlen > buflen);
+ rb_str_resize(result, buflen);
+ buf = RSTRING_PTR(result);
+ memcpy(buf, cwdp, dirlen);
+ xfree(dir);
+ if (!NIL_P(dirname)) rb_str_resize(dirname, 0);
+ return buf + dirlen;
+}
+
static VALUE
file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
{
- const char *s, *b;
+ const char *s, *b, *fend;
char *buf, *p, *pend, *root;
- size_t buflen, dirlen, bdiff;
+ size_t buflen, bdiff;
int tainted;
+ rb_encoding *enc, *fsenc = rb_filesystem_encoding();
s = StringValuePtr(fname);
+ fend = s + RSTRING_LEN(fname);
+ enc = rb_enc_get(fname);
BUFINIT();
tainted = OBJ_TAINTED(fname);
@@ -2874,7 +2911,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
if (*++s) ++s;
}
else {
- s = nextdirsep(b = s);
+ s = nextdirsep(b = s, fend, enc);
userlen = s - b;
BUFCHECK(bdiff + userlen >= buflen);
memcpy(p, b, userlen);
@@ -2920,18 +2957,16 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
}
}
if (!same) {
- char *dir = getcwdofdrv(*s);
-
+ char *e = append_fspath(result, fname, getcwdofdrv(*s), &enc, fsenc);
tainted = 1;
- dirlen = strlen(dir);
- BUFCHECK(dirlen > buflen);
- strcpy(buf, dir);
- xfree(dir);
- rb_enc_associate_index(result, rb_filesystem_encindex());
+ BUFINIT();
+ p = e;
}
- else
- rb_enc_associate(result, rb_enc_check(result, fname));
- p = chompdirsep(skiproot(buf));
+ else {
+ rb_enc_associate(result, enc = rb_enc_check(result, fname));
+ p = pend;
+ }
+ p = chompdirsep(skiproot(buf, p, enc), p, enc);
s += 2;
}
}
@@ -2939,28 +2974,25 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
else if (!rb_is_absolute_path(s)) {
if (!NIL_P(dname)) {
file_expand_path(dname, Qnil, abs_mode, result);
- BUFINIT();
rb_enc_associate(result, rb_enc_check(result, fname));
+ BUFINIT();
+ p = pend;
}
else {
- char *dir = my_getcwd();
-
+ char *e = append_fspath(result, fname, my_getcwd(), &enc, fsenc);
tainted = 1;
- dirlen = strlen(dir);
- BUFCHECK(dirlen > buflen);
- strcpy(buf, dir);
- xfree(dir);
- rb_enc_associate_index(result, rb_filesystem_encindex());
+ BUFINIT();
+ p = e;
}
#if defined DOSISH || defined __CYGWIN__
if (isdirsep(*s)) {
/* specified full path, but not drive letter nor UNC */
/* we need to get the drive letter or UNC share name */
- p = skipprefix(buf);
+ p = skipprefix(buf, p, enc);
}
else
#endif
- p = chompdirsep(skiproot(buf));
+ p = chompdirsep(skiproot(buf, p, enc), p, enc);
}
else {
size_t len;
@@ -2984,7 +3016,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
rb_str_set_len(result, p-buf+1);
BUFCHECK(bdiff + 1 >= buflen);
p[1] = 0;
- root = skipprefix(buf);
+ root = skipprefix(buf, p+1, enc);
b = s;
while (*s) {
@@ -3000,7 +3032,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
/* We must go back to the parent */
char *n;
*p = '\0';
- if (!(n = strrdirsep(root))) {
+ if (!(n = strrdirsep(root, p, enc))) {
*p = '/';
}
else {
@@ -3030,7 +3062,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
--s;
case ' ': {
const char *e = s;
- while (istrailinggarbage(*s)) s++;
+ while (s < fend && istrailinggarbage(*s)) s++;
if (!*s) {
s = e;
goto endpath;
@@ -3055,7 +3087,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
b = ++s;
break;
default:
- s = CharNext(s);
+ Inc(s, fend, enc);
break;
}
}
@@ -3080,14 +3112,18 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
BUFCHECK(bdiff + (s-b) >= buflen);
memcpy(++p, b, s-b);
p += s-b;
+ rb_str_set_len(result, p-buf);
}
- if (p == skiproot(buf) - 1) p++;
+ if (p == skiproot(buf, p + !!*p, enc) - 1) p++;
#if USE_NTFS
*p = '\0';
- if ((s = strrdirsep(b = buf)) != 0 && !strpbrk(s, "*?")) {
+ if ((s = strrdirsep(b = buf, p, enc)) != 0 && !strpbrk(s, "*?")) {
+ VALUE tmp, v;
size_t len;
- WIN32_FIND_DATA wfd;
+ rb_encoding *enc;
+ WCHAR *wstr;
+ WIN32_FIND_DATAW wfd;
HANDLE h;
#ifdef __CYGWIN__
#ifdef HAVE_CYGWIN_CONV_PATH
@@ -3137,21 +3173,43 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
}
*p = '/';
#endif
- h = FindFirstFile(b, &wfd);
+ rb_str_set_len(result, p - buf + strlen(p));
+ enc = rb_enc_get(result);
+ tmp = result;
+ if (enc != rb_utf8_encoding() && rb_enc_str_coderange(result) != ENC_CODERANGE_7BIT) {
+ tmp = rb_str_encode_ospath(result);
+ }
+ len = MultiByteToWideChar(CP_UTF8, 0, RSTRING_PTR(tmp), -1, NULL, 0);
+ wstr = ALLOCV_N(WCHAR, v, len);
+ MultiByteToWideChar(CP_UTF8, 0, RSTRING_PTR(tmp), -1, wstr, len);
+ if (tmp != result) rb_str_resize(tmp, 0);
+ h = FindFirstFileW(wstr, &wfd);
+ ALLOCV_END(v);
if (h != INVALID_HANDLE_VALUE) {
+ size_t wlen;
FindClose(h);
- len = strlen(wfd.cFileName);
+ len = lstrlenW(wfd.cFileName);
#ifdef __CYGWIN__
if (lnk_added && len > 4 &&
- STRCASECMP(wfd.cFileName + len - 4, ".lnk") == 0) {
- wfd.cFileName[len -= 4] = '\0';
+ wcsicmp(wfd.cFileName + len - 4, L".lnk") == 0) {
+ wfd.cFileName[len -= 4] = L'\0';
}
#else
p = (char *)s;
#endif
++p;
+ wlen = (int)len;
+ len = WideCharToMultiByte(CP_UTF8, 0, wfd.cFileName, wlen, NULL, 0, NULL, NULL);
BUFCHECK(bdiff + len >= buflen);
- memcpy(p, wfd.cFileName, len + 1);
+ WideCharToMultiByte(CP_UTF8, 0, wfd.cFileName, wlen, p, len + 1, NULL, NULL);
+ if (tmp != result) {
+ rb_str_buf_cat(tmp, p, len);
+ tmp = rb_str_encode(tmp, rb_enc_from_encoding(enc), 0, Qnil);
+ len = RSTRING_LEN(tmp);
+ BUFCHECK(bdiff + len >= buflen);
+ memcpy(p, RSTRING_PTR(tmp), len);
+ rb_str_resize(tmp, 0);
+ }
p += len;
}
#ifdef __CYGWIN__
@@ -3252,23 +3310,27 @@ rb_file_s_absolute_path(int argc, VALUE *argv)
}
static void
-realpath_rec(long *prefixlenp, VALUE *resolvedp, char *unresolved, VALUE loopcheck, int strict, int last)
+realpath_rec(long *prefixlenp, VALUE *resolvedp, const char *unresolved, VALUE loopcheck, int strict, int last)
{
+ const char *pend = unresolved + strlen(unresolved);
+ rb_encoding *enc = rb_enc_get(*resolvedp);
ID resolving;
CONST_ID(resolving, "resolving");
- while (*unresolved) {
- char *testname = unresolved;
- char *unresolved_firstsep = rb_path_next(unresolved);
- long testnamelen = unresolved_firstsep - unresolved;
- char *unresolved_nextname = unresolved_firstsep;
- while (isdirsep(*unresolved_nextname)) unresolved_nextname++;
+ while (unresolved < pend) {
+ const char *testname = unresolved;
+ const char *unresolved_firstsep = rb_enc_path_next(unresolved, pend, enc);
+ long testnamelen = unresolved_firstsep - unresolved;
+ const char *unresolved_nextname = unresolved_firstsep;
+ while (unresolved_nextname < pend && isdirsep(*unresolved_nextname))
+ unresolved_nextname++;
unresolved = unresolved_nextname;
if (testnamelen == 1 && testname[0] == '.') {
}
else if (testnamelen == 2 && testname[0] == '.' && testname[1] == '.') {
if (*prefixlenp < RSTRING_LEN(*resolvedp)) {
- char *resolved_names = RSTRING_PTR(*resolvedp) + *prefixlenp;
- char *lastsep = rb_path_last_separator(resolved_names);
+ const char *resolved_str = RSTRING_PTR(*resolvedp);
+ const char *resolved_names = resolved_str + *prefixlenp;
+ const char *lastsep = strrdirsep(resolved_names, resolved_str + RSTRING_LEN(*resolvedp), enc);
long len = lastsep ? lastsep - resolved_names : 0;
rb_str_resize(*resolvedp, *prefixlenp + len);
}
@@ -3307,23 +3369,24 @@ realpath_rec(long *prefixlenp, VALUE *resolvedp, char *unresolved, VALUE loopche
}
#ifdef HAVE_READLINK
if (S_ISLNK(sbuf.st_mode)) {
- volatile VALUE link;
- char *link_prefix, *link_names;
+ VALUE link;
+ const char *link_prefix, *link_names;
long link_prefixlen;
rb_hash_aset(loopcheck, testpath, ID2SYM(resolving));
- link = rb_file_s_readlink(rb_cFile, testpath);
+ link = rb_readlink(testpath);
link_prefix = RSTRING_PTR(link);
- link_names = skipprefixroot(link_prefix);
- link_prefixlen = link_names - link_prefix;
- if (link_prefixlen == 0) {
- realpath_rec(prefixlenp, resolvedp, link_names, loopcheck, strict, *unresolved_firstsep == '\0');
- }
- else {
- *resolvedp = rb_str_new(link_prefix, link_prefixlen);
- *prefixlenp = link_prefixlen;
- realpath_rec(prefixlenp, resolvedp, link_names, loopcheck, strict, *unresolved_firstsep == '\0');
- }
- rb_hash_aset(loopcheck, testpath, rb_str_dup_frozen(*resolvedp));
+ link_names = skipprefixroot(link_prefix, link_prefix + RSTRING_LEN(link), rb_enc_get(link));
+ link_prefixlen = link_names - link_prefix;
+ if (link_prefixlen > 0) {
+ rb_encoding *enc, *linkenc = rb_enc_get(link);
+ link = rb_str_subseq(link, 0, link_prefixlen);
+ enc = rb_enc_check(*resolvedp, link);
+ if (enc != linkenc) link = rb_str_conv_enc(link, linkenc, enc);
+ *resolvedp = link;
+ *prefixlenp = link_prefixlen;
+ }
+ realpath_rec(prefixlenp, resolvedp, link_names, loopcheck, strict, *unresolved_firstsep == '\0');
+ rb_hash_aset(loopcheck, testpath, rb_str_dup_frozen(*resolvedp));
}
else
#endif
@@ -3346,8 +3409,10 @@ rb_realpath_internal(VALUE basedir, VALUE path, int strict)
VALUE loopcheck;
volatile VALUE curdir = Qnil;
+ rb_encoding *enc;
char *path_names = NULL, *basedir_names = NULL, *curdir_names = NULL;
- char *ptr, *prefixptr = NULL;
+ char *ptr, *prefixptr = NULL, *pend;
+ long len;
rb_secure(2);
@@ -3359,34 +3424,33 @@ rb_realpath_internal(VALUE basedir, VALUE path, int strict)
basedir = rb_str_dup_frozen(basedir);
}
- ptr = RSTRING_PTR(unresolved_path);
- path_names = skipprefixroot(ptr);
+ RSTRING_GETMEM(unresolved_path, ptr, len);
+ path_names = skipprefixroot(ptr, ptr + len, rb_enc_get(unresolved_path));
if (ptr != path_names) {
- resolved = rb_enc_str_new(ptr, path_names - ptr,
- rb_enc_get(unresolved_path));
+ resolved = rb_str_subseq(unresolved_path, 0, path_names - ptr);
goto root_found;
}
if (!NIL_P(basedir)) {
- ptr = RSTRING_PTR(basedir);
- basedir_names = skipprefixroot(ptr);
+ RSTRING_GETMEM(basedir, ptr, len);
+ basedir_names = skipprefixroot(ptr, ptr + len, rb_enc_get(basedir));
if (ptr != basedir_names) {
- resolved = rb_enc_str_new(ptr, basedir_names - ptr,
- rb_enc_get(basedir));
- goto root_found;
+ resolved = rb_str_subseq(basedir, 0, basedir_names - ptr);
+ goto root_found;
}
}
curdir = rb_dir_getwd();
- ptr = RSTRING_PTR(curdir);
- curdir_names = skipprefixroot(ptr);
- resolved = rb_enc_str_new(ptr, curdir_names - ptr, rb_enc_get(curdir));
+ RSTRING_GETMEM(curdir, ptr, len);
+ curdir_names = skipprefixroot(ptr, ptr + len, rb_enc_get(curdir));
+ resolved = rb_str_subseq(curdir, 0, curdir_names - ptr);
root_found:
- prefixptr = RSTRING_PTR(resolved);
- prefixlen = RSTRING_LEN(resolved);
- ptr = chompdirsep(prefixptr);
- if (*ptr) {
+ RSTRING_GETMEM(resolved, prefixptr, prefixlen);
+ pend = prefixptr + prefixlen;
+ enc = rb_enc_get(resolved);
+ ptr = chompdirsep(prefixptr, pend, enc);
+ if (ptr < pend) {
prefixlen = ++ptr - prefixptr;
rb_str_set_len(resolved, prefixlen);
}
@@ -3395,7 +3459,7 @@ rb_realpath_internal(VALUE basedir, VALUE path, int strict)
if (*prefixptr == FILE_ALT_SEPARATOR) {
*prefixptr = '/';
}
- prefixptr = CharNext(prefixptr);
+ Inc(prefixptr, pend, enc);
}
#endif
@@ -3452,44 +3516,52 @@ rb_file_s_realdirpath(int argc, VALUE *argv, VALUE klass)
}
static size_t
-rmext(const char *p, long l0, long l1, const char *e)
+rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
{
- long l2;
+ int len1, len2;
+ unsigned int c;
+ const char *s, *last;
if (!e) return 0;
- l2 = strlen(e);
- if (l2 == 2 && e[1] == '*') {
- unsigned char c = *e;
+ c = rb_enc_codepoint_len(e, e + l2, &len1, enc);
+ if (rb_enc_ascget(e + len1, e + l2, &len2, enc) == '*' && len1 + len2 == l2) {
+ if (c == '.') return l0;
+ s = p;
e = p + l1;
- do {
- if (e <= p + l0) return 0;
- } while (*--e != c);
- return e - p;
+ last = e;
+ while (s < e) {
+ if (rb_enc_codepoint_len(s, e, &len1, enc) == c) last = s;
+ s += len1;
+ }
+ return last - p;
}
if (l1 < l2) return l1;
+ s = p+l1-l2;
+ if (rb_enc_left_char_head(p, s, p+l1, enc) != s) return 0;
#if CASEFOLD_FILESYSTEM
#define fncomp strncasecmp
#else
#define fncomp strncmp
#endif
- if (fncomp(p+l1-l2, e, l2) == 0) {
+ if (fncomp(s, e, l2) == 0) {
return l1-l2;
}
return 0;
}
const char *
-ruby_find_basename(const char *name, long *baselen, long *alllen)
+ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc)
{
- const char *p, *q, *e;
+ const char *p, *q, *e, *end;
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
const char *root;
#endif
long f = 0, n = -1;
- name = skipprefix(name);
+ end = name + *alllen;
+ name = skipprefix(name, end, enc);
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
root = name;
#endif
@@ -3516,19 +3588,19 @@ ruby_find_basename(const char *name, long *baselen, long *alllen)
#endif
}
else {
- if (!(p = strrdirsep(name))) {
+ if (!(p = strrdirsep(name, end, enc))) {
p = name;
}
else {
while (isdirsep(*p)) p++; /* skip last / */
}
#if USE_NTFS
- n = ntfs_tail(p) - p;
+ n = ntfs_tail(p, end, enc) - p;
#else
- n = chompdirsep(p) - p;
+ n = chompdirsep(p, end, enc) - p;
#endif
for (q = p; q - p < n && *q == '.'; q++);
- for (e = 0; q - p < n; q = CharNext(q)) {
+ for (e = 0; q - p < n; Inc(q, end, enc)) {
if (*q == '.') e = q;
}
if (e) f = e - p;
@@ -3562,6 +3634,7 @@ rb_file_s_basename(int argc, VALUE *argv)
VALUE fname, fext, basename;
const char *name, *p;
long f, n;
+ rb_encoding *enc;
if (rb_scan_args(argc, argv, "11", &fname, &fext) == 2) {
rb_encoding *enc;
@@ -3572,15 +3645,29 @@ rb_file_s_basename(int argc, VALUE *argv)
}
}
FilePathStringValue(fname);
- if (!NIL_P(fext)) rb_enc_check(fname, fext);
- if (RSTRING_LEN(fname) == 0 || !*(name = RSTRING_PTR(fname)))
+ if (!NIL_P(fext)) enc = rb_enc_check(fname, fext);
+ else enc = rb_enc_get(fname);
+ if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname)))
return rb_str_new_shared(fname);
- p = ruby_find_basename(name, &f, &n);
+ p = ruby_enc_find_basename(name, &f, &n, enc);
if (n >= 0) {
- if (NIL_P(fext) || !(f = rmext(p, f, n, StringValueCStr(fext)))) {
+ if (NIL_P(fext)) {
f = n;
}
+ else {
+ rb_encoding *fenc = rb_enc_get(fext);
+ const char *fp;
+ if (enc != fenc &&
+ rb_enc_str_coderange(fext) != ENC_CODERANGE_7BIT) {
+ fext = rb_str_conv_enc(fext, fenc, enc);
+ }
+ fp = StringValueCStr(fext);
+ if (!(f = rmext(p, f, n, fp, RSTRING_LEN(fext), enc))) {
+ f = n;
+ }
+ RB_GC_GUARD(fext);
+ }
if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname);
}
@@ -3611,20 +3698,23 @@ rb_file_s_dirname(VALUE klass, VALUE fname)
VALUE
rb_file_dirname(VALUE fname)
{
- const char *name, *root, *p;
+ const char *name, *root, *p, *end;
VALUE dirname;
+ rb_encoding *enc;
FilePathStringValue(fname);
name = StringValueCStr(fname);
- root = skiproot(name);
+ end = name + RSTRING_LEN(fname);
+ enc = rb_enc_get(fname);
+ root = skiproot(name, end, enc);
#ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name))
- root = skipprefix(name = root - 2);
+ root = skipprefix(name = root - 2, end, enc);
#else
if (root > name + 1)
name = root - 1;
#endif
- p = strrdirsep(root);
+ p = strrdirsep(root, end, enc);
if (!p) {
p = root;
}
@@ -3632,7 +3722,7 @@ rb_file_dirname(VALUE fname)
return rb_usascii_str_new2(".");
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
- const char *top = skiproot(name + 2);
+ const char *top = skiproot(name + 2, end, enc);
dirname = rb_str_new(name, 3);
rb_str_cat(dirname, top, p - top);
}
@@ -3661,11 +3751,11 @@ rb_file_dirname(VALUE fname)
*
*/
const char *
-ruby_find_extname(const char *name, long *len)
+ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
{
- const char *p, *e;
+ const char *p, *e, *end = name + (len ? *len : (long)strlen(name));
- p = strrdirsep(name); /* get the last path component */
+ p = strrdirsep(name, end, enc); /* get the last path component */
if (!p)
p = name;
else
@@ -3698,7 +3788,7 @@ ruby_find_extname(const char *name, long *len)
#endif
else if (isdirsep(*p))
break;
- p = CharNext(p);
+ Inc(p, end, enc);
}
if (len) {
@@ -3736,11 +3826,11 @@ rb_file_s_extname(VALUE klass, VALUE fname)
FilePathStringValue(fname);
name = StringValueCStr(fname);
- e = ruby_find_extname(name, &len);
+ len = RSTRING_LEN(fname);
+ e = ruby_enc_find_extname(name, &len, rb_enc_get(fname));
if (len <= 1)
return rb_str_new(0, 0);
- extname = rb_str_new(e, len); /* keep the dot, too! */
- rb_enc_copy(extname, fname);
+ extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! */
OBJ_INFECT(extname, fname);
return extname;
}
@@ -3803,8 +3893,9 @@ rb_file_join(VALUE ary, VALUE sep)
len = 1;
for (i=0; i<RARRAY_LEN(ary); i++) {
- if (TYPE(RARRAY_PTR(ary)[i]) == T_STRING) {
- len += RSTRING_LEN(RARRAY_PTR(ary)[i]);
+ tmp = RARRAY_PTR(ary)[i];
+ if (RB_TYPE_P(tmp, T_STRING)) {
+ len += RSTRING_LEN(tmp);
}
else {
len += 10;
@@ -3837,11 +3928,12 @@ rb_file_join(VALUE ary, VALUE sep)
FilePathStringValue(tmp);
}
name = StringValueCStr(result);
+ len = RSTRING_LEN(result);
if (i == 0) {
rb_enc_copy(result, tmp);
}
else if (!NIL_P(sep)) {
- tail = chompdirsep(name);
+ tail = chompdirsep(name, name + len, rb_enc_get(result));
if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
rb_str_set_len(result, tail - name);
}
@@ -4977,6 +5069,8 @@ path_check_0(VALUE path, int execpath)
{
struct stat st;
const char *p0 = StringValueCStr(path);
+ const char *e0;
+ rb_encoding *enc;
char *p = 0, *s;
if (!rb_is_absolute_path(p0)) {
@@ -4991,6 +5085,8 @@ path_check_0(VALUE path, int execpath)
path = newpath;
p0 = RSTRING_PTR(path);
}
+ e0 = p0 + RSTRING_LEN(path);
+ enc = rb_enc_get(path);
for (;;) {
#ifndef S_IWOTH
# define S_IWOTH 002
@@ -5007,10 +5103,11 @@ path_check_0(VALUE path, int execpath)
RB_GC_GUARD(path);
return 0;
}
- s = strrdirsep(p0);
+ s = strrdirsep(p0, e0, enc);
if (p) *p = '/';
if (!s || s == p0) return 1;
p = s;
+ e0 = p;
*p = '\0';
}
}
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 058462fd24..cced3d2688 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -211,6 +211,12 @@ void rb_enc_set_default_external(VALUE encoding);
void rb_enc_set_default_internal(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
+char *rb_enc_path_next(const char *,const char *,rb_encoding*);
+char *rb_enc_path_skip_prefix(const char *,const char *,rb_encoding*);
+char *rb_enc_path_last_separator(const char *,const char *,rb_encoding*);
+char *rb_enc_path_end(const char *,const char *,rb_encoding*);
+const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc);
+const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc);
RUBY_EXTERN VALUE rb_cEncoding;
#define ENC_DUMMY_FLAG (1<<24)
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index 50451f35b3..af31ddd5f7 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -402,15 +402,9 @@ int rb_find_file_ext_safe(VALUE*, const char* const*, int);
VALUE rb_find_file_safe(VALUE, int);
int rb_find_file_ext(VALUE*, const char* const*);
VALUE rb_find_file(VALUE);
-char *rb_path_next(const char *);
-char *rb_path_skip_prefix(const char *);
-char *rb_path_last_separator(const char *);
-char *rb_path_end(const char *);
VALUE rb_file_directory_p(VALUE,VALUE);
VALUE rb_str_encode_ospath(VALUE);
int rb_is_absolute_path(const char *);
-const char *ruby_find_basename(const char *name, long *baselen, long *alllen);
-const char *ruby_find_extname(const char *name, long *len);
/* gc.c */
void ruby_set_stack_size(size_t);
NORETURN(void rb_memerror(void));
diff --git a/test/pathname/test_pathname.rb b/test/pathname/test_pathname.rb
index 8a8c0d9844..61d288d68d 100644
--- a/test/pathname/test_pathname.rb
+++ b/test/pathname/test_pathname.rb
@@ -1,5 +1,3 @@
-#!/usr/bin/env ruby
-
require 'test/unit'
require 'pathname'
@@ -185,10 +183,8 @@ class TestPathname < Test::Unit::TestCase
if DOSISH
defassert(:del_trailing_separator, "a", "a\\")
- require 'Win32API'
- if Win32API.new('kernel32', 'GetACP', nil, 'L').call == 932
- defassert(:del_trailing_separator, "\225\\", "\225\\\\") # SJIS
- end
+ defassert(:del_trailing_separator, "\225\\".force_encoding("cp932"), "\225\\\\".force_encoding("cp932"))
+ defassert(:del_trailing_separator, "\225".force_encoding("cp437"), "\225\\\\".force_encoding("cp437"))
end
def test_plus
diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb
index c10b05a86b..260610e572 100644
--- a/test/ruby/test_file_exhaustive.rb
+++ b/test/ruby/test_file_exhaustive.rb
@@ -3,6 +3,8 @@ require "fileutils"
require "tmpdir"
class TestFileExhaustive < Test::Unit::TestCase
+ DRIVE = Dir.pwd[%r'\A(?:[a-z]:|//[^/]+/[^/]+)'i]
+
def assert_incompatible_encoding
d = "\u{3042}\u{3044}".encode("utf-16le")
assert_raise(Encoding::CompatibilityError) {yield d}
@@ -400,13 +402,29 @@ class TestFileExhaustive < Test::Unit::TestCase
assert_match(/\Ac:\//i, File.expand_path('c:foo', 'd:/bar'))
assert_match(%r'\Ac:/bar/foo\z'i, File.expand_path('c:foo', 'c:/bar'))
end
- if drive = Dir.pwd[%r'\A(?:[a-z]:|//[^/]+/[^/]+)'i]
+ if DRIVE
assert_match(%r"\Az:/foo\z"i, File.expand_path('/foo', "z:/bar"))
assert_match(%r"\A//host/share/foo\z"i, File.expand_path('/foo', "//host/share/bar"))
- assert_match(%r"\A#{drive}/foo\z"i, File.expand_path('/foo'))
+ assert_match(%r"\A#{DRIVE}/foo\z"i, File.expand_path('/foo'))
else
assert_equal("/foo", File.expand_path('/foo'))
end
+ drive = (DRIVE ? 'C:' : '')
+ if Encoding.find("filesystem") == Encoding::CP1251
+ a = "#{drive}/\u3042\u3044\u3046\u3048\u304a".encode("cp932")
+ else
+ a = "#{drive}/\u043f\u0440\u0438\u0432\u0435\u0442".encode("cp1251")
+ end
+ assert_equal(a, File.expand_path(a))
+ a = "#{drive}/\225\\\\"
+ if File::ALT_SEPARATOR == '\\'
+ [%W"cp437 #{drive}/\225", %W"cp932 #{drive}/\225\\"]
+ else
+ [["cp437", a], ["cp932", a]]
+ end.each do |cp, expected|
+ assert_equal(expected.force_encoding(cp), File.expand_path(a.dup.force_encoding(cp)), cp)
+ end
+
assert_kind_of(String, File.expand_path("~")) if ENV["HOME"]
assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha") }
assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha", "/") }
@@ -447,16 +465,31 @@ class TestFileExhaustive < Test::Unit::TestCase
assert_equal(basename, File.basename(@file + ".", ".*"))
assert_equal(basename, File.basename(@file + "::$DATA", ".*"))
end
+ if File::ALT_SEPARATOR == '\\'
+ a = "foo/\225\\\\"
+ [%W"cp437 \225", %W"cp932 \225\\"].each do |cp, expected|
+ assert_equal(expected.force_encoding(cp), File.basename(a.dup.force_encoding(cp)), cp)
+ end
+ end
assert_incompatible_encoding {|d| File.basename(d)}
assert_incompatible_encoding {|d| File.basename(d, ".*")}
assert_raise(Encoding::CompatibilityError) {File.basename("foo.ext", ".*".encode("utf-16le"))}
+
+ s = "foo\x93_a".force_encoding("cp932")
+ assert_equal(s, File.basename(s, "_a"))
end
def test_dirname
assert(@file.start_with?(File.dirname(@file)))
assert_equal(".", File.dirname(""))
assert_incompatible_encoding {|d| File.dirname(d)}
+ if File::ALT_SEPARATOR == '\\'
+ a = "\225\\\\foo"
+ [%W"cp437 \225", %W"cp932 \225\\"].each do |cp, expected|
+ assert_equal(expected.force_encoding(cp), File.dirname(a.dup.force_encoding(cp)), cp)
+ end
+ end
end
def test_extname
@@ -500,6 +533,13 @@ class TestFileExhaustive < Test::Unit::TestCase
def o.to_path; "foo"; end
assert_equal(s, File.join(o, "bar", "baz"))
assert_equal(s, File.join("foo" + File::SEPARATOR, "bar", File::SEPARATOR + "baz"))
+ if File::ALT_SEPARATOR == '\\'
+ a = "\225\\"
+ b = "foo"
+ [%W"cp437 \225\\foo", %W"cp932 \225\\/foo"].each do |cp, expected|
+ assert_equal(expected.force_encoding(cp), File.join(a.dup.force_encoding(cp), b.dup.force_encoding(cp)), cp)
+ end
+ end
end
def test_truncate
diff --git a/version.h b/version.h
index 889e061956..058bea37e7 100644
--- a/version.h
+++ b/version.h
@@ -1,5 +1,5 @@
#define RUBY_VERSION "1.9.3"
-#define RUBY_PATCHLEVEL 244
+#define RUBY_PATCHLEVEL 245
#define RUBY_RELEASE_DATE "2012-07-03"
#define RUBY_RELEASE_YEAR 2012