diff options
Diffstat (limited to 'file.c')
| -rw-r--r-- | file.c | 1076 |
1 files changed, 655 insertions, 421 deletions
@@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp; #include "internal.h" #include "internal/compilers.h" #include "internal/dir.h" +#include "internal/encoding.h" #include "internal/error.h" #include "internal/file.h" #include "internal/io.h" @@ -213,15 +214,16 @@ file_path_convert(VALUE name) return name; } -static rb_encoding * +static void check_path_encoding(VALUE str) { - rb_encoding *enc = rb_enc_get(str); - if (!rb_enc_asciicompat(enc)) { - rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, - rb_enc_name(enc), rb_str_inspect(str)); + if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { + rb_encoding *enc = rb_str_enc_get(str); + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, + rb_enc_name(enc), rb_str_inspect(str)); + } } - return enc; } VALUE @@ -243,13 +245,20 @@ VALUE rb_get_path_check_convert(VALUE obj) { obj = file_path_convert(obj); + rb_get_path_check_no_convert(obj); + return rb_str_new_frozen(obj); +} +/* TODO: name */ +VALUE +rb_get_path_check_no_convert(VALUE obj) +{ check_path_encoding(obj); if (!rb_str_to_cstr(obj)) { rb_raise(rb_eArgError, "path name contains null byte"); } - return rb_str_new4(obj); + return obj; } VALUE @@ -264,6 +273,19 @@ rb_get_path(VALUE obj) return rb_get_path_check_convert(rb_get_path_check_to_string(obj)); } +static inline VALUE +check_path(VALUE obj, const char **cstr) +{ + VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj)); +#if RUBY_DEBUG + str = rb_str_new_frozen(str); +#endif + *cstr = RSTRING_PTR(str); + return str; +} + +#define CheckPath(str, cstr) RB_GC_GUARD(str) = check_path(str, &cstr); + VALUE rb_str_encode_ospath(VALUE path) { @@ -365,9 +387,15 @@ rb_str_normalize_ospath(const char *ptr, long len) const char *p = ptr; const char *e = ptr + len; const char *p1 = p; - VALUE str = rb_str_buf_new(len); rb_encoding *enc = rb_utf8_encoding(); - rb_enc_associate(str, enc); + VALUE str = 
rb_utf8_str_new(ptr, len); + if (RB_LIKELY(rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)) { + return str; + } + else { + str = rb_str_buf_new(len); + rb_enc_associate(str, enc); + } while (p < e) { int l, c; @@ -605,17 +633,31 @@ statx_mtimespec(const rb_io_stat_data *st) /* * call-seq: - * stat <=> other_stat -> -1, 0, 1, nil + * self <=> other -> -1, 0, 1, or nil + * + * Compares +self+ and +other+, by comparing their modification times; + * that is, by comparing <tt>self.mtime</tt> and <tt>other.mtime</tt>. * - * Compares File::Stat objects by comparing their respective modification - * times. + * Returns: * - * +nil+ is returned if +other_stat+ is not a File::Stat object + * - +-1+, if <tt>self.mtime</tt> is earlier. + * - +0+, if the two values are equal. + * - +1+, if <tt>self.mtime</tt> is later. + * - +nil+, if +other+ is not a File::Stat object. * - * f1 = File.new("f1", "w") - * sleep 1 - * f2 = File.new("f2", "w") - * f1.stat <=> f2.stat #=> -1 + * Examples: + * + * stat0 = File.stat('README.md') + * stat1 = File.stat('NEWS.md') + * stat0.mtime # => 2025-12-20 15:33:05.6972341 -0600 + * stat1.mtime # => 2025-12-20 16:02:08.2672945 -0600 + * stat0 <=> stat1 # => -1 + * stat0 <=> stat0.dup # => 0 + * stat1 <=> stat0 # => 1 + * stat0 <=> :foo # => nil + * + * \Class \File::Stat includes module Comparable, + * each of whose methods uses File::Stat#<=> for comparison. */ static VALUE @@ -1066,13 +1108,28 @@ static VALUE statx_birthtime(const rb_io_stat_data *st); /* * call-seq: - * stat.atime -> time - * - * Returns the last access time for this file as an object of class - * Time. 
- * - * File.stat("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 - * + * atime -> new_time + * + * Returns a new Time object containing the access time + * of the object represented by +self+ + * at the time +self+ was created; + * see {Snapshot}[rdoc-ref:File::Stat@Snapshot]: + * + * filepath = 't.tmp' + * File.write(filepath, 'foo') + * file = File.new(filepath, 'w') + * stat = File::Stat.new(filepath) + * file.atime # => 2026-03-31 16:26:39.5913207 -0500 + * stat.atime # => 2026-03-31 16:26:39.5913207 -0500 + * File.write(filepath, 'bar') + * file.atime # => 2026-03-31 16:27:01.4981624 -0500 # Changed by access. + * stat.atime # => 2026-03-31 16:26:39.5913207 -0500 # Unchanged by access. + * stat = File::Stat.new(filepath) + * stat.atime # => 2026-03-31 16:27:01.4981624 -0500 # New access time. + * file.close + * File.delete(filepath) + * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ static VALUE @@ -1120,24 +1177,22 @@ rb_stat_ctime(VALUE self) #if defined(HAVE_STAT_BIRTHTIME) /* * call-seq: - * stat.birthtime -> time - * - * Returns the birth time for <i>stat</i>. - * - * If the platform doesn't have birthtime, raises NotImplementedError. 
- * - * File.write("testfile", "foo") - * sleep 10 - * File.write("testfile", "bar") - * sleep 10 - * File.chmod(0644, "testfile") - * sleep 10 - * File.read("testfile") - * File.stat("testfile").birthtime #=> 2014-02-24 11:19:17 +0900 - * File.stat("testfile").mtime #=> 2014-02-24 11:19:27 +0900 - * File.stat("testfile").ctime #=> 2014-02-24 11:19:37 +0900 - * File.stat("testfile").atime #=> 2014-02-24 11:19:47 +0900 - * + * birthtime -> new_time + * + * Returns a new Time object containing the create time + * of the object represented by +self+ + * at the time +self+ was created; + * see {Snapshot}[rdoc-ref:File::Stat@Snapshot]: + * + * filename = 't.tmp' + * stat = File::Stat.new(filename) # Raises Errno::ENOENT: No such file or directory + * File.write(filename, 'foo') + * stat = File::Stat.new(filename) + * stat.birthtime # => 2026-04-14 10:41:55.5146554 -0500 + * File.delete(filename) + * stat.birthtime # => 2026-04-14 10:41:55.5146554 -0500 + * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ static VALUE @@ -2172,7 +2227,7 @@ rb_file_size_p(VALUE obj, VALUE fname) * File.owned?(file_name) -> true or false * * Returns <code>true</code> if the named file exists and the - * effective used id of the calling process is the owner of + * effective user id of the calling process is the owner of * the file. * * _file_name_ can be an IO object. @@ -2383,7 +2438,7 @@ rb_file_ftype(mode_t mode) t = "unknown"; } - return rb_usascii_str_new2(t); + return rb_fstring_cstr(t); } /* @@ -2417,14 +2472,25 @@ rb_file_s_ftype(VALUE klass, VALUE fname) /* * call-seq: - * File.atime(file_name) -> time + * File.atime(object) -> new_time * - * Returns the last access time for the named file as a Time object. + * Returns a new Time object containing the time of the most recent + * access (read or write) to the object, + * which may be a string filepath or dirpath, or a File or Dir object: * - * _file_name_ can be an IO object. 
+ * filepath = 't.tmp' + * File.exist?(filepath) # => false + * File.atime(filepath) # Raises Errno::ENOENT. + * File.write(filepath, 'foo') + * File.atime(filepath) # => 2026-03-31 16:39:37.9290772 -0500 + * File.write(filepath, 'bar') + * File.atime(filepath) # => 2026-03-31 16:39:57.7710876 -0500 * - * File.atime("testfile") #=> Wed Apr 09 08:51:48 CDT 2003 + * File.atime('.') # => 2026-03-31 16:47:49.0970483 -0500 + * File.atime(File.new('README.md')) # => 2026-03-31 11:15:27.8215934 -0500 + * File.atime(Dir.new('.')) # => 2026-03-31 12:39:45.5910591 -0500 * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ static VALUE @@ -2442,13 +2508,22 @@ rb_file_s_atime(VALUE klass, VALUE fname) /* * call-seq: - * file.atime -> time - * - * Returns the last access time (a Time object) for <i>file</i>, or - * epoch if <i>file</i> has not been accessed. - * - * File.new("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 - * + * atime -> new_time + * + * Returns a new Time object containing the time of the most recent + * access (read or write) to the file represented by +self+: + * + * filepath = 't.tmp' + * file = File.new(filepath, 'a+') + * file.atime # => 2026-03-31 17:11:27.7285397 -0500 + * file.write('foo') + * file.atime # => 2026-03-31 17:11:27.7285397 -0500 # Unchanged; not yet written. + * file.flush + * file.atime # => 2026-03-31 17:12:11.3408054 -0500 # Changed; now written. + * file.close + * File.delete(filepath) + * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ static VALUE @@ -2570,16 +2645,21 @@ rb_file_ctime(VALUE obj) #if defined(HAVE_STAT_BIRTHTIME) /* * call-seq: - * File.birthtime(file_name) -> time + * File.birthtime(path) -> new_time * - * Returns the birth time for the named file. + * Returns a new Time object containing the create time + * of the entry at the given +path+: * - * _file_name_ can be an IO object. 
- * - * File.birthtime("testfile") #=> Wed Apr 09 08:53:13 CDT 2003 - * - * If the platform doesn't have birthtime, raises NotImplementedError. + * path = 't.tmp' + * File.birthtime(path) # Raises Errno::ENOENT: No such file or directory + * File.write(path, 'foo') + * File.birthtime(path) # => 2026-04-14 11:10:43.2891695 -0500 + * File.write(path, 'bar') + * File.birthtime(path) # => 2026-04-14 11:10:43.2891695 -0500 + * File.delete(path) + * File.birthtime(path) # Raises Errno::ENOENT: No such file or directory * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ VALUE @@ -2601,14 +2681,21 @@ rb_file_s_birthtime(VALUE klass, VALUE fname) #if defined(HAVE_STAT_BIRTHTIME) /* * call-seq: - * file.birthtime -> time + * birthtime -> new_time * - * Returns the birth time for <i>file</i>. + * Returns a new Time object containing the create time for +self+: * - * File.new("testfile").birthtime #=> Wed Apr 09 08:53:14 CDT 2003 - * - * If the platform doesn't have birthtime, raises NotImplementedError. + * filepath = 't.tmp' + * File.write(filepath, 'foo') + * file = File.new(filepath) + * file.birthtime # => 2026-04-14 15:53:45.002656 -0500 + * File.write(filepath, 'bar') + * file.birthtime # => 2026-04-14 15:53:45.002656 -0500 + * file.close + * File.delete(filepath) + * file.birthtime # Raises IOError: closed stream * + * See {File System Timestamps}[rdoc-ref:file/timestamps.md]. */ static VALUE @@ -3396,7 +3483,7 @@ unlink_internal(const char *path, void *arg) * Since the underlying implementation relies on the * <code>unlink(2)</code> system call, the type of * exception raised depends on its error type (see - * https://linux.die.net/man/2/unlink) and has the form of + * https://man7.org/linux/man-pages/man2/unlink.2.html) and has the form of * e.g. Errno::ENOENT. * * See also Dir::rmdir. 
@@ -3542,8 +3629,10 @@ static const char file_alt_separator[] = {FILE_ALT_SEPARATOR, '\0'}; # define isADS(x) 0 #endif -#define Next(p, e, enc) ((p) + rb_enc_mbclen((p), (e), (enc))) -#define Inc(p, e, enc) ((p) = Next((p), (e), (enc))) +#define enc_mbclen_needed(enc) (!rb_str_encindex_fastpath(rb_enc_to_index(enc))) + +#define Next(p, e, mb_enc, enc) ((p) + ((mb_enc) ? rb_enc_mbclen((p), (e), (enc)) : 1)) +#define Inc(p, e, mb_enc, enc) ((p) = Next((p), (e), (mb_enc), (enc))) #if defined(DOSISH_UNC) #define has_unc(buf) (isdirsep((buf)[0]) && isdirsep((buf)[1])) @@ -3564,11 +3653,12 @@ has_drive_letter(const char *buf) } #ifndef _WIN32 -static char* +static VALUE getcwdofdrv(int drv) { char drive[4]; - char *drvcwd, *oldcwd; + char *oldcwd; + VALUE drvcwd; drive[0] = drv; drive[1] = ':'; @@ -3580,13 +3670,13 @@ getcwdofdrv(int drv) */ oldcwd = ruby_getcwd(); if (chdir(drive) == 0) { - drvcwd = ruby_getcwd(); + drvcwd = rb_dir_getwd_ospath(); chdir(oldcwd); xfree(oldcwd); } else { /* perhaps the drive is not exist. 
we return only drive letter */ - drvcwd = strdup(drive); + drvcwd = rb_enc_str_new_cstr(drive, rb_filesystem_encoding()); } return drvcwd; } @@ -3607,7 +3697,7 @@ not_same_drive(VALUE path, int drive) #endif /* DOSISH_DRIVE_LETTER */ static inline char * -skiproot(const char *path, const char *end, rb_encoding *enc) +skiproot(const char *path, const char *end) { #ifdef DOSISH_DRIVE_LETTER if (path + 2 <= end && has_drive_letter(path)) path += 2; @@ -3616,57 +3706,76 @@ skiproot(const char *path, const char *end, rb_encoding *enc) return (char *)path; } -#define nextdirsep rb_enc_path_next -char * -rb_enc_path_next(const char *s, const char *e, rb_encoding *enc) +static inline char * +enc_path_next(const char *s, const char *e, bool mb_enc, rb_encoding *enc) { while (s < e && !isdirsep(*s)) { - Inc(s, e, enc); + Inc(s, e, mb_enc, enc); } return (char *)s; } +#define nextdirsep rb_enc_path_next +char * +rb_enc_path_next(const char *s, const char *e, rb_encoding *enc) +{ + return enc_path_next(s, e, enc_mbclen_needed(enc), enc); +} + #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) -#define skipprefix rb_enc_path_skip_prefix +#define skipprefix enc_path_skip_prefix #else -#define skipprefix(path, end, enc) (path) +#define skipprefix(path, end, mb_enc, enc) (path) #endif -char * -rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc) +static inline char * +enc_path_skip_prefix(const char *path, const char *end, bool mb_enc, rb_encoding *enc) { #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) #ifdef DOSISH_UNC if (path + 2 <= end && isdirsep(path[0]) && isdirsep(path[1])) { path += 2; while (path < end && isdirsep(*path)) path++; - if ((path = rb_enc_path_next(path, end, enc)) < end && path[0] && path[1] && !isdirsep(path[1])) - path = rb_enc_path_next(path + 1, end, enc); + if ((path = enc_path_next(path, end, mb_enc, enc)) < end && + path + 2 <= end && !isdirsep(path[1])) { + path = enc_path_next(path + 1, end, mb_enc, enc); + } 
return (char *)path; } #endif #ifdef DOSISH_DRIVE_LETTER - if (has_drive_letter(path)) + if (path + 2 <= end && has_drive_letter(path)) return (char *)(path + 2); #endif #endif /* defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) */ return (char *)path; } +char * +rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc) +{ + return enc_path_skip_prefix(path, end, enc_mbclen_needed(enc), enc); +} + static inline char * skipprefixroot(const char *path, const char *end, rb_encoding *enc) { #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) - char *p = skipprefix(path, end, enc); - while (isdirsep(*p)) p++; + char *p = skipprefix(path, end, enc_mbclen_needed(enc), enc); + while (p < end && isdirsep(*p)) p++; return p; #else - return skiproot(path, end, enc); + return skiproot(path, end); #endif } -#define strrdirsep rb_enc_path_last_separator char * -rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) +rb_enc_path_skip_prefix_root(const char *path, const char *end, rb_encoding *enc) +{ + return skipprefixroot(path, end, enc); +} + +static char * +enc_path_last_separator(const char *path, const char *end, bool mb_enc, rb_encoding *enc) { char *last = NULL; while (path < end) { @@ -3677,14 +3786,44 @@ rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) last = (char *)tmp; } else { - Inc(path, end, enc); + Inc(path, end, mb_enc, enc); } } return last; } +char * +rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) +{ + return enc_path_last_separator(path, end, enc_mbclen_needed(enc), enc); +} + +static inline char * +strrdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) +{ + if (RB_UNLIKELY(mb_enc)) { + return enc_path_last_separator(path, end, mb_enc, enc); + } + + const char *cursor = end - 1; + + while (isdirsep(cursor[0])) { + cursor--; + } + + while (cursor >= path) { + if (isdirsep(cursor[0])) { + while (cursor > path && 
isdirsep(cursor[-1])) { + cursor--; + } + return (char *)cursor; + } + cursor--; + } + return NULL; +} static char * -chompdirsep(const char *path, const char *end, rb_encoding *enc) +chompdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) { while (path < end) { if (isdirsep(*path)) { @@ -3693,7 +3832,7 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc) if (path >= end) return (char *)last; } else { - Inc(path, end, enc); + Inc(path, end, mb_enc, enc); } } return (char *)path; @@ -3703,13 +3842,13 @@ char * rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) { if (path < end && isdirsep(*path)) path++; - return chompdirsep(path, end, enc); + return chompdirsep(path, end, enc_mbclen_needed(enc), enc); } static rb_encoding * fs_enc_check(VALUE path1, VALUE path2) { - rb_encoding *enc = rb_enc_check(path1, path2); + rb_encoding *enc = rb_enc_check_str(path1, path2); int encidx = rb_enc_to_index(enc); if (encidx == ENCINDEX_US_ASCII) { encidx = rb_enc_get_index(path1); @@ -3724,6 +3863,7 @@ fs_enc_check(VALUE path1, VALUE path2) static char * ntfs_tail(const char *path, const char *end, rb_encoding *enc) { + bool mb_enc = enc_mbclen_needed(enc); while (path < end && *path == '.') path++; while (path < end && !isADS(*path)) { if (istrailinggarbage(*path)) { @@ -3738,7 +3878,7 @@ ntfs_tail(const char *path, const char *end, rb_encoding *enc) if (isADS(*path)) path++; } else { - Inc(path, end, enc); + Inc(path, end, mb_enc, enc); } } return (char *)path; @@ -3786,10 +3926,6 @@ static VALUE copy_home_path(VALUE result, const char *dir) { char *buf; -#if defined DOSISH || defined __CYGWIN__ - char *p, *bend; - rb_encoding *enc; -#endif long dirlen; int encidx; @@ -3798,10 +3934,11 @@ copy_home_path(VALUE result, const char *dir) memcpy(buf = RSTRING_PTR(result), dir, dirlen); encidx = rb_filesystem_encindex(); rb_enc_associate_index(result, encidx); -#if defined DOSISH || defined __CYGWIN__ - enc = 
rb_enc_from_index(encidx); - for (bend = (p = buf) + dirlen; p < bend; Inc(p, bend, enc)) { - if (*p == '\\') { +#if defined FILE_ALT_SEPARATOR + rb_encoding *enc = rb_enc_from_index(encidx); + bool mb_enc = enc_mbclen_needed(enc); + for (char *p = buf, *bend = p + dirlen; p < bend; Inc(p, bend, mb_enc, enc)) { + if (*p == FILE_ALT_SEPARATOR) { *p = '/'; } } @@ -3909,16 +4046,19 @@ ospath_new(const char *ptr, long len, rb_encoding *fsenc) } static char * -append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encoding *fsenc) +append_fspath(VALUE result, VALUE fname, VALUE dirname, rb_encoding **enc, rb_encoding *fsenc) { - char *buf, *cwdp = dir; - VALUE dirname = Qnil; - size_t dirlen = strlen(dir), buflen = rb_str_capacity(result); + if (RB_UNLIKELY(!rb_enc_asciicompat(fsenc) || rb_enc_str_coderange(dirname) != ENC_CODERANGE_7BIT)) { + dirname = rb_str_new_shared(dirname); + rb_enc_associate(dirname, fsenc); + } + + char *buf, *cwdp; + size_t dirlen = RSTRING_LEN(dirname); + size_t buflen = rb_str_capacity(result); if (NORMALIZE_UTF8PATH || *enc != fsenc) { - dirname = ospath_new(dir, dirlen, fsenc); if (!rb_enc_compatible(fname, dirname)) { - xfree(dir); /* rb_enc_check must raise because the two encodings are not * compatible. 
*/ rb_enc_check(fname, dirname); @@ -3927,19 +4067,15 @@ append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encodi rb_encoding *direnc = fs_enc_check(fname, dirname); if (direnc != fsenc) { dirname = rb_str_conv_enc(dirname, fsenc, direnc); - RSTRING_GETMEM(dirname, cwdp, dirlen); - } - else if (NORMALIZE_UTF8PATH) { - RSTRING_GETMEM(dirname, cwdp, dirlen); } *enc = direnc; } + + RSTRING_GETMEM(dirname, cwdp, dirlen); do {buflen *= 2;} while (dirlen > buflen); rb_str_resize(result, buflen); buf = RSTRING_PTR(result); memcpy(buf, cwdp, dirlen); - xfree(dir); - if (!NIL_P(dirname)) rb_str_resize(dirname, 0); rb_enc_associate(result, *enc); return buf + dirlen; } @@ -3954,16 +4090,21 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na s = StringValuePtr(fname); fend = s + RSTRING_LEN(fname); - enc = rb_enc_get(fname); + enc = rb_str_enc_get(fname); + bool mb_enc = enc_mbclen_needed(enc); + if (!mb_enc && RTEST(dname)) { + mb_enc = enc_mbclen_needed(rb_str_enc_get(dname)); + } + BUFINIT(); - if (s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */ + if (s < fend && s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */ long userlen = 0; - if (isdirsep(s[1]) || s[1] == '\0') { + if (s + 1 == fend || isdirsep(s[1])) { buf = 0; b = 0; rb_str_set_len(result, 0); - if (*++s) ++s; + if (++s < fend) ++s; rb_default_home_dir(result); } else { @@ -3993,8 +4134,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #ifdef DOSISH_DRIVE_LETTER /* skip drive letter */ - else if (has_drive_letter(s)) { - if (isdirsep(s[2])) { + else if (s + 1 < fend && has_drive_letter(s)) { + if (s + 2 < fend && isdirsep(s[2])) { /* specified drive letter, and full path */ /* skip drive letter */ BUFCHECK(bdiff + 2 >= buflen); @@ -4023,12 +4164,12 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_enc_associate(result, enc = 
fs_enc_check(result, fname)); p = pend; } - p = chompdirsep(skiproot(buf, p, enc), p, enc); + p = chompdirsep(skiproot(buf, p), p, mb_enc, enc); s += 2; } } #endif /* DOSISH_DRIVE_LETTER */ - else if (!rb_is_absolute_path(s)) { + else if (s == fend || !rb_is_absolute_path(s)) { if (!NIL_P(dname)) { rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); rb_enc_associate(result, fs_enc_check(result, fname)); @@ -4036,24 +4177,24 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na p = pend; } else { - char *e = append_fspath(result, fname, ruby_getcwd(), &enc, fsenc); + char *e = append_fspath(result, fname, rb_dir_getwd_ospath(), &enc, fsenc); BUFINIT(); p = e; } -#if defined DOSISH || defined __CYGWIN__ - if (isdirsep(*s)) { +#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC + if (s < fend && isdirsep(*s)) { /* specified full path, but not drive letter nor UNC */ /* we need to get the drive letter or UNC share name */ - p = skipprefix(buf, p, enc); + p = skipprefix(buf, p, mb_enc, enc); } else -#endif /* defined DOSISH || defined __CYGWIN__ */ - p = chompdirsep(skiproot(buf, p, enc), p, enc); +#endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */ + p = chompdirsep(skiproot(buf, p), p, mb_enc, enc); } else { size_t len; b = s; - do s++; while (isdirsep(*s)); + do s++; while (s < fend && isdirsep(*s)); len = s - b; p = buf + len; BUFCHECK(bdiff >= buflen); @@ -4072,23 +4213,24 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_str_set_len(result, p-buf+1); BUFCHECK(bdiff + 1 >= buflen); p[1] = 0; - root = skipprefix(buf, p+1, enc); + root = skipprefix(buf, p+1, mb_enc, enc); b = s; - while (*s) { + while (s < fend) { switch (*s) { case '.': if (b == s++) { /* beginning of path element */ - switch (*s) { - case '\0': + if (s == fend) { b = s; break; + } + switch (*s) { case '.': - if (*(s+1) == '\0' || isdirsep(*(s+1))) { + if (s+1 == fend || isdirsep(*(s+1))) { /* We must 
go back to the parent */ char *n; *p = '\0'; - if (!(n = strrdirsep(root, p, enc))) { + if (!(n = strrdirsep(root, p, mb_enc, enc))) { *p = '/'; } else { @@ -4098,13 +4240,13 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #if USE_NTFS else { - do ++s; while (istrailinggarbage(*s)); + do ++s; while (s < fend && istrailinggarbage(*s)); } #endif /* USE_NTFS */ break; case '/': -#if defined DOSISH || defined __CYGWIN__ - case '\\': +#if defined FILE_ALT_SEPARATOR + case FILE_ALT_SEPARATOR: #endif b = ++s; break; @@ -4128,8 +4270,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na #endif /* USE_NTFS */ break; case '/': -#if defined DOSISH || defined __CYGWIN__ - case '\\': +#if defined FILE_ALT_SEPARATOR + case FILE_ALT_SEPARATOR: #endif if (s > b) { WITH_ROOTDIFF(BUFCOPY(b, s-b)); @@ -4151,7 +4293,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } } #endif /* __APPLE__ */ - Inc(s, fend, enc); + Inc(s, fend, mb_enc, enc); break; } } @@ -4179,7 +4321,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na BUFCOPY(b, s-b); rb_str_set_len(result, p-buf); } - if (p == skiproot(buf, p + !!*p, enc) - 1) p++; + if (p == skiproot(buf, p + !!*p) - 1) p++; #if USE_NTFS *p = '\0'; @@ -4337,31 +4479,43 @@ rb_file_s_expand_path(int argc, const VALUE *argv) } /* + * :markup: markdown + * * call-seq: - * File.expand_path(file_name [, dir_string] ) -> abs_file_name + * File.expand_path(path, dirpath = '.') -> absolute_path * - * Converts a pathname to an absolute pathname. Relative paths are - * referenced from the current working directory of the process unless - * +dir_string+ is given, in which case it will be used as the - * starting point. The given pathname may start with a - * ``<code>~</code>'', which expands to the process owner's home - * directory (the environment variable +HOME+ must be set - * correctly). 
``<code>~</code><i>user</i>'' expands to the named - * user's home directory. + * Returns the string absolute path for the given `path`. * - * File.expand_path("~oracle/bin") #=> "/home/oracle/bin" + * Evaluates a relative path with respect to the directory given by `dirpath`: * - * A simple example of using +dir_string+ is as follows. - * File.expand_path("ruby", "/usr/bin") #=> "/usr/bin/ruby" + * ```ruby + * Dir.chdir('/snap') + * # Default dirpath. + * File.expand_path('README') # => "/snap/README" + * File.expand_path('bin') # => "/snap/bin" + * File.expand_path('bin/../var') # => "/snap/var" # Cleaned. + * # Other dirpath. + * File.expand_path('../zip', '/usr/bin/ruby') # => "/usr/bin/zip" + * Dir.chdir('/usr/bin') + * File.expand_path('../../snap', __FILE__) # => "/usr/snap" + * ``` + * + * Evaluates an absolute path without respect to `dirpath`: * - * A more complex example which also resolves parent directory is as follows. - * Suppose we are in bin/mygem and want the absolute path of lib/mygem.rb. + * ```ruby + * File.expand_path('/snap') # => "/snap" + * File.expand_path('/snap', 'nosuch') # => "/snap" + * File.expand_path('/snap/../snap') # => "/snap" # Cleaned. + * ``` * - * File.expand_path("../../lib/mygem.rb", __FILE__) - * #=> ".../path/to/project/lib/mygem.rb" + * More examples: + * + * ``` + * Dir.chdir('/usr/bin') + * File.expand_path('../../snap', __FILE__) # => "/usr/snap" + * File.expand_path('../../snap') # => "/snap" + * ``` * - * So first it resolves the parent of __FILE__, that is bin/, then go to the - * parent, the root of the project and appends +lib/mygem.rb+. */ static VALUE @@ -4449,9 +4603,10 @@ realpath_rec(long *prefixlenp, VALUE *resolvedp, const char *unresolved, VALUE f } else if (testnamelen == 2 && testname[0] == '.' 
&& testname[1] == '.') { if (*prefixlenp < RSTRING_LEN(*resolvedp)) { + bool mb_enc = enc_mbclen_needed(enc); const char *resolved_str = RSTRING_PTR(*resolvedp); const char *resolved_names = resolved_str + *prefixlenp; - const char *lastsep = strrdirsep(resolved_names, resolved_str + RSTRING_LEN(*resolvedp), enc); + const char *lastsep = strrdirsep(resolved_names, resolved_str + RSTRING_LEN(*resolvedp), mb_enc, enc); long len = lastsep ? lastsep - resolved_names : 0; rb_str_resize(*resolvedp, *prefixlenp + len); } @@ -4591,7 +4746,8 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum root_found: RSTRING_GETMEM(resolved, prefixptr, prefixlen); pend = prefixptr + prefixlen; - ptr = chompdirsep(prefixptr, pend, enc); + bool mb_enc = enc_mbclen_needed(enc); + ptr = chompdirsep(prefixptr, pend, mb_enc, enc); if (ptr < pend) { prefixlen = ++ptr - prefixptr; rb_str_set_len(resolved, prefixlen); @@ -4601,7 +4757,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum if (*prefixptr == FILE_ALT_SEPARATOR) { *prefixptr = '/'; } - Inc(prefixptr, pend, enc); + Inc(prefixptr, pend, mb_enc, enc); } #endif @@ -4637,7 +4793,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum return resolved; } -static VALUE rb_file_join(VALUE ary); +static VALUE rb_file_join(long argc, VALUE *args); #ifndef HAVE_REALPATH static VALUE @@ -4678,7 +4834,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum unresolved_path = rb_str_dup_frozen(path); if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) { - unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path)); + VALUE paths[2] = {basedir, unresolved_path}; + unresolved_path = rb_file_join(2, paths); } if (origenc) unresolved_path = TO_OSPATH(unresolved_path); @@ -4844,23 +5001,29 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc) return 0; } -const char * 
-ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +static inline const char * +enc_find_basename(const char *name, long *baselen, long *alllen, bool mb_enc, rb_encoding *enc) { const char *p, *q, *e, *end; -#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC - const char *root; -#endif long f = 0, n = -1; - end = name + (alllen ? (size_t)*alllen : strlen(name)); - name = skipprefix(name, end, enc); + long len = (alllen ? (size_t)*alllen : strlen(name)); + + if (len <= 0) { + return name; + } + + end = name + len; + name = skipprefix(name, end, mb_enc, enc); #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC - root = name; + const char *root = name; #endif - while (isdirsep(*name)) + + while (name < end && isdirsep(*name)) { name++; - if (!*name) { + } + + if (name == end) { p = name - 1; f = 1; #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC @@ -4881,91 +5044,127 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin #endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */ } else { - if (!(p = strrdirsep(name, end, enc))) { + p = strrdirsep(name, end, mb_enc, enc); + if (!p) { p = name; } else { - while (isdirsep(*p)) p++; /* skip last / */ + while (isdirsep(*p)) { + p++; /* skip last / */ + } } #if USE_NTFS n = ntfs_tail(p, end, enc) - p; #else - n = chompdirsep(p, end, enc) - p; + n = chompdirsep(p, end, mb_enc, enc) - p; #endif for (q = p; q - p < n && *q == '.'; q++); - for (e = 0; q - p < n; Inc(q, end, enc)) { + for (e = 0; q - p < n; Inc(q, end, mb_enc, enc)) { if (*q == '.') e = q; } - if (e) f = e - p; - else f = n; + if (e) { + f = e - p; + } + else { + f = n; + } } - if (baselen) + if (baselen) { *baselen = f; - if (alllen) + } + if (alllen) { *alllen = n; + } return p; } +const char * +ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +{ + return enc_find_basename(name, baselen, alllen, enc_mbclen_needed(enc), enc); +} + /* * 
call-seq: - * File.basename(file_name [, suffix] ) -> base_name + * File.basename(path, suffix = '') -> new_string * - * Returns the last component of the filename given in - * <i>file_name</i> (after first stripping trailing separators), - * which can be formed using both File::SEPARATOR and - * File::ALT_SEPARATOR as the separator when File::ALT_SEPARATOR is - * not <code>nil</code>. If <i>suffix</i> is given and present at the - * end of <i>file_name</i>, it is removed. If <i>suffix</i> is ".*", - * any extension will be removed. + * Returns a new string containing all or part of the last entry of the given +path+. + * Entries are delimited by the value of constant File::SEPARATOR + * and, if non-nil, the value of constant File::ALT_SEPARATOR. + * + * When +suffix+ is the empty string <tt>''</tt>, + * returns all of the last entry: + * + * File.basename('foo/bar/baz/bat.txt') # => "bat.txt" + * File.basename('foo/bar/baz') # => "baz" + * + * File::SEPARATOR # => "/" + * File.basename('foo/bar.txt////') # => "bar.txt" + * File::ALT_SEPARATOR # => "\\" # On Windows. + * File.basename('foo/bar.txt//\\\\//') # => "bar.txt" + * + * When +suffix+ is <tt>'.*'</tt>, + * the last {filename extension}[https://en.wikipedia.org/wiki/Filename_extension], + * if any, is removed: + * + * File.basename('foo/bar.txt', '.*') # => "bar" + * File.basename('foo/bar.txt.old', '.*') # => "bar.txt" + * File.basename('foo/bar', '.*') # => "bar" + * + * When +suffix+ is any string other than <tt>''</tt> or <tt>'.*'</tt>, + * the matching trailing substring, if any, is removed: + * + * File.basename('foo/bar.txt', '.txt') # => "bar" + * File.basename('foo/bar.txt', 'txt') # => "bar." 
+ * File.basename('foo/bar.txt', '*') # => "bar.txt" + * File.basename('foo/bar.txt', '.') # => "bar.txt" * - * File.basename("/home/gumby/work/ruby.rb") #=> "ruby.rb" - * File.basename("/home/gumby/work/ruby.rb", ".rb") #=> "ruby" - * File.basename("/home/gumby/work/ruby.rb", ".*") #=> "ruby" */ static VALUE rb_file_s_basename(int argc, VALUE *argv, VALUE _) { - VALUE fname, fext, basename; - const char *name, *p; - long f, n; + VALUE fname, fext = Qnil; + const char *name, *p, *fp = 0; + long f = 0, n; rb_encoding *enc; - fext = Qnil; - if (rb_check_arity(argc, 1, 2) == 2) { + argc = rb_check_arity(argc, 1, 2); + fname = argv[0]; + CheckPath(fname, name); + if (argc == 2) { fext = argv[1]; - StringValue(fext); - enc = check_path_encoding(fext); + fp = StringValueCStr(fext); + check_path_encoding(fext); } - fname = argv[0]; - FilePathStringValue(fname); if (NIL_P(fext) || !(enc = rb_enc_compatible(fname, fext))) { - enc = rb_enc_get(fname); - fext = Qnil; + enc = rb_str_enc_get(fname); } - if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname))) - return rb_str_new_shared(fname); - p = ruby_enc_find_basename(name, &f, &n, enc); + n = RSTRING_LEN(fname); + if (n <= 0 || !*name) { + return rb_enc_str_new(0, 0, enc); + } + + bool mb_enc = enc_mbclen_needed(enc); + p = enc_find_basename(name, &f, &n, mb_enc, enc); if (n >= 0) { - if (NIL_P(fext)) { + if (!fp) { f = n; } else { - const char *fp; - fp = StringValueCStr(fext); if (!(f = rmext(p, f, n, fp, RSTRING_LEN(fext), enc))) { f = n; } RB_GC_GUARD(fext); } - if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname); + if (f == RSTRING_LEN(fname)) { + return rb_str_new_shared(fname); + } } - basename = rb_str_new(p, f); - rb_enc_copy(basename, fname); - return basename; + return rb_enc_str_new(p, f, enc); } static VALUE rb_file_dirname_n(VALUE fname, int n); @@ -5010,19 +5209,18 @@ rb_file_dirname_n(VALUE fname, int n) { const char *name, *root, *p, *end; VALUE dirname; - rb_encoding *enc; - VALUE 
sepsv = 0; - const char **seps; if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n); - FilePathStringValue(fname); - name = StringValueCStr(fname); + CheckPath(fname, name); end = name + RSTRING_LEN(fname); - enc = rb_enc_get(fname); - root = skiproot(name, end, enc); + + bool mb_enc = !rb_str_enc_fastpath(fname); + rb_encoding *enc = rb_str_enc_get(fname); + + root = skiproot(name, end); #ifdef DOSISH_UNC if (root > name + 1 && isdirsep(*name)) - root = skipprefix(name = root - 2, end, enc); + root = skipprefix(name = root - 2, end, mb_enc, enc); #else if (root > name + 1) name = root - 1; @@ -5031,72 +5229,41 @@ rb_file_dirname_n(VALUE fname, int n) p = root; } else { - int i; - switch (n) { - case 0: - p = end; - break; - case 1: - if (!(p = strrdirsep(root, end, enc))) p = root; - break; - default: - seps = ALLOCV_N(const char *, sepsv, n); - for (i = 0; i < n; ++i) seps[i] = root; - i = 0; - for (p = root; p < end; ) { - if (isdirsep(*p)) { - const char *tmp = p++; - while (p < end && isdirsep(*p)) p++; - if (p >= end) break; - seps[i++] = tmp; - if (i == n) i = 0; - } - else { - Inc(p, end, enc); - } + p = end; + while (n) { + if (!(p = strrdirsep(root, p, mb_enc, enc))) { + p = root; + break; } - p = seps[i]; - ALLOCV_END(sepsv); - break; + n--; } } - if (p == name) - return rb_usascii_str_new2("."); + + if (p == name) { + return rb_enc_str_new(".", 1, enc); + } #ifdef DOSISH_DRIVE_LETTER - if (has_drive_letter(name) && isdirsep(*(name + 2))) { - const char *top = skiproot(name + 2, end, enc); - dirname = rb_str_new(name, 3); + if (name + 3 < end && has_drive_letter(name) && isdirsep(*(name + 2))) { + const char *top = skiproot(name + 2, end); + dirname = rb_enc_str_new(name, 3, enc); rb_str_cat(dirname, top, p - top); } else #endif - dirname = rb_str_new(name, p - name); + dirname = rb_enc_str_new(name, p - name, enc); #ifdef DOSISH_DRIVE_LETTER - if (has_drive_letter(name) && root == name + 2 && p - name == 2) + if (root == name + 2 && p == root && 
name[1] == ':') rb_str_cat(dirname, ".", 1); #endif - rb_enc_copy(dirname, fname); return dirname; } -/* - * accept a String, and return the pointer of the extension. - * if len is passed, set the length of extension to it. - * returned pointer is in ``name'' or NULL. - * returns *len - * no dot NULL 0 - * dotfile top 0 - * end with dot dot 1 - * .ext dot len of .ext - * .ext:stream dot len of .ext without :stream (NTFS only) - * - */ -const char * -ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc) +static inline const char * +enc_find_extname(const char *name, long *len, bool mb_enc, rb_encoding *enc) { const char *p, *e, *end = name + (len ? *len : (long)strlen(name)); - p = strrdirsep(name, end, enc); /* get the last path component */ + p = strrdirsep(name, end, mb_enc, enc); /* get the last path component */ if (!p) p = name; else @@ -5129,7 +5296,7 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc) #endif else if (isdirsep(*p)) break; - Inc(p, end, enc); + Inc(p, end, mb_enc, enc); } if (len) { @@ -5145,57 +5312,106 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc) } /* + * accept a String, and return the pointer of the extension. + * if len is passed, set the length of extension to it. + * returned pointer is in ``name'' or NULL. + * returns *len + * no dot NULL 0 + * dotfile top 0 + * end with dot dot 1 + * .ext dot len of .ext + * .ext:stream dot len of .ext without :stream (NTFS only) + * + */ +const char * +ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc) +{ + return enc_find_extname(name, len, enc_mbclen_needed(enc), enc); +} + +/* + * :markup: markdown + * * call-seq: - * File.extname(path) -> string + * File.extname(path) -> extension + * + * Returns the filename extension -- + * usually the portion of the string `path` + * beginning from the last period: * - * Returns the extension (the portion of file name in +path+ - * starting from the last period). 
+ * ```ruby + * File.extname('t.rb') # => ".rb" + * File.extname('foo.bar.t.rb') # => ".rb" + * File.extname('foo/bar/t.rb') # => ".rb" + * File.extname('nosuch.txt') # => ".txt" # Path need not exist. + * ``` * - * If +path+ is a dotfile, or starts with a period, then the starting - * dot is not dealt with the start of the extension. + * Returns an empty string when there is no period: * - * An empty string will also be returned when the period is the last character - * in +path+. + * ```ruby + * Pathname('foo').extname # => "" + * ``` * - * On Windows, trailing dots are truncated. + * Returns an empty string when the only period is the first character: * - * File.extname("test.rb") #=> ".rb" - * File.extname("a/b/d/test.rb") #=> ".rb" - * File.extname(".a/b/d/test.rb") #=> ".rb" - * File.extname("foo.") #=> "" on Windows - * File.extname("foo.") #=> "." on non-Windows - * File.extname("test") #=> "" - * File.extname(".profile") #=> "" - * File.extname(".profile.sh") #=> ".sh" + * ```ruby + * File.extname('.irbrc') # => "" + * ``` + * + * Returns an empty string or `'.'` when `path` ends with a period: + * + * ``` + * File.extname('foo.') # => "" # On Windows. + * File.extname('foo.') # => "." # Elsewhere. + * File.extname('foo....') # => "" # On Windows. + * File.extname('foo....') # => "." # Elsewhere. + * ``` * */ static VALUE rb_file_s_extname(VALUE klass, VALUE fname) { - const char *name, *e; - long len; - VALUE extname; + const char *name; + CheckPath(fname, name); + long len = RSTRING_LEN(fname); - FilePathStringValue(fname); - name = StringValueCStr(fname); - len = RSTRING_LEN(fname); - e = ruby_enc_find_extname(name, &len, rb_enc_get(fname)); - if (len < 1) - return rb_str_new(0, 0); - extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! 
*/ - return extname; + if (len < 1) { + return rb_enc_str_new(0, 0, rb_str_enc_get(fname)); + } + + bool mb_enc = !rb_str_enc_fastpath(fname); + rb_encoding *enc = rb_str_enc_get(fname); + + const char *ext = enc_find_extname(name, &len, mb_enc, enc); + return rb_enc_str_new(ext, len, enc); } /* - * call-seq: + * call-seq: * File.path(path) -> string * - * Returns the string representation of the path + * Returns the string representation of the path * * File.path(File::NULL) #=> "/dev/null" * File.path(Pathname.new("/tmp")) #=> "/tmp" * + * If +path+ is not a String: + * + * 1. If it has the +to_path+ method, that method will be called to + * coerce to a String. + * + * 2. Otherwise, or if the coerced result is not a String too, the + * standard coercion using +to_str+ method will take place on that + * object. (See also String.try_convert) + * + * The coerced string must satisfy the following conditions: + * + * 1. It must be in an ASCII-compatible encoding; otherwise, an + * Encoding::CompatibilityError is raised. + * + * 2. It must not contain the NUL character (<tt>\0</tt>); otherwise, + * an ArgumentError is raised. 
*/ static VALUE @@ -5222,15 +5438,17 @@ rb_file_s_split(VALUE klass, VALUE path) return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef)); } +static VALUE rb_file_join_ary(VALUE ary); + static VALUE file_inspect_join(VALUE ary, VALUE arg, int recur) { if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array"); - return rb_file_join(arg); + return rb_file_join_ary(arg); } static VALUE -rb_file_join(VALUE ary) +rb_file_join_ary(VALUE ary) { long len, i; VALUE result, tmp; @@ -5278,11 +5496,11 @@ rb_file_join(VALUE ary) rb_enc_copy(result, tmp); } else { - tail = chompdirsep(name, name + len, rb_enc_get(result)); - if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) { + tail = chompdirsep(name, name + len, true, rb_enc_get(result)); + if (RSTRING_LEN(tmp) > 0 && isdirsep(RSTRING_PTR(tmp)[0])) { rb_str_set_len(result, tail - name); } - else if (!*tail) { + else if (tail == name + len) { rb_str_cat(result, "/", 1); } } @@ -5295,6 +5513,77 @@ rb_file_join(VALUE ary) return result; } +static inline VALUE +rb_file_join_fastpath(long argc, VALUE *args) +{ + long size = argc; + + long i; + for (i = 0; i < argc; i++) { + VALUE tmp = args[i]; + if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) { + size += RSTRING_LEN(tmp); + } + else { + return 0; + } + } + + VALUE result = rb_str_buf_new(size); + + int encidx = ENCODING_GET_INLINED(args[0]); + ENCODING_SET_INLINED(result, encidx); + rb_str_buf_append(result, args[0]); + + const char *name = RSTRING_PTR(result); + for (i = 1; i < argc; i++) { + VALUE tmp = args[i]; + long len = RSTRING_LEN(result); + + const char *tmp_s; + long tmp_len; + RSTRING_GETMEM(tmp, tmp_s, tmp_len); + + if (tmp_len > 0 && isdirsep(tmp_s[0])) { + // right side has a leading separator, remove left side separators. 
+ long chomp = len; + while (chomp > 0 && isdirsep(name[chomp - 1])) { + --chomp; + } + rb_str_set_len(result, chomp); + } + else if (len < 1 || !isdirsep(name[len - 1])) { + // neither side have a separator, append one; + rb_str_cat(result, "/", 1); + } + + if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) { + rb_encoding *new_enc = fs_enc_check(result, tmp); + rb_enc_associate(result, new_enc); + encidx = rb_enc_to_index(new_enc); + } + + rb_str_buf_cat(result, tmp_s, tmp_len); + } + + rb_str_null_check(result); + return result; +} + +static inline VALUE +rb_file_join(long argc, VALUE *args) +{ + if (RB_UNLIKELY(argc == 0)) { + return rb_str_new(0, 0); + } + + VALUE result = rb_file_join_fastpath(argc, args); + if (RB_LIKELY(result)) { + return result; + } + + return rb_file_join_ary(rb_ary_new_from_values(argc, args)); +} /* * call-seq: * File.join(string, ...) -> string @@ -5307,9 +5596,9 @@ rb_file_join(VALUE ary) */ static VALUE -rb_file_s_join(VALUE klass, VALUE args) +rb_file_s_join(int argc, VALUE *argv, VALUE klass) { - return rb_file_join(args); + return rb_file_join(argc, argv); } #if defined(HAVE_TRUNCATE) @@ -5449,7 +5738,7 @@ rb_thread_flock(void *data) * call-seq: * flock(locking_constant) -> 0 or false * - * Locks or unlocks file +self+ according to the given `locking_constant`, + * Locks or unlocks file `self` according to the given `locking_constant`, * a bitwise OR of the values in the table below. * * Not available on all platforms. @@ -5459,10 +5748,10 @@ rb_thread_flock(void *data) * * | Constant | Lock | Effect * |-----------------|--------------|-----------------------------------------------------------------------------------------------------------------| - * | +File::LOCK_EX+ | Exclusive | Only one process may hold an exclusive lock for +self+ at a time. | - * | +File::LOCK_NB+ | Non-blocking | No blocking; may be combined with +File::LOCK_SH+ or +File::LOCK_EX+ using the bitwise OR operator <tt>\|</tt>. 
| - * | +File::LOCK_SH+ | Shared | Multiple processes may each hold a shared lock for +self+ at the same time. | - * | +File::LOCK_UN+ | Unlock | Remove an existing lock held by this process. | + * | `File::LOCK_EX` | Exclusive | Only one process may hold an exclusive lock for `self` at a time. | + * | `File::LOCK_NB` | Non-blocking | No blocking; may be combined with `File::LOCK_SH` or `File::LOCK_EX` using the bitwise OR operator <tt>\|</tt>. | + * | `File::LOCK_SH` | Shared | Multiple processes may each hold a shared lock for `self` at the same time. | + * | `File::LOCK_UN` | Unlock | Remove an existing lock held by this process. | * * Example: * @@ -5589,11 +5878,11 @@ test_check(int n, int argc, VALUE *argv) * | <tt>'z'</tt> | Whether the entity exists and is of length zero. | * * - This test operates only on the entity at `path0`, - * and returns an integer size or +nil+: + * and returns an integer size or `nil`: * * | Character | Test | * |:------------:|:---------------------------------------------------------------------------------------------| - * | <tt>'s'</tt> | Returns positive integer size if the entity exists and has non-zero length, +nil+ otherwise. | + * | <tt>'s'</tt> | Returns positive integer size if the entity exists and has non-zero length, `nil` otherwise. | * * - Each of these tests operates only on the entity at `path0`, * and returns a Time object; @@ -5775,13 +6064,48 @@ rb_f_test(int argc, VALUE *argv, VALUE _) /* * Document-class: File::Stat * - * Objects of class File::Stat encapsulate common status information - * for File objects. The information is recorded at the moment the - * File::Stat object is created; changes made to the file after that - * point will not be reflected. File::Stat objects are returned by - * IO#stat, File::stat, File#lstat, and File::lstat. Many of these - * methods return platform-specific values, and not all values are - * meaningful on all systems. See also Kernel#test. 
+ * A \File::Stat object contains information about an entry in the file system. + * + * Each of these methods returns a new \File::Stat object: + * + * - File#lstat. + * - File::Stat.new. + * - File::lstat. + * - File::stat. + * - IO#stat. + * + * === Snapshot + * + * A new \File::Stat object takes an immediate "snapshot" of the entry's information; + * the captured information is never updated, + * regardless of changes in the actual entry. + * + * The entry must exist when File::Stat.new is called: + * + * filepath = 't.tmp' + * File.exist?(filepath) # => false + * File::Stat.new(filepath) # Raises Errno::ENOENT: No such file or directory. + * File.write(filepath, 'foo') # Create the file. + * stat = File::Stat.new(filepath) # Okay. + * + * Later changes to the actual entry do not change the \File::Stat object: + * + * File.atime(filepath) # => 2026-04-01 11:51:38.0014518 -0500 + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * File.write(filepath, 'bar') + * File.atime(filepath) # => 2026-04-01 11:58:11.922614 -0500 + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * File.delete(filepath) + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * + * === OS-Dependencies + * + * Methods in a \File::Stat object may return platform-dependent values, + * and not all values are meaningful on all systems; + * for example, File::Stat#blocks returns +nil+ on Windows, + * but returns an integer on Linux. + * + * See also Kernel#test. 
*/ static VALUE @@ -6475,96 +6799,6 @@ rb_is_absolute_path(const char *path) return 0; } -#ifndef ENABLE_PATH_CHECK -# if defined DOSISH || defined __CYGWIN__ -# define ENABLE_PATH_CHECK 0 -# else -# define ENABLE_PATH_CHECK 1 -# endif -#endif - -#if ENABLE_PATH_CHECK -static int -path_check_0(VALUE path) -{ - struct stat st; - const char *p0 = StringValueCStr(path); - const char *e0; - rb_encoding *enc; - char *p = 0, *s; - - if (!rb_is_absolute_path(p0)) { - char *buf = ruby_getcwd(); - VALUE newpath; - - newpath = rb_str_new2(buf); - xfree(buf); - - rb_str_cat2(newpath, "/"); - rb_str_cat2(newpath, p0); - path = newpath; - p0 = RSTRING_PTR(path); - } - e0 = p0 + RSTRING_LEN(path); - enc = rb_enc_get(path); - for (;;) { -#ifndef S_IWOTH -# define S_IWOTH 002 -#endif - if (STAT(p0, &st) == 0 && S_ISDIR(st.st_mode) && (st.st_mode & S_IWOTH) -#ifdef S_ISVTX - && !(p && (st.st_mode & S_ISVTX)) -#endif - && !access(p0, W_OK)) { - rb_enc_warn(enc, "Insecure world writable dir %s in PATH, mode 0%" -#if SIZEOF_DEV_T > SIZEOF_INT - PRI_MODET_PREFIX"o", -#else - "o", -#endif - p0, st.st_mode); - if (p) *p = '/'; - RB_GC_GUARD(path); - return 0; - } - s = strrdirsep(p0, e0, enc); - if (p) *p = '/'; - if (!s || s == p0) return 1; - p = s; - e0 = p; - *p = '\0'; - } -} -#endif - -int -rb_path_check(const char *path) -{ - rb_warn_deprecated_to_remove_at(3.6, "rb_path_check", NULL); -#if ENABLE_PATH_CHECK - const char *p0, *p, *pend; - const char sep = PATH_SEP_CHAR; - - if (!path) return 1; - - pend = path + strlen(path); - p0 = path; - p = strchr(path, sep); - if (!p) p = pend; - - for (;;) { - if (!path_check_0(rb_str_new(p0, p - p0))) { - return 0; /* not safe */ - } - p0 = p + 1; - if (p0 > pend) break; - p = strchr(p0, sep); - if (!p) p = pend; - } -#endif - return 1; -} - int ruby_is_fd_loadable(int fd) { @@ -6785,7 +7019,7 @@ const char ruby_null_device[] = * Methods File.new and File.open each may take string argument +mode+, which: * * - Begins with a 1- or 
2-character - * {read/write mode}[rdoc-ref:File@Read-2FWrite+Mode]. + * {read/write mode}[rdoc-ref:File@ReadWrite+Mode]. * - May also contain a 1-character {data mode}[rdoc-ref:File@Data+Mode]. * - May also contain a 1-character * {file-create mode}[rdoc-ref:File@File-Create+Mode]. @@ -7426,7 +7660,7 @@ const char ruby_null_device[] = * * First, what's elsewhere. Class \File: * - * - Inherits from {class IO}[rdoc-ref:IO@What-27s+Here], + * - Inherits from {class IO}[rdoc-ref:IO@Whats+Here], * in particular, methods for creating, reading, and writing files * - Includes module FileTest, * which provides dozens of additional methods. @@ -7641,7 +7875,7 @@ Init_File(void) /* separates directory parts in path */ rb_define_const(rb_cFile, "SEPARATOR", separator); rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1); - rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2); + rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1); #ifdef DOSISH /* platform specific alternative separator */ @@ -7945,11 +8179,11 @@ Init_File(void) * * ==== File::FNM_EXTGLOB * - * Flag File::FNM_EXTGLOB enables pattern <tt>'{_a_,_b_}'</tt>, + * Flag File::FNM_EXTGLOB enables pattern <tt>'{a,b}'</tt>, * which matches pattern '_a_' and pattern '_b_'; * behaves like * a {regexp union}[rdoc-ref:Regexp.union] - * (e.g., <tt>'(?:_a_|_b_)'</tt>): + * (e.g., <tt>'(?:a|b)'</tt>): * * pattern = '{LEGAL,BSDL}' * Dir.glob(pattern) # => ["LEGAL", "BSDL"] |
