summaryrefslogtreecommitdiff
path: root/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'file.c')
-rw-r--r--file.c1076
1 files changed, 655 insertions, 421 deletions
diff --git a/file.c b/file.c
index 3d8c800429..fffd09c22e 100644
--- a/file.c
+++ b/file.c
@@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp;
#include "internal.h"
#include "internal/compilers.h"
#include "internal/dir.h"
+#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/file.h"
#include "internal/io.h"
@@ -213,15 +214,16 @@ file_path_convert(VALUE name)
return name;
}
-static rb_encoding *
+static void
check_path_encoding(VALUE str)
{
- rb_encoding *enc = rb_enc_get(str);
- if (!rb_enc_asciicompat(enc)) {
- rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
- rb_enc_name(enc), rb_str_inspect(str));
+ if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
+ rb_encoding *enc = rb_str_enc_get(str);
+ if (!rb_enc_asciicompat(enc)) {
+ rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
+ rb_enc_name(enc), rb_str_inspect(str));
+ }
}
- return enc;
}
VALUE
@@ -243,13 +245,20 @@ VALUE
rb_get_path_check_convert(VALUE obj)
{
obj = file_path_convert(obj);
+ rb_get_path_check_no_convert(obj);
+ return rb_str_new_frozen(obj);
+}
+/* TODO: name */
+VALUE
+rb_get_path_check_no_convert(VALUE obj)
+{
check_path_encoding(obj);
if (!rb_str_to_cstr(obj)) {
rb_raise(rb_eArgError, "path name contains null byte");
}
- return rb_str_new4(obj);
+ return obj;
}
VALUE
@@ -264,6 +273,19 @@ rb_get_path(VALUE obj)
return rb_get_path_check_convert(rb_get_path_check_to_string(obj));
}
+static inline VALUE
+check_path(VALUE obj, const char **cstr)
+{
+ VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj));
+#if RUBY_DEBUG
+ str = rb_str_new_frozen(str);
+#endif
+ *cstr = RSTRING_PTR(str);
+ return str;
+}
+
+#define CheckPath(str, cstr) RB_GC_GUARD(str) = check_path(str, &cstr);
+
VALUE
rb_str_encode_ospath(VALUE path)
{
@@ -365,9 +387,15 @@ rb_str_normalize_ospath(const char *ptr, long len)
const char *p = ptr;
const char *e = ptr + len;
const char *p1 = p;
- VALUE str = rb_str_buf_new(len);
rb_encoding *enc = rb_utf8_encoding();
- rb_enc_associate(str, enc);
+ VALUE str = rb_utf8_str_new(ptr, len);
+ if (RB_LIKELY(rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)) {
+ return str;
+ }
+ else {
+ str = rb_str_buf_new(len);
+ rb_enc_associate(str, enc);
+ }
while (p < e) {
int l, c;
@@ -605,17 +633,31 @@ statx_mtimespec(const rb_io_stat_data *st)
/*
* call-seq:
- * stat <=> other_stat -> -1, 0, 1, nil
+ * self <=> other -> -1, 0, 1, or nil
+ *
+ * Compares +self+ and +other+, by comparing their modification times;
+ * that is, by comparing <tt>self.mtime</tt> and <tt>other.mtime</tt>.
*
- * Compares File::Stat objects by comparing their respective modification
- * times.
+ * Returns:
*
- * +nil+ is returned if +other_stat+ is not a File::Stat object
+ * - +-1+, if <tt>self.mtime</tt> is earlier.
+ * - +0+, if the two values are equal.
+ * - +1+, if <tt>self.mtime</tt> is later.
+ * - +nil+, if +other+ is not a File::Stat object.
*
- * f1 = File.new("f1", "w")
- * sleep 1
- * f2 = File.new("f2", "w")
- * f1.stat <=> f2.stat #=> -1
+ * Examples:
+ *
+ * stat0 = File.stat('README.md')
+ * stat1 = File.stat('NEWS.md')
+ * stat0.mtime # => 2025-12-20 15:33:05.6972341 -0600
+ * stat1.mtime # => 2025-12-20 16:02:08.2672945 -0600
+ * stat0 <=> stat1 # => -1
+ * stat0 <=> stat0.dup # => 0
+ * stat1 <=> stat0 # => 1
+ * stat0 <=> :foo # => nil
+ *
+ * \Class \File::Stat includes module Comparable,
+ * each of whose methods uses File::Stat#<=> for comparison.
*/
static VALUE
@@ -1066,13 +1108,28 @@ static VALUE statx_birthtime(const rb_io_stat_data *st);
/*
* call-seq:
- * stat.atime -> time
- *
- * Returns the last access time for this file as an object of class
- * Time.
- *
- * File.stat("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969
- *
+ * atime -> new_time
+ *
+ * Returns a new Time object containing the access time
+ * of the object represented by +self+
+ * at the time +self+ was created;
+ * see {Snapshot}[rdoc-ref:File::Stat@Snapshot]:
+ *
+ * filepath = 't.tmp'
+ * File.write(filepath, 'foo')
+ * file = File.new(filepath, 'w')
+ * stat = File::Stat.new(filepath)
+ * file.atime # => 2026-03-31 16:26:39.5913207 -0500
+ * stat.atime # => 2026-03-31 16:26:39.5913207 -0500
+ * File.write(filepath, 'bar')
+ * file.atime # => 2026-03-31 16:27:01.4981624 -0500 # Changed by access.
+ * stat.atime # => 2026-03-31 16:26:39.5913207 -0500 # Unchanged by access.
+ * stat = File::Stat.new(filepath)
+ * stat.atime # => 2026-03-31 16:27:01.4981624 -0500 # New access time.
+ * file.close
+ * File.delete(filepath)
+ *
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
static VALUE
@@ -1120,24 +1177,22 @@ rb_stat_ctime(VALUE self)
#if defined(HAVE_STAT_BIRTHTIME)
/*
* call-seq:
- * stat.birthtime -> time
- *
- * Returns the birth time for <i>stat</i>.
- *
- * If the platform doesn't have birthtime, raises NotImplementedError.
- *
- * File.write("testfile", "foo")
- * sleep 10
- * File.write("testfile", "bar")
- * sleep 10
- * File.chmod(0644, "testfile")
- * sleep 10
- * File.read("testfile")
- * File.stat("testfile").birthtime #=> 2014-02-24 11:19:17 +0900
- * File.stat("testfile").mtime #=> 2014-02-24 11:19:27 +0900
- * File.stat("testfile").ctime #=> 2014-02-24 11:19:37 +0900
- * File.stat("testfile").atime #=> 2014-02-24 11:19:47 +0900
- *
+ * birthtime -> new_time
+ *
+ * Returns a new Time object containing the create time
+ * of the object represented by +self+
+ * at the time +self+ was created;
+ * see {Snapshot}[rdoc-ref:File::Stat@Snapshot]:
+ *
+ * filename = 't.tmp'
+ * stat = File::Stat.new(filename) # Raises Errno::ENOENT: No such file or directory
+ * File.write(filename, 'foo')
+ * stat = File::Stat.new(filename)
+ * stat.birthtime # => 2026-04-14 10:41:55.5146554 -0500
+ * File.delete(filename)
+ * stat.birthtime # => 2026-04-14 10:41:55.5146554 -0500
+ *
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
static VALUE
@@ -2172,7 +2227,7 @@ rb_file_size_p(VALUE obj, VALUE fname)
* File.owned?(file_name) -> true or false
*
* Returns <code>true</code> if the named file exists and the
- * effective used id of the calling process is the owner of
+ * effective user id of the calling process is the owner of
* the file.
*
* _file_name_ can be an IO object.
@@ -2383,7 +2438,7 @@ rb_file_ftype(mode_t mode)
t = "unknown";
}
- return rb_usascii_str_new2(t);
+ return rb_fstring_cstr(t);
}
/*
@@ -2417,14 +2472,25 @@ rb_file_s_ftype(VALUE klass, VALUE fname)
/*
* call-seq:
- * File.atime(file_name) -> time
+ * File.atime(object) -> new_time
*
- * Returns the last access time for the named file as a Time object.
+ * Returns a new Time object containing the time of the most recent
+ * access (read or write) to the object,
+ * which may be a string filepath or dirpath, or a File or Dir object:
*
- * _file_name_ can be an IO object.
+ * filepath = 't.tmp'
+ * File.exist?(filepath) # => false
+ * File.atime(filepath) # Raises Errno::ENOENT.
+ * File.write(filepath, 'foo')
+ * File.atime(filepath) # => 2026-03-31 16:39:37.9290772 -0500
+ * File.write(filepath, 'bar')
+ * File.atime(filepath) # => 2026-03-31 16:39:57.7710876 -0500
*
- * File.atime("testfile") #=> Wed Apr 09 08:51:48 CDT 2003
+ * File.atime('.') # => 2026-03-31 16:47:49.0970483 -0500
+ * File.atime(File.new('README.md')) # => 2026-03-31 11:15:27.8215934 -0500
+ * File.atime(Dir.new('.')) # => 2026-03-31 12:39:45.5910591 -0500
*
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
static VALUE
@@ -2442,13 +2508,22 @@ rb_file_s_atime(VALUE klass, VALUE fname)
/*
* call-seq:
- * file.atime -> time
- *
- * Returns the last access time (a Time object) for <i>file</i>, or
- * epoch if <i>file</i> has not been accessed.
- *
- * File.new("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969
- *
+ * atime -> new_time
+ *
+ * Returns a new Time object containing the time of the most recent
+ * access (read or write) to the file represented by +self+:
+ *
+ * filepath = 't.tmp'
+ * file = File.new(filepath, 'a+')
+ * file.atime # => 2026-03-31 17:11:27.7285397 -0500
+ * file.write('foo')
+ * file.atime # => 2026-03-31 17:11:27.7285397 -0500 # Unchanged; not yet written.
+ * file.flush
+ * file.atime # => 2026-03-31 17:12:11.3408054 -0500 # Changed; now written.
+ * file.close
+ * File.delete(filepath)
+ *
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
static VALUE
@@ -2570,16 +2645,21 @@ rb_file_ctime(VALUE obj)
#if defined(HAVE_STAT_BIRTHTIME)
/*
* call-seq:
- * File.birthtime(file_name) -> time
+ * File.birthtime(entry_path) -> new_time
*
- * Returns the birth time for the named file.
+ * Returns a new Time object containing the create time
+ * of the entry at the given +entry_path+:
*
- * _file_name_ can be an IO object.
- *
- * File.birthtime("testfile") #=> Wed Apr 09 08:53:13 CDT 2003
- *
- * If the platform doesn't have birthtime, raises NotImplementedError.
+ * path = 't.tmp'
+ * File.birthtime(path) # Raises Errno::ENOENT: No such file or directory
+ * File.write(path, 'foo')
+ * File.birthtime(path) # => 2026-04-14 11:10:43.2891695 -0500
+ * File.write(path, 'bar')
+ * File.birthtime(path) # => 2026-04-14 11:10:43.2891695 -0500
+ * File.delete(path)
+ * File.birthtime(path) # Raises Errno::ENOENT: No such file or directory
*
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
VALUE
@@ -2601,14 +2681,21 @@ rb_file_s_birthtime(VALUE klass, VALUE fname)
#if defined(HAVE_STAT_BIRTHTIME)
/*
* call-seq:
- * file.birthtime -> time
+ * birthtime -> new_time
*
- * Returns the birth time for <i>file</i>.
+ * Returns a new Time object containing the create time for +self+:
*
- * File.new("testfile").birthtime #=> Wed Apr 09 08:53:14 CDT 2003
- *
- * If the platform doesn't have birthtime, raises NotImplementedError.
+ * filepath = 't.tmp'
+ * File.write(filepath, 'foo')
+ * file = File.new(filepath)
+ * file.birthtime # => 2026-04-14 15:53:45.002656 -0500
+ * File.write(filepath, 'bar')
+ * file.birthtime # => 2026-04-14 15:53:45.002656 -0500
+ * file.close
+ * File.delete(filepath)
+ * file.birthtime # Raises IOError: closed stream
*
+ * See {File System Timestamps}[rdoc-ref:file/timestamps.md].
*/
static VALUE
@@ -3396,7 +3483,7 @@ unlink_internal(const char *path, void *arg)
* Since the underlying implementation relies on the
* <code>unlink(2)</code> system call, the type of
* exception raised depends on its error type (see
- * https://linux.die.net/man/2/unlink) and has the form of
+ * https://man7.org/linux/man-pages/man2/unlink.2.html) and has the form of
* e.g. Errno::ENOENT.
*
* See also Dir::rmdir.
@@ -3542,8 +3629,10 @@ static const char file_alt_separator[] = {FILE_ALT_SEPARATOR, '\0'};
# define isADS(x) 0
#endif
-#define Next(p, e, enc) ((p) + rb_enc_mbclen((p), (e), (enc)))
-#define Inc(p, e, enc) ((p) = Next((p), (e), (enc)))
+#define enc_mbclen_needed(enc) (!rb_str_encindex_fastpath(rb_enc_to_index(enc)))
+
+#define Next(p, e, mb_enc, enc) ((p) + ((mb_enc) ? rb_enc_mbclen((p), (e), (enc)) : 1))
+#define Inc(p, e, mb_enc, enc) ((p) = Next((p), (e), (mb_enc), (enc)))
#if defined(DOSISH_UNC)
#define has_unc(buf) (isdirsep((buf)[0]) && isdirsep((buf)[1]))
@@ -3564,11 +3653,12 @@ has_drive_letter(const char *buf)
}
#ifndef _WIN32
-static char*
+static VALUE
getcwdofdrv(int drv)
{
char drive[4];
- char *drvcwd, *oldcwd;
+ char *oldcwd;
+ VALUE drvcwd;
drive[0] = drv;
drive[1] = ':';
@@ -3580,13 +3670,13 @@ getcwdofdrv(int drv)
*/
oldcwd = ruby_getcwd();
if (chdir(drive) == 0) {
- drvcwd = ruby_getcwd();
+ drvcwd = rb_dir_getwd_ospath();
chdir(oldcwd);
xfree(oldcwd);
}
else {
/* perhaps the drive is not exist. we return only drive letter */
- drvcwd = strdup(drive);
+ drvcwd = rb_enc_str_new_cstr(drive, rb_filesystem_encoding());
}
return drvcwd;
}
@@ -3607,7 +3697,7 @@ not_same_drive(VALUE path, int drive)
#endif /* DOSISH_DRIVE_LETTER */
static inline char *
-skiproot(const char *path, const char *end, rb_encoding *enc)
+skiproot(const char *path, const char *end)
{
#ifdef DOSISH_DRIVE_LETTER
if (path + 2 <= end && has_drive_letter(path)) path += 2;
@@ -3616,57 +3706,76 @@ skiproot(const char *path, const char *end, rb_encoding *enc)
return (char *)path;
}
-#define nextdirsep rb_enc_path_next
-char *
-rb_enc_path_next(const char *s, const char *e, rb_encoding *enc)
+static inline char *
+enc_path_next(const char *s, const char *e, bool mb_enc, rb_encoding *enc)
{
while (s < e && !isdirsep(*s)) {
- Inc(s, e, enc);
+ Inc(s, e, mb_enc, enc);
}
return (char *)s;
}
+#define nextdirsep rb_enc_path_next
+char *
+rb_enc_path_next(const char *s, const char *e, rb_encoding *enc)
+{
+ return enc_path_next(s, e, enc_mbclen_needed(enc), enc);
+}
+
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
-#define skipprefix rb_enc_path_skip_prefix
+#define skipprefix enc_path_skip_prefix
#else
-#define skipprefix(path, end, enc) (path)
+#define skipprefix(path, end, mb_enc, enc) (path)
#endif
-char *
-rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc)
+static inline char *
+enc_path_skip_prefix(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
{
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
#ifdef DOSISH_UNC
if (path + 2 <= end && isdirsep(path[0]) && isdirsep(path[1])) {
path += 2;
while (path < end && isdirsep(*path)) path++;
- if ((path = rb_enc_path_next(path, end, enc)) < end && path[0] && path[1] && !isdirsep(path[1]))
- path = rb_enc_path_next(path + 1, end, enc);
+ if ((path = enc_path_next(path, end, mb_enc, enc)) < end &&
+ path + 2 <= end && !isdirsep(path[1])) {
+ path = enc_path_next(path + 1, end, mb_enc, enc);
+ }
return (char *)path;
}
#endif
#ifdef DOSISH_DRIVE_LETTER
- if (has_drive_letter(path))
+ if (path + 2 <= end && has_drive_letter(path))
return (char *)(path + 2);
#endif
#endif /* defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) */
return (char *)path;
}
+char *
+rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc)
+{
+ return enc_path_skip_prefix(path, end, enc_mbclen_needed(enc), enc);
+}
+
static inline char *
skipprefixroot(const char *path, const char *end, rb_encoding *enc)
{
#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER)
- char *p = skipprefix(path, end, enc);
- while (isdirsep(*p)) p++;
+ char *p = skipprefix(path, end, enc_mbclen_needed(enc), enc);
+ while (p < end && isdirsep(*p)) p++;
return p;
#else
- return skiproot(path, end, enc);
+ return skiproot(path, end);
#endif
}
-#define strrdirsep rb_enc_path_last_separator
char *
-rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
+rb_enc_path_skip_prefix_root(const char *path, const char *end, rb_encoding *enc)
+{
+ return skipprefixroot(path, end, enc);
+}
+
+static char *
+enc_path_last_separator(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
{
char *last = NULL;
while (path < end) {
@@ -3677,14 +3786,44 @@ rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
last = (char *)tmp;
}
else {
- Inc(path, end, enc);
+ Inc(path, end, mb_enc, enc);
}
}
return last;
}
+char *
+rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
+{
+ return enc_path_last_separator(path, end, enc_mbclen_needed(enc), enc);
+}
+
+static inline char *
+strrdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
+{
+ if (RB_UNLIKELY(mb_enc)) {
+ return enc_path_last_separator(path, end, mb_enc, enc);
+ }
+
+ const char *cursor = end - 1;
+
+ while (isdirsep(cursor[0])) {
+ cursor--;
+ }
+
+ while (cursor >= path) {
+ if (isdirsep(cursor[0])) {
+ while (cursor > path && isdirsep(cursor[-1])) {
+ cursor--;
+ }
+ return (char *)cursor;
+ }
+ cursor--;
+ }
+ return NULL;
+}
static char *
-chompdirsep(const char *path, const char *end, rb_encoding *enc)
+chompdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
{
while (path < end) {
if (isdirsep(*path)) {
@@ -3693,7 +3832,7 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc)
if (path >= end) return (char *)last;
}
else {
- Inc(path, end, enc);
+ Inc(path, end, mb_enc, enc);
}
}
return (char *)path;
@@ -3703,13 +3842,13 @@ char *
rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
{
if (path < end && isdirsep(*path)) path++;
- return chompdirsep(path, end, enc);
+ return chompdirsep(path, end, enc_mbclen_needed(enc), enc);
}
static rb_encoding *
fs_enc_check(VALUE path1, VALUE path2)
{
- rb_encoding *enc = rb_enc_check(path1, path2);
+ rb_encoding *enc = rb_enc_check_str(path1, path2);
int encidx = rb_enc_to_index(enc);
if (encidx == ENCINDEX_US_ASCII) {
encidx = rb_enc_get_index(path1);
@@ -3724,6 +3863,7 @@ fs_enc_check(VALUE path1, VALUE path2)
static char *
ntfs_tail(const char *path, const char *end, rb_encoding *enc)
{
+ bool mb_enc = enc_mbclen_needed(enc);
while (path < end && *path == '.') path++;
while (path < end && !isADS(*path)) {
if (istrailinggarbage(*path)) {
@@ -3738,7 +3878,7 @@ ntfs_tail(const char *path, const char *end, rb_encoding *enc)
if (isADS(*path)) path++;
}
else {
- Inc(path, end, enc);
+ Inc(path, end, mb_enc, enc);
}
}
return (char *)path;
@@ -3786,10 +3926,6 @@ static VALUE
copy_home_path(VALUE result, const char *dir)
{
char *buf;
-#if defined DOSISH || defined __CYGWIN__
- char *p, *bend;
- rb_encoding *enc;
-#endif
long dirlen;
int encidx;
@@ -3798,10 +3934,11 @@ copy_home_path(VALUE result, const char *dir)
memcpy(buf = RSTRING_PTR(result), dir, dirlen);
encidx = rb_filesystem_encindex();
rb_enc_associate_index(result, encidx);
-#if defined DOSISH || defined __CYGWIN__
- enc = rb_enc_from_index(encidx);
- for (bend = (p = buf) + dirlen; p < bend; Inc(p, bend, enc)) {
- if (*p == '\\') {
+#if defined FILE_ALT_SEPARATOR
+ rb_encoding *enc = rb_enc_from_index(encidx);
+ bool mb_enc = enc_mbclen_needed(enc);
+ for (char *p = buf, *bend = p + dirlen; p < bend; Inc(p, bend, mb_enc, enc)) {
+ if (*p == FILE_ALT_SEPARATOR) {
*p = '/';
}
}
@@ -3909,16 +4046,19 @@ ospath_new(const char *ptr, long len, rb_encoding *fsenc)
}
static char *
-append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encoding *fsenc)
+append_fspath(VALUE result, VALUE fname, VALUE dirname, rb_encoding **enc, rb_encoding *fsenc)
{
- char *buf, *cwdp = dir;
- VALUE dirname = Qnil;
- size_t dirlen = strlen(dir), buflen = rb_str_capacity(result);
+ if (RB_UNLIKELY(!rb_enc_asciicompat(fsenc) || rb_enc_str_coderange(dirname) != ENC_CODERANGE_7BIT)) {
+ dirname = rb_str_new_shared(dirname);
+ rb_enc_associate(dirname, fsenc);
+ }
+
+ char *buf, *cwdp;
+ size_t dirlen = RSTRING_LEN(dirname);
+ size_t buflen = rb_str_capacity(result);
if (NORMALIZE_UTF8PATH || *enc != fsenc) {
- dirname = ospath_new(dir, dirlen, fsenc);
if (!rb_enc_compatible(fname, dirname)) {
- xfree(dir);
/* rb_enc_check must raise because the two encodings are not
* compatible. */
rb_enc_check(fname, dirname);
@@ -3927,19 +4067,15 @@ append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encodi
rb_encoding *direnc = fs_enc_check(fname, dirname);
if (direnc != fsenc) {
dirname = rb_str_conv_enc(dirname, fsenc, direnc);
- RSTRING_GETMEM(dirname, cwdp, dirlen);
- }
- else if (NORMALIZE_UTF8PATH) {
- RSTRING_GETMEM(dirname, cwdp, dirlen);
}
*enc = direnc;
}
+
+ RSTRING_GETMEM(dirname, cwdp, dirlen);
do {buflen *= 2;} while (dirlen > buflen);
rb_str_resize(result, buflen);
buf = RSTRING_PTR(result);
memcpy(buf, cwdp, dirlen);
- xfree(dir);
- if (!NIL_P(dirname)) rb_str_resize(dirname, 0);
rb_enc_associate(result, *enc);
return buf + dirlen;
}
@@ -3954,16 +4090,21 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
s = StringValuePtr(fname);
fend = s + RSTRING_LEN(fname);
- enc = rb_enc_get(fname);
+ enc = rb_str_enc_get(fname);
+ bool mb_enc = enc_mbclen_needed(enc);
+ if (!mb_enc && RTEST(dname)) {
+ mb_enc = enc_mbclen_needed(rb_str_enc_get(dname));
+ }
+
BUFINIT();
- if (s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */
+ if (s < fend && s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */
long userlen = 0;
- if (isdirsep(s[1]) || s[1] == '\0') {
+ if (s + 1 == fend || isdirsep(s[1])) {
buf = 0;
b = 0;
rb_str_set_len(result, 0);
- if (*++s) ++s;
+ if (++s < fend) ++s;
rb_default_home_dir(result);
}
else {
@@ -3993,8 +4134,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
}
#ifdef DOSISH_DRIVE_LETTER
/* skip drive letter */
- else if (has_drive_letter(s)) {
- if (isdirsep(s[2])) {
+ else if (s + 1 < fend && has_drive_letter(s)) {
+ if (s + 2 < fend && isdirsep(s[2])) {
/* specified drive letter, and full path */
/* skip drive letter */
BUFCHECK(bdiff + 2 >= buflen);
@@ -4023,12 +4164,12 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
rb_enc_associate(result, enc = fs_enc_check(result, fname));
p = pend;
}
- p = chompdirsep(skiproot(buf, p, enc), p, enc);
+ p = chompdirsep(skiproot(buf, p), p, mb_enc, enc);
s += 2;
}
}
#endif /* DOSISH_DRIVE_LETTER */
- else if (!rb_is_absolute_path(s)) {
+ else if (s == fend || !rb_is_absolute_path(s)) {
if (!NIL_P(dname)) {
rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result);
rb_enc_associate(result, fs_enc_check(result, fname));
@@ -4036,24 +4177,24 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
p = pend;
}
else {
- char *e = append_fspath(result, fname, ruby_getcwd(), &enc, fsenc);
+ char *e = append_fspath(result, fname, rb_dir_getwd_ospath(), &enc, fsenc);
BUFINIT();
p = e;
}
-#if defined DOSISH || defined __CYGWIN__
- if (isdirsep(*s)) {
+#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
+ if (s < fend && isdirsep(*s)) {
/* specified full path, but not drive letter nor UNC */
/* we need to get the drive letter or UNC share name */
- p = skipprefix(buf, p, enc);
+ p = skipprefix(buf, p, mb_enc, enc);
}
else
-#endif /* defined DOSISH || defined __CYGWIN__ */
- p = chompdirsep(skiproot(buf, p, enc), p, enc);
+#endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */
+ p = chompdirsep(skiproot(buf, p), p, mb_enc, enc);
}
else {
size_t len;
b = s;
- do s++; while (isdirsep(*s));
+ do s++; while (s < fend && isdirsep(*s));
len = s - b;
p = buf + len;
BUFCHECK(bdiff >= buflen);
@@ -4072,23 +4213,24 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
rb_str_set_len(result, p-buf+1);
BUFCHECK(bdiff + 1 >= buflen);
p[1] = 0;
- root = skipprefix(buf, p+1, enc);
+ root = skipprefix(buf, p+1, mb_enc, enc);
b = s;
- while (*s) {
+ while (s < fend) {
switch (*s) {
case '.':
if (b == s++) { /* beginning of path element */
- switch (*s) {
- case '\0':
+ if (s == fend) {
b = s;
break;
+ }
+ switch (*s) {
case '.':
- if (*(s+1) == '\0' || isdirsep(*(s+1))) {
+ if (s+1 == fend || isdirsep(*(s+1))) {
/* We must go back to the parent */
char *n;
*p = '\0';
- if (!(n = strrdirsep(root, p, enc))) {
+ if (!(n = strrdirsep(root, p, mb_enc, enc))) {
*p = '/';
}
else {
@@ -4098,13 +4240,13 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
}
#if USE_NTFS
else {
- do ++s; while (istrailinggarbage(*s));
+ do ++s; while (s < fend && istrailinggarbage(*s));
}
#endif /* USE_NTFS */
break;
case '/':
-#if defined DOSISH || defined __CYGWIN__
- case '\\':
+#if defined FILE_ALT_SEPARATOR
+ case FILE_ALT_SEPARATOR:
#endif
b = ++s;
break;
@@ -4128,8 +4270,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
#endif /* USE_NTFS */
break;
case '/':
-#if defined DOSISH || defined __CYGWIN__
- case '\\':
+#if defined FILE_ALT_SEPARATOR
+ case FILE_ALT_SEPARATOR:
#endif
if (s > b) {
WITH_ROOTDIFF(BUFCOPY(b, s-b));
@@ -4151,7 +4293,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
}
}
#endif /* __APPLE__ */
- Inc(s, fend, enc);
+ Inc(s, fend, mb_enc, enc);
break;
}
}
@@ -4179,7 +4321,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
BUFCOPY(b, s-b);
rb_str_set_len(result, p-buf);
}
- if (p == skiproot(buf, p + !!*p, enc) - 1) p++;
+ if (p == skiproot(buf, p + !!*p) - 1) p++;
#if USE_NTFS
*p = '\0';
@@ -4337,31 +4479,43 @@ rb_file_s_expand_path(int argc, const VALUE *argv)
}
/*
+ * :markup: markdown
+ *
* call-seq:
- * File.expand_path(file_name [, dir_string] ) -> abs_file_name
+ * File.expand_path(path, dirpath = '.') -> absolute_path
*
- * Converts a pathname to an absolute pathname. Relative paths are
- * referenced from the current working directory of the process unless
- * +dir_string+ is given, in which case it will be used as the
- * starting point. The given pathname may start with a
- * ``<code>~</code>'', which expands to the process owner's home
- * directory (the environment variable +HOME+ must be set
- * correctly). ``<code>~</code><i>user</i>'' expands to the named
- * user's home directory.
+ * Returns the string absolute path for the given `path`.
*
- * File.expand_path("~oracle/bin") #=> "/home/oracle/bin"
+ * Evaluates a relative path with respect to the directory given by `dirpath`:
*
- * A simple example of using +dir_string+ is as follows.
- * File.expand_path("ruby", "/usr/bin") #=> "/usr/bin/ruby"
+ * ```ruby
+ * Dir.chdir('/snap')
+ * # Default dirpath.
+ * File.expand_path('README') # => "/snap/README"
+ * File.expand_path('bin') # => "/snap/bin"
+ * File.expand_path('bin/../var') # => "/snap/var" # Cleaned.
+ * # Other dirpath.
+ * File.expand_path('../zip', '/usr/bin/ruby') # => "/usr/bin/zip"
+ * Dir.chdir('/usr/bin')
+ * File.expand_path('../../snap', __FILE__) # => "/usr/snap"
+ * ```
+ *
+ * Evaluates an absolute path without respect to `dirpath`:
*
- * A more complex example which also resolves parent directory is as follows.
- * Suppose we are in bin/mygem and want the absolute path of lib/mygem.rb.
+ * ```ruby
+ * File.expand_path('/snap') # => "/snap"
+ * File.expand_path('/snap', 'nosuch') # => "/snap"
+ * File.expand_path('/snap/../snap') # => "/snap" # Cleaned.
+ * ```
*
- * File.expand_path("../../lib/mygem.rb", __FILE__)
- * #=> ".../path/to/project/lib/mygem.rb"
+ * More examples:
+ *
+ * ```ruby
+ * Dir.chdir('/usr/bin')
+ * File.expand_path('../../snap', __FILE__) # => "/usr/snap"
+ * File.expand_path('../../snap') # => "/snap"
+ * ```
*
- * So first it resolves the parent of __FILE__, that is bin/, then go to the
- * parent, the root of the project and appends +lib/mygem.rb+.
*/
static VALUE
@@ -4449,9 +4603,10 @@ realpath_rec(long *prefixlenp, VALUE *resolvedp, const char *unresolved, VALUE f
}
else if (testnamelen == 2 && testname[0] == '.' && testname[1] == '.') {
if (*prefixlenp < RSTRING_LEN(*resolvedp)) {
+ bool mb_enc = enc_mbclen_needed(enc);
const char *resolved_str = RSTRING_PTR(*resolvedp);
const char *resolved_names = resolved_str + *prefixlenp;
- const char *lastsep = strrdirsep(resolved_names, resolved_str + RSTRING_LEN(*resolvedp), enc);
+ const char *lastsep = strrdirsep(resolved_names, resolved_str + RSTRING_LEN(*resolvedp), mb_enc, enc);
long len = lastsep ? lastsep - resolved_names : 0;
rb_str_resize(*resolvedp, *prefixlenp + len);
}
@@ -4591,7 +4746,8 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
root_found:
RSTRING_GETMEM(resolved, prefixptr, prefixlen);
pend = prefixptr + prefixlen;
- ptr = chompdirsep(prefixptr, pend, enc);
+ bool mb_enc = enc_mbclen_needed(enc);
+ ptr = chompdirsep(prefixptr, pend, mb_enc, enc);
if (ptr < pend) {
prefixlen = ++ptr - prefixptr;
rb_str_set_len(resolved, prefixlen);
@@ -4601,7 +4757,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
if (*prefixptr == FILE_ALT_SEPARATOR) {
*prefixptr = '/';
}
- Inc(prefixptr, pend, enc);
+ Inc(prefixptr, pend, mb_enc, enc);
}
#endif
@@ -4637,7 +4793,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
return resolved;
}
-static VALUE rb_file_join(VALUE ary);
+static VALUE rb_file_join(long argc, VALUE *args);
#ifndef HAVE_REALPATH
static VALUE
@@ -4678,7 +4834,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum
unresolved_path = rb_str_dup_frozen(path);
if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) {
- unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path));
+ VALUE paths[2] = {basedir, unresolved_path};
+ unresolved_path = rb_file_join(2, paths);
}
if (origenc) unresolved_path = TO_OSPATH(unresolved_path);
@@ -4844,23 +5001,29 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
return 0;
}
-const char *
-ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc)
+static inline const char *
+enc_find_basename(const char *name, long *baselen, long *alllen, bool mb_enc, rb_encoding *enc)
{
const char *p, *q, *e, *end;
-#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
- const char *root;
-#endif
long f = 0, n = -1;
- end = name + (alllen ? (size_t)*alllen : strlen(name));
- name = skipprefix(name, end, enc);
+ long len = (alllen ? (size_t)*alllen : strlen(name));
+
+ if (len <= 0) {
+ return name;
+ }
+
+ end = name + len;
+ name = skipprefix(name, end, mb_enc, enc);
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
- root = name;
+ const char *root = name;
#endif
- while (isdirsep(*name))
+
+ while (name < end && isdirsep(*name)) {
name++;
- if (!*name) {
+ }
+
+ if (name == end) {
p = name - 1;
f = 1;
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
@@ -4881,91 +5044,127 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin
#endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */
}
else {
- if (!(p = strrdirsep(name, end, enc))) {
+ p = strrdirsep(name, end, mb_enc, enc);
+ if (!p) {
p = name;
}
else {
- while (isdirsep(*p)) p++; /* skip last / */
+ while (isdirsep(*p)) {
+ p++; /* skip last / */
+ }
}
#if USE_NTFS
n = ntfs_tail(p, end, enc) - p;
#else
- n = chompdirsep(p, end, enc) - p;
+ n = chompdirsep(p, end, mb_enc, enc) - p;
#endif
for (q = p; q - p < n && *q == '.'; q++);
- for (e = 0; q - p < n; Inc(q, end, enc)) {
+ for (e = 0; q - p < n; Inc(q, end, mb_enc, enc)) {
if (*q == '.') e = q;
}
- if (e) f = e - p;
- else f = n;
+ if (e) {
+ f = e - p;
+ }
+ else {
+ f = n;
+ }
}
- if (baselen)
+ if (baselen) {
*baselen = f;
- if (alllen)
+ }
+ if (alllen) {
*alllen = n;
+ }
return p;
}
+const char *
+ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc)
+{
+ return enc_find_basename(name, baselen, alllen, enc_mbclen_needed(enc), enc);
+}
+
/*
* call-seq:
- * File.basename(file_name [, suffix] ) -> base_name
+ * File.basename(path, suffix = '') -> new_string
*
- * Returns the last component of the filename given in
- * <i>file_name</i> (after first stripping trailing separators),
- * which can be formed using both File::SEPARATOR and
- * File::ALT_SEPARATOR as the separator when File::ALT_SEPARATOR is
- * not <code>nil</code>. If <i>suffix</i> is given and present at the
- * end of <i>file_name</i>, it is removed. If <i>suffix</i> is ".*",
- * any extension will be removed.
+ * Returns a new string containing all or part of the last entry of the given +path+.
+ * Entries are delimited by the value of constant File::SEPARATOR
+ * and, if non-nil, the value of constant File::ALT_SEPARATOR.
+ *
+ * When +suffix+ is the empty string <tt>''</tt>,
+ * returns all of the last entry:
+ *
+ * File.basename('foo/bar/baz/bat.txt') # => "bat.txt"
+ * File.basename('foo/bar/baz') # => "baz"
+ *
+ * File::SEPARATOR # => "/"
+ * File.basename('foo/bar.txt////') # => "bar.txt"
+ * File::ALT_SEPARATOR # => "\\" # On Windows.
+ * File.basename('foo/bar.txt//\\\\//') # => "bar.txt"
+ *
+ * When +suffix+ is <tt>'.*'</tt>,
+ * the last {filename extension}[https://en.wikipedia.org/wiki/Filename_extension],
+ * if any, is removed:
+ *
+ * File.basename('foo/bar.txt', '.*') # => "bar"
+ * File.basename('foo/bar.txt.old', '.*') # => "bar.txt"
+ * File.basename('foo/bar', '.*') # => "bar"
+ *
+ * When +suffix+ is any string other than <tt>''</tt> or <tt>'.*'</tt>,
+ * the matching trailing substring, if any, is removed:
+ *
+ * File.basename('foo/bar.txt', '.txt') # => "bar"
+ * File.basename('foo/bar.txt', 'txt') # => "bar."
+ * File.basename('foo/bar.txt', '*') # => "bar.txt"
+ * File.basename('foo/bar.txt', '.') # => "bar.txt"
*
- * File.basename("/home/gumby/work/ruby.rb") #=> "ruby.rb"
- * File.basename("/home/gumby/work/ruby.rb", ".rb") #=> "ruby"
- * File.basename("/home/gumby/work/ruby.rb", ".*") #=> "ruby"
*/
static VALUE
rb_file_s_basename(int argc, VALUE *argv, VALUE _)
{
- VALUE fname, fext, basename;
- const char *name, *p;
- long f, n;
+ VALUE fname, fext = Qnil;
+ const char *name, *p, *fp = 0;
+ long f = 0, n;
rb_encoding *enc;
- fext = Qnil;
- if (rb_check_arity(argc, 1, 2) == 2) {
+ argc = rb_check_arity(argc, 1, 2);
+ fname = argv[0];
+ CheckPath(fname, name);
+ if (argc == 2) {
fext = argv[1];
- StringValue(fext);
- enc = check_path_encoding(fext);
+ fp = StringValueCStr(fext);
+ check_path_encoding(fext);
}
- fname = argv[0];
- FilePathStringValue(fname);
if (NIL_P(fext) || !(enc = rb_enc_compatible(fname, fext))) {
- enc = rb_enc_get(fname);
- fext = Qnil;
+ enc = rb_str_enc_get(fname);
}
- if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname)))
- return rb_str_new_shared(fname);
- p = ruby_enc_find_basename(name, &f, &n, enc);
+ n = RSTRING_LEN(fname);
+ if (n <= 0 || !*name) {
+ return rb_enc_str_new(0, 0, enc);
+ }
+
+ bool mb_enc = enc_mbclen_needed(enc);
+ p = enc_find_basename(name, &f, &n, mb_enc, enc);
if (n >= 0) {
- if (NIL_P(fext)) {
+ if (!fp) {
f = n;
}
else {
- const char *fp;
- fp = StringValueCStr(fext);
if (!(f = rmext(p, f, n, fp, RSTRING_LEN(fext), enc))) {
f = n;
}
RB_GC_GUARD(fext);
}
- if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname);
+ if (f == RSTRING_LEN(fname)) {
+ return rb_str_new_shared(fname);
+ }
}
- basename = rb_str_new(p, f);
- rb_enc_copy(basename, fname);
- return basename;
+ return rb_enc_str_new(p, f, enc);
}
static VALUE rb_file_dirname_n(VALUE fname, int n);
@@ -5010,19 +5209,18 @@ rb_file_dirname_n(VALUE fname, int n)
{
const char *name, *root, *p, *end;
VALUE dirname;
- rb_encoding *enc;
- VALUE sepsv = 0;
- const char **seps;
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
- FilePathStringValue(fname);
- name = StringValueCStr(fname);
+ CheckPath(fname, name);
end = name + RSTRING_LEN(fname);
- enc = rb_enc_get(fname);
- root = skiproot(name, end, enc);
+
+ bool mb_enc = !rb_str_enc_fastpath(fname);
+ rb_encoding *enc = rb_str_enc_get(fname);
+
+ root = skiproot(name, end);
#ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name))
- root = skipprefix(name = root - 2, end, enc);
+ root = skipprefix(name = root - 2, end, mb_enc, enc);
#else
if (root > name + 1)
name = root - 1;
@@ -5031,72 +5229,41 @@ rb_file_dirname_n(VALUE fname, int n)
p = root;
}
else {
- int i;
- switch (n) {
- case 0:
- p = end;
- break;
- case 1:
- if (!(p = strrdirsep(root, end, enc))) p = root;
- break;
- default:
- seps = ALLOCV_N(const char *, sepsv, n);
- for (i = 0; i < n; ++i) seps[i] = root;
- i = 0;
- for (p = root; p < end; ) {
- if (isdirsep(*p)) {
- const char *tmp = p++;
- while (p < end && isdirsep(*p)) p++;
- if (p >= end) break;
- seps[i++] = tmp;
- if (i == n) i = 0;
- }
- else {
- Inc(p, end, enc);
- }
+ p = end;
+ while (n) {
+ if (!(p = strrdirsep(root, p, mb_enc, enc))) {
+ p = root;
+ break;
}
- p = seps[i];
- ALLOCV_END(sepsv);
- break;
+ n--;
}
}
- if (p == name)
- return rb_usascii_str_new2(".");
+
+ if (p == name) {
+ return rb_enc_str_new(".", 1, enc);
+ }
#ifdef DOSISH_DRIVE_LETTER
- if (has_drive_letter(name) && isdirsep(*(name + 2))) {
- const char *top = skiproot(name + 2, end, enc);
- dirname = rb_str_new(name, 3);
+ if (name + 3 < end && has_drive_letter(name) && isdirsep(*(name + 2))) {
+ const char *top = skiproot(name + 2, end);
+ dirname = rb_enc_str_new(name, 3, enc);
rb_str_cat(dirname, top, p - top);
}
else
#endif
- dirname = rb_str_new(name, p - name);
+ dirname = rb_enc_str_new(name, p - name, enc);
#ifdef DOSISH_DRIVE_LETTER
- if (has_drive_letter(name) && root == name + 2 && p - name == 2)
+ if (root == name + 2 && p == root && name[1] == ':')
rb_str_cat(dirname, ".", 1);
#endif
- rb_enc_copy(dirname, fname);
return dirname;
}
-/*
- * accept a String, and return the pointer of the extension.
- * if len is passed, set the length of extension to it.
- * returned pointer is in ``name'' or NULL.
- * returns *len
- * no dot NULL 0
- * dotfile top 0
- * end with dot dot 1
- * .ext dot len of .ext
- * .ext:stream dot len of .ext without :stream (NTFS only)
- *
- */
-const char *
-ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
+static inline const char *
+enc_find_extname(const char *name, long *len, bool mb_enc, rb_encoding *enc)
{
const char *p, *e, *end = name + (len ? *len : (long)strlen(name));
- p = strrdirsep(name, end, enc); /* get the last path component */
+ p = strrdirsep(name, end, mb_enc, enc); /* get the last path component */
if (!p)
p = name;
else
@@ -5129,7 +5296,7 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
#endif
else if (isdirsep(*p))
break;
- Inc(p, end, enc);
+ Inc(p, end, mb_enc, enc);
}
if (len) {
@@ -5145,57 +5312,106 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
}
/*
+ * accept a String, and return the pointer of the extension.
+ * if len is passed, set the length of extension to it.
+ * returned pointer is in ``name'' or NULL.
+ * returns *len
+ * no dot NULL 0
+ * dotfile top 0
+ * end with dot dot 1
+ * .ext dot len of .ext
+ * .ext:stream dot len of .ext without :stream (NTFS only)
+ *
+ */
+const char *
+ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
+{
+ return enc_find_extname(name, len, enc_mbclen_needed(enc), enc);
+}
+
+/*
+ * :markup: markdown
+ *
* call-seq:
- * File.extname(path) -> string
+ * File.extname(path) -> extension
+ *
+ * Returns the filename extension --
+ * usually the portion of the string `path`
+ * beginning from the last period:
*
- * Returns the extension (the portion of file name in +path+
- * starting from the last period).
+ * ```ruby
+ * File.extname('t.rb') # => ".rb"
+ * File.extname('foo.bar.t.rb') # => ".rb"
+ * File.extname('foo/bar/t.rb') # => ".rb"
+ * File.extname('nosuch.txt') # => ".txt" # Path need not exist.
+ * ```
*
- * If +path+ is a dotfile, or starts with a period, then the starting
- * dot is not dealt with the start of the extension.
+ * Returns an empty string when there is no period:
*
- * An empty string will also be returned when the period is the last character
- * in +path+.
+ * ```ruby
+ * File.extname('foo') # => ""
+ * ```
*
- * On Windows, trailing dots are truncated.
+ * Returns an empty string when the only period is the first character:
*
- * File.extname("test.rb") #=> ".rb"
- * File.extname("a/b/d/test.rb") #=> ".rb"
- * File.extname(".a/b/d/test.rb") #=> ".rb"
- * File.extname("foo.") #=> "" on Windows
- * File.extname("foo.") #=> "." on non-Windows
- * File.extname("test") #=> ""
- * File.extname(".profile") #=> ""
- * File.extname(".profile.sh") #=> ".sh"
+ * ```ruby
+ * File.extname('.irbrc') # => ""
+ * ```
+ *
+ * Returns an empty string or `'.'` when `path` ends with a period:
+ *
+ * ```ruby
+ * File.extname('foo.') # => "" # On Windows.
+ * File.extname('foo.') # => "." # Elsewhere.
+ * File.extname('foo....') # => "" # On Windows.
+ * File.extname('foo....') # => "." # Elsewhere.
+ * ```
*
*/
static VALUE
rb_file_s_extname(VALUE klass, VALUE fname)
{
- const char *name, *e;
- long len;
- VALUE extname;
+ const char *name;
+ CheckPath(fname, name);
+ long len = RSTRING_LEN(fname);
- FilePathStringValue(fname);
- name = StringValueCStr(fname);
- len = RSTRING_LEN(fname);
- e = ruby_enc_find_extname(name, &len, rb_enc_get(fname));
- if (len < 1)
- return rb_str_new(0, 0);
- extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! */
- return extname;
+ if (len < 1) {
+ return rb_enc_str_new(0, 0, rb_str_enc_get(fname));
+ }
+
+ bool mb_enc = !rb_str_enc_fastpath(fname);
+ rb_encoding *enc = rb_str_enc_get(fname);
+
+ const char *ext = enc_find_extname(name, &len, mb_enc, enc);
+ return rb_enc_str_new(ext, len, enc);
}
/*
- * call-seq:
+ * call-seq:
* File.path(path) -> string
*
- * Returns the string representation of the path
+ * Returns the string representation of the path
*
* File.path(File::NULL) #=> "/dev/null"
* File.path(Pathname.new("/tmp")) #=> "/tmp"
*
+ * If +path+ is not a String:
+ *
+ * 1. If it has the +to_path+ method, that method will be called to
+ * coerce to a String.
+ *
+ * 2. Otherwise, or if the coerced result is not a String either, the
+ * standard coercion using +to_str+ method will take place on that
+ * object. (See also String.try_convert)
+ *
+ * The coerced string must satisfy the following conditions:
+ *
+ * 1. It must be in an ASCII-compatible encoding; otherwise, an
+ * Encoding::CompatibilityError is raised.
+ *
+ * 2. It must not contain the NUL character (<tt>\0</tt>); otherwise,
+ * an ArgumentError is raised.
*/
static VALUE
@@ -5222,15 +5438,17 @@ rb_file_s_split(VALUE klass, VALUE path)
return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef));
}
+static VALUE rb_file_join_ary(VALUE ary);
+
static VALUE
file_inspect_join(VALUE ary, VALUE arg, int recur)
{
if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array");
- return rb_file_join(arg);
+ return rb_file_join_ary(arg);
}
static VALUE
-rb_file_join(VALUE ary)
+rb_file_join_ary(VALUE ary)
{
long len, i;
VALUE result, tmp;
@@ -5278,11 +5496,11 @@ rb_file_join(VALUE ary)
rb_enc_copy(result, tmp);
}
else {
- tail = chompdirsep(name, name + len, rb_enc_get(result));
- if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
+ tail = chompdirsep(name, name + len, true, rb_enc_get(result));
+ if (RSTRING_LEN(tmp) > 0 && isdirsep(RSTRING_PTR(tmp)[0])) {
rb_str_set_len(result, tail - name);
}
- else if (!*tail) {
+ else if (tail == name + len) {
rb_str_cat(result, "/", 1);
}
}
@@ -5295,6 +5513,77 @@ rb_file_join(VALUE ary)
return result;
}
+static inline VALUE
+rb_file_join_fastpath(long argc, VALUE *args) // joins args with "/" when all are fast-path Strings; returns 0 otherwise
+{
+ long size = argc; // capacity estimate: one byte per argument, plus each argument's length (added below)
+
+ long i;
+ for (i = 0; i < argc; i++) {
+ VALUE tmp = args[i];
+ if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) {
+ size += RSTRING_LEN(tmp);
+ }
+ else {
+ return 0; // not a String, or not on the encoding fast path: decline without building anything
+ }
+ }
+
+ VALUE result = rb_str_buf_new(size);
+
+ int encidx = ENCODING_GET_INLINED(args[0]); // the result starts out with the first argument's encoding
+ ENCODING_SET_INLINED(result, encidx);
+ rb_str_buf_append(result, args[0]);
+
+ const char *name = RSTRING_PTR(result); // NOTE(review): cached across the appends below; assumes the preallocated buffer is never reallocated -- confirm
+ for (i = 1; i < argc; i++) {
+ VALUE tmp = args[i];
+ long len = RSTRING_LEN(result);
+
+ const char *tmp_s;
+ long tmp_len;
+ RSTRING_GETMEM(tmp, tmp_s, tmp_len);
+
+ if (tmp_len > 0 && isdirsep(tmp_s[0])) {
+ // right side has a leading separator; strip all trailing separators from the left side.
+ long chomp = len;
+ while (chomp > 0 && isdirsep(name[chomp - 1])) {
+ --chomp;
+ }
+ rb_str_set_len(result, chomp);
+ }
+ else if (len < 1 || !isdirsep(name[len - 1])) {
+ // neither side has a separator at the joint; append one.
+ rb_str_cat(result, "/", 1);
+ }
+
+ if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) { // encodings differ: adopt whatever fs_enc_check returns
+ rb_encoding *new_enc = fs_enc_check(result, tmp);
+ rb_enc_associate(result, new_enc);
+ encidx = rb_enc_to_index(new_enc);
+ }
+
+ rb_str_buf_cat(result, tmp_s, tmp_len);
+ }
+
+ rb_str_null_check(result); // NOTE(review): presumably raises if the joined path contains a NUL byte -- confirm
+ return result;
+}
+
+static inline VALUE
+rb_file_join(long argc, VALUE *args) // joins argc path components: fast path first, array-based join as fallback
+{
+ if (RB_UNLIKELY(argc == 0)) {
+ return rb_str_new(0, 0); // no components: return a new empty string
+ }
+
+ VALUE result = rb_file_join_fastpath(argc, args); // 0 means the fast path declined these arguments
+ if (RB_LIKELY(result)) {
+ return result;
+ }
+
+ return rb_file_join_ary(rb_ary_new_from_values(argc, args)); // general path: wrap the arguments in an Array and join that
+}
/*
* call-seq:
* File.join(string, ...) -> string
@@ -5307,9 +5596,9 @@ rb_file_join(VALUE ary)
*/
static VALUE
-rb_file_s_join(VALUE klass, VALUE args)
+rb_file_s_join(int argc, VALUE *argv, VALUE klass)
{
- return rb_file_join(args);
+ return rb_file_join(argc, argv);
}
#if defined(HAVE_TRUNCATE)
@@ -5449,7 +5738,7 @@ rb_thread_flock(void *data)
* call-seq:
* flock(locking_constant) -> 0 or false
*
- * Locks or unlocks file +self+ according to the given `locking_constant`,
+ * Locks or unlocks file `self` according to the given `locking_constant`,
* a bitwise OR of the values in the table below.
*
* Not available on all platforms.
@@ -5459,10 +5748,10 @@ rb_thread_flock(void *data)
*
* | Constant | Lock | Effect
* |-----------------|--------------|-----------------------------------------------------------------------------------------------------------------|
- * | +File::LOCK_EX+ | Exclusive | Only one process may hold an exclusive lock for +self+ at a time. |
- * | +File::LOCK_NB+ | Non-blocking | No blocking; may be combined with +File::LOCK_SH+ or +File::LOCK_EX+ using the bitwise OR operator <tt>\|</tt>. |
- * | +File::LOCK_SH+ | Shared | Multiple processes may each hold a shared lock for +self+ at the same time. |
- * | +File::LOCK_UN+ | Unlock | Remove an existing lock held by this process. |
+ * | `File::LOCK_EX` | Exclusive | Only one process may hold an exclusive lock for `self` at a time. |
+ * | `File::LOCK_NB` | Non-blocking | No blocking; may be combined with `File::LOCK_SH` or `File::LOCK_EX` using the bitwise OR operator <tt>\|</tt>. |
+ * | `File::LOCK_SH` | Shared | Multiple processes may each hold a shared lock for `self` at the same time. |
+ * | `File::LOCK_UN` | Unlock | Remove an existing lock held by this process. |
*
* Example:
*
@@ -5589,11 +5878,11 @@ test_check(int n, int argc, VALUE *argv)
* | <tt>'z'</tt> | Whether the entity exists and is of length zero. |
*
* - This test operates only on the entity at `path0`,
- * and returns an integer size or +nil+:
+ * and returns an integer size or `nil`:
*
* | Character | Test |
* |:------------:|:---------------------------------------------------------------------------------------------|
- * | <tt>'s'</tt> | Returns positive integer size if the entity exists and has non-zero length, +nil+ otherwise. |
+ * | <tt>'s'</tt> | Returns positive integer size if the entity exists and has non-zero length, `nil` otherwise. |
*
* - Each of these tests operates only on the entity at `path0`,
* and returns a Time object;
@@ -5775,13 +6064,48 @@ rb_f_test(int argc, VALUE *argv, VALUE _)
/*
* Document-class: File::Stat
*
- * Objects of class File::Stat encapsulate common status information
- * for File objects. The information is recorded at the moment the
- * File::Stat object is created; changes made to the file after that
- * point will not be reflected. File::Stat objects are returned by
- * IO#stat, File::stat, File#lstat, and File::lstat. Many of these
- * methods return platform-specific values, and not all values are
- * meaningful on all systems. See also Kernel#test.
+ * A \File::Stat object contains information about an entry in the file system.
+ *
+ * Each of these methods returns a new \File::Stat object:
+ *
+ * - File#lstat.
+ * - File::Stat.new.
+ * - File::lstat.
+ * - File::stat.
+ * - IO#stat.
+ *
+ * === Snapshot
+ *
+ * A new \File::Stat object takes an immediate "snapshot" of the entry's information;
+ * the captured information is never updated,
+ * regardless of changes in the actual entry.
+ *
+ * The entry must exist when File::Stat.new is called:
+ *
+ * filepath = 't.tmp'
+ * File.exist?(filepath) # => false
+ * File::Stat.new(filepath) # Raises Errno::ENOENT: No such file or directory.
+ * File.write(filepath, 'foo') # Create the file.
+ * stat = File::Stat.new(filepath) # Okay.
+ *
+ * Later changes to the actual entry do not change the \File::Stat object:
+ *
+ * File.atime(filepath) # => 2026-04-01 11:51:38.0014518 -0500
+ * stat.atime # => 2026-04-01 11:51:38.0014518 -0500
+ * File.write(filepath, 'bar')
+ * File.atime(filepath) # => 2026-04-01 11:58:11.922614 -0500
+ * stat.atime # => 2026-04-01 11:51:38.0014518 -0500
+ * File.delete(filepath)
+ * stat.atime # => 2026-04-01 11:51:38.0014518 -0500
+ *
+ * === OS-Dependencies
+ *
+ * Methods in a \File::Stat object may return platform-dependent values,
+ * and not all values are meaningful on all systems;
+ * for example, File::Stat#blocks returns +nil+ on Windows,
+ * but returns an integer on Linux.
+ *
+ * See also Kernel#test.
*/
static VALUE
@@ -6475,96 +6799,6 @@ rb_is_absolute_path(const char *path)
return 0;
}
-#ifndef ENABLE_PATH_CHECK
-# if defined DOSISH || defined __CYGWIN__
-# define ENABLE_PATH_CHECK 0
-# else
-# define ENABLE_PATH_CHECK 1
-# endif
-#endif
-
-#if ENABLE_PATH_CHECK
-static int
-path_check_0(VALUE path)
-{
- struct stat st;
- const char *p0 = StringValueCStr(path);
- const char *e0;
- rb_encoding *enc;
- char *p = 0, *s;
-
- if (!rb_is_absolute_path(p0)) {
- char *buf = ruby_getcwd();
- VALUE newpath;
-
- newpath = rb_str_new2(buf);
- xfree(buf);
-
- rb_str_cat2(newpath, "/");
- rb_str_cat2(newpath, p0);
- path = newpath;
- p0 = RSTRING_PTR(path);
- }
- e0 = p0 + RSTRING_LEN(path);
- enc = rb_enc_get(path);
- for (;;) {
-#ifndef S_IWOTH
-# define S_IWOTH 002
-#endif
- if (STAT(p0, &st) == 0 && S_ISDIR(st.st_mode) && (st.st_mode & S_IWOTH)
-#ifdef S_ISVTX
- && !(p && (st.st_mode & S_ISVTX))
-#endif
- && !access(p0, W_OK)) {
- rb_enc_warn(enc, "Insecure world writable dir %s in PATH, mode 0%"
-#if SIZEOF_DEV_T > SIZEOF_INT
- PRI_MODET_PREFIX"o",
-#else
- "o",
-#endif
- p0, st.st_mode);
- if (p) *p = '/';
- RB_GC_GUARD(path);
- return 0;
- }
- s = strrdirsep(p0, e0, enc);
- if (p) *p = '/';
- if (!s || s == p0) return 1;
- p = s;
- e0 = p;
- *p = '\0';
- }
-}
-#endif
-
-int
-rb_path_check(const char *path)
-{
- rb_warn_deprecated_to_remove_at(3.6, "rb_path_check", NULL);
-#if ENABLE_PATH_CHECK
- const char *p0, *p, *pend;
- const char sep = PATH_SEP_CHAR;
-
- if (!path) return 1;
-
- pend = path + strlen(path);
- p0 = path;
- p = strchr(path, sep);
- if (!p) p = pend;
-
- for (;;) {
- if (!path_check_0(rb_str_new(p0, p - p0))) {
- return 0; /* not safe */
- }
- p0 = p + 1;
- if (p0 > pend) break;
- p = strchr(p0, sep);
- if (!p) p = pend;
- }
-#endif
- return 1;
-}
-
int
ruby_is_fd_loadable(int fd)
{
@@ -6785,7 +7019,7 @@ const char ruby_null_device[] =
* Methods File.new and File.open each may take string argument +mode+, which:
*
* - Begins with a 1- or 2-character
- * {read/write mode}[rdoc-ref:File@Read-2FWrite+Mode].
+ * {read/write mode}[rdoc-ref:File@ReadWrite+Mode].
* - May also contain a 1-character {data mode}[rdoc-ref:File@Data+Mode].
* - May also contain a 1-character
* {file-create mode}[rdoc-ref:File@File-Create+Mode].
@@ -7426,7 +7660,7 @@ const char ruby_null_device[] =
*
* First, what's elsewhere. Class \File:
*
- * - Inherits from {class IO}[rdoc-ref:IO@What-27s+Here],
+ * - Inherits from {class IO}[rdoc-ref:IO@Whats+Here],
* in particular, methods for creating, reading, and writing files
* - Includes module FileTest,
* which provides dozens of additional methods.
@@ -7641,7 +7875,7 @@ Init_File(void)
/* separates directory parts in path */
rb_define_const(rb_cFile, "SEPARATOR", separator);
rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
- rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);
+ rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1);
#ifdef DOSISH
/* platform specific alternative separator */
@@ -7945,11 +8179,11 @@ Init_File(void)
*
* ==== File::FNM_EXTGLOB
*
- * Flag File::FNM_EXTGLOB enables pattern <tt>'{_a_,_b_}'</tt>,
+ * Flag File::FNM_EXTGLOB enables pattern <tt>'{a,b}'</tt>,
* which matches pattern '_a_' and pattern '_b_';
* behaves like
* a {regexp union}[rdoc-ref:Regexp.union]
- * (e.g., <tt>'(?:_a_|_b_)'</tt>):
+ * (e.g., <tt>'(?:a|b)'</tt>):
*
* pattern = '{LEGAL,BSDL}'
* Dir.glob(pattern) # => ["LEGAL", "BSDL"]