summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2025-12-20 13:48:10 +0100
committerJean Boussier <jean.boussier@gmail.com>2026-02-12 15:52:42 +0100
commit3b5ee7488c8064cd3b1afc41bc43afdb907fdf16 (patch)
treef89f2b23ef11412d6afb16b20b4057fd17c8c731
parente26bef571c3c916826fdd2f468dea7ca41369f8e (diff)
Dir.scan: return or yield children along with their type
[Feature #21800] There are numerous ruby tools that need to recursively scan the project directory, such as Zeitwerk, rubocop, etc. All of them end up listing childs of a directory then for each child emit a `stat` call to check if it's a directory or not. This is common enough for a pattern that on most operating systems, `struct dirent` include a `dtype` member that allows to check the file type without issuing a any extra system calls. By yielding that type, we can make these routines twice as fast. ``` $ hyperfine './miniruby --disable-all --yjit ../test.rb' 'OPT=1 ./miniruby --disable-all --yjit ../test.rb' Benchmark 1: ./miniruby --disable-all --yjit ../test.rb Time (mean ± σ): 1.428 s ± 0.062 s [User: 0.342 s, System: 1.070 s] Range (min … max): 1.396 s … 1.601 s 10 runs Benchmark 2: OPT=1 ./miniruby --disable-all --yjit ../test.rb Time (mean ± σ): 673.8 ms ± 5.8 ms [User: 146.0 ms, System: 527.3 ms] Range (min … max): 659.7 ms … 679.6 ms 10 runs Summary OPT=1 ./miniruby --disable-all --yjit ../test.rb ran 2.12 ± 0.09 times faster than ./miniruby --disable-all --yjit ../test.rb ``` ```ruby if ENV['OPT'] def count_ruby_files count = 0 queue = [File.expand_path(__dir__)] while dir = queue.pop Dir.scan(dir) do |name, type| next if name.start_with?(".") case type when :directory queue << File.join(dir, name) when :file count += 1 if name.end_with?(".rb") end end end count end else def count_ruby_files count = 0 queue = [File.expand_path(__dir__)] while dir = queue.pop Dir.each_child(dir) do |name| next if name.start_with?(".") abspath = File.join(dir, name) if File.directory?(abspath) queue << abspath else count += 1 if name.end_with?(".rb") end end end count end end 10.times do count_ruby_files end ```
-rw-r--r--dir.c158
-rw-r--r--spec/ruby/core/dir/fixtures/common.rb39
-rw-r--r--spec/ruby/core/dir/scan_spec.rb224
3 files changed, 398 insertions, 23 deletions
diff --git a/dir.c b/dir.c
index 25ed59c668..fc2e6b7715 100644
--- a/dir.c
+++ b/dir.c
@@ -504,6 +504,20 @@ fnmatch(
}
VALUE rb_cDir;
+static VALUE sym_directory, sym_link, sym_file, sym_unknown;
+
+#ifdef DT_BLK
+static VALUE sym_block_device;
+#endif
+#ifdef DT_CHR
+static VALUE sym_character_device;
+#endif
+#ifdef DT_FIFO
+static VALUE sym_fifo;
+#endif
+#ifdef DT_SOCK
+static VALUE sym_socket;
+#endif
struct dir_data {
DIR *dir;
@@ -905,14 +919,61 @@ dir_read(VALUE dir)
}
}
-static VALUE dir_each_entry(VALUE, VALUE (*)(VALUE, VALUE), VALUE, int);
+static VALUE dir_each_entry(VALUE, VALUE (*)(VALUE, VALUE, unsigned char), VALUE, int);
static VALUE
-dir_yield(VALUE arg, VALUE path)
+dir_yield(VALUE arg, VALUE path, unsigned char dtype)
{
return rb_yield(path);
}
+static VALUE
+dir_yield_with_type(VALUE arg, VALUE path, unsigned char dtype)
+{
+ VALUE type;
+ switch (dtype) {
+#ifdef DT_BLK
+ case DT_BLK:
+ type = sym_block_device;
+ break;
+#endif
+#ifdef DT_CHR
+ case DT_CHR:
+ type = sym_character_device;
+ break;
+#endif
+ case DT_DIR:
+ type = sym_directory;
+ break;
+#ifdef DT_FIFO
+ case DT_FIFO:
+ type = sym_fifo;
+ break;
+#endif
+ case DT_LNK:
+ type = sym_link;
+ break;
+ case DT_REG:
+ type = sym_file;
+ break;
+#ifdef DT_SOCK
+ case DT_SOCK:
+ type = sym_socket;
+ break;
+#endif
+ default:
+ type = sym_unknown;
+ break;
+ }
+
+ if (NIL_P(arg)) {
+ return rb_yield_values(2, path, type);
+ }
+ else {
+ return rb_ary_push(arg, rb_assoc_new(path, type));
+ }
+}
+
/*
* call-seq:
* each {|entry_name| ... } -> self
@@ -940,7 +1001,7 @@ dir_each(VALUE dir)
}
static VALUE
-dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE), VALUE arg, int children_only)
+dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE, unsigned char), VALUE arg, int children_only)
{
struct dir_data *dirp;
struct dirent *dp;
@@ -966,7 +1027,7 @@ dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE), VALUE arg, int children_o
else
#endif
path = rb_external_str_new_with_enc(name, namlen, dirp->enc);
- (*each)(arg, path);
+ (*each)(arg, path, dp->d_type);
}
return dir;
}
@@ -3471,10 +3532,16 @@ dir_foreach(int argc, VALUE *argv, VALUE io)
}
static VALUE
+dir_entry_ary_push(VALUE ary, VALUE entry, unsigned char ftype)
+{
+ return rb_ary_push(ary, entry);
+}
+
+static VALUE
dir_collect(VALUE dir)
{
VALUE ary = rb_ary_new();
- dir_each_entry(dir, rb_ary_push, ary, FALSE);
+ dir_each_entry(dir, dir_entry_ary_push, ary, FALSE);
return ary;
}
@@ -3569,12 +3636,37 @@ static VALUE
dir_collect_children(VALUE dir)
{
VALUE ary = rb_ary_new();
- dir_each_entry(dir, rb_ary_push, ary, TRUE);
+ dir_each_entry(dir, dir_entry_ary_push, ary, TRUE);
return ary;
}
/*
* call-seq:
+ * children -> array
+ *
+ * Returns an array of the entry names in +self+ along with their type
+ * except for <tt>'.'</tt> and <tt>'..'</tt>:
+ *
+ * dir = Dir.new('/example')
+ * dir.scan # => [["config.h", :file], ["lib", :directory], ["main.rb", :file]]
+ *
+ */
+static VALUE
+dir_scan_children(VALUE dir)
+{
+ if (rb_block_given_p()) {
+ dir_each_entry(dir, dir_yield_with_type, Qnil, TRUE);
+ return Qnil;
+ }
+ else {
+ VALUE ary = rb_ary_new();
+ dir_each_entry(dir, dir_yield_with_type, ary, TRUE);
+ return ary;
+ }
+}
+
+/*
+ * call-seq:
* Dir.children(dirpath) -> array
* Dir.children(dirpath, encoding: 'UTF-8') -> array
*
@@ -3601,6 +3693,40 @@ dir_s_children(int argc, VALUE *argv, VALUE io)
return rb_ensure(dir_collect_children, dir, dir_close, dir);
}
+/*
+ * call-seq:
+ * Dir.scan(dirpath) {|entry_name, entry_type| ... } -> nil
+ * Dir.scan(dirpath, encoding: 'UTF-8') {|entry_name, entry_type| ... } -> nil
+ * Dir.scan(dirpath) -> [[entry_name, entry_type], ...]
+ * Dir.scan(dirpath, encoding: 'UTF-8') -> [[entry_name, entry_type], ...]
+ *
+ * Yields or returns an array of the entry names in the directory at +dirpath+
+ * associated with their type, except for <tt>'.'</tt> and <tt>'..'</tt>;
+ * sets the given encoding onto each returned entry name.
+ *
+ * The type symbol is one of:
+ * ``<code>:file</code>'', ``<code>:directory</code>'',
+ * ``<code>:characterSpecial</code>'', ``<code>:blockSpecial</code>'',
+ * ``<code>:fifo</code>'', ``<code>:link</code>'',
+ * or ``<code>:socket</code>'':
+ *
+ * Dir.children('/example') # => [["config.h", :file], ["lib", :directory], ["main.rb", :file]]
+ * Dir.children('/example').first.first.encoding
+ * # => #<Encoding:UTF-8>
+ * Dir.children('/example', encoding: 'US-ASCII').first.encoding
+ * # => #<Encoding:US-ASCII>
+ *
+ * See {String Encoding}[rdoc-ref:encodings.rdoc@String+Encoding].
+ *
+ * Raises an exception if the directory does not exist.
+ */
+static VALUE
+dir_s_scan(int argc, VALUE *argv, VALUE klass)
+{
+ VALUE dir = dir_open_dir(argc, argv);
+ return rb_ensure(dir_scan_children, dir, dir_close, dir);
+}
+
static int
fnmatch_brace(const char *pattern, VALUE val, void *enc)
{
@@ -3804,6 +3930,24 @@ rb_dir_s_empty_p(VALUE obj, VALUE dirname)
void
Init_Dir(void)
{
+ sym_directory = ID2SYM(rb_intern("directory"));
+ sym_link = ID2SYM(rb_intern("link"));
+ sym_file = ID2SYM(rb_intern("file"));
+ sym_unknown = ID2SYM(rb_intern("unknown"));
+
+#ifdef DT_BLK
+ sym_block_device = ID2SYM(rb_intern("blockSpecial"));
+#endif
+#ifdef DT_CHR
+ sym_character_device = ID2SYM(rb_intern("characterSpecial"));
+#endif
+#ifdef DT_FIFO
+ sym_fifo = ID2SYM(rb_intern("fifo"));
+#endif
+#ifdef DT_SOCK
+ sym_socket = ID2SYM(rb_intern("socket"));
+#endif
+
rb_gc_register_address(&chdir_lock.path);
rb_gc_register_address(&chdir_lock.thread);
@@ -3817,6 +3961,7 @@ Init_Dir(void)
rb_define_singleton_method(rb_cDir, "entries", dir_entries, -1);
rb_define_singleton_method(rb_cDir, "each_child", dir_s_each_child, -1);
rb_define_singleton_method(rb_cDir, "children", dir_s_children, -1);
+ rb_define_singleton_method(rb_cDir, "scan", dir_s_scan, -1);
rb_define_method(rb_cDir,"fileno", dir_fileno, 0);
rb_define_method(rb_cDir,"path", dir_path, 0);
@@ -3826,6 +3971,7 @@ Init_Dir(void)
rb_define_method(rb_cDir,"each", dir_each, 0);
rb_define_method(rb_cDir,"each_child", dir_each_child_m, 0);
rb_define_method(rb_cDir,"children", dir_collect_children, 0);
+ rb_define_method(rb_cDir,"scan", dir_scan_children, 0);
rb_define_method(rb_cDir,"rewind", dir_rewind, 0);
rb_define_method(rb_cDir,"tell", dir_tell, 0);
rb_define_method(rb_cDir,"seek", dir_seek, 1);
diff --git a/spec/ruby/core/dir/fixtures/common.rb b/spec/ruby/core/dir/fixtures/common.rb
index 848656c9b9..cfec91f68f 100644
--- a/spec/ruby/core/dir/fixtures/common.rb
+++ b/spec/ruby/core/dir/fixtures/common.rb
@@ -115,6 +115,7 @@ module DirSpecs
end
def self.create_mock_dirs
+ delete_mock_dirs
mock_dir_files.each do |name|
file = File.join mock_dir, name
mkdir_p File.dirname(file)
@@ -172,24 +173,28 @@ module DirSpecs
end
end
+ def self.expected_paths_with_type
+ [
+ [".", :directory],
+ ["..", :directory],
+ [".dotfile", :file],
+ [".dotsubdir", :directory],
+ ["brace", :directory],
+ ["deeply", :directory],
+ ["dir", :directory],
+ ["dir_filename_ordering", :file],
+ ["file_one.ext", :file],
+ ["file_two.ext", :file],
+ ["nested", :directory],
+ ["nondotfile", :file],
+ ["special", :directory],
+ ["subdir_one", :directory],
+ ["subdir_two", :directory],
+ ]
+ end
+
def self.expected_paths
- %w[
- .
- ..
- .dotfile
- .dotsubdir
- brace
- deeply
- dir
- dir_filename_ordering
- file_one.ext
- file_two.ext
- nested
- nondotfile
- special
- subdir_one
- subdir_two
- ]
+ expected_paths_with_type.map(&:first)
end
def self.expected_glob_paths
diff --git a/spec/ruby/core/dir/scan_spec.rb b/spec/ruby/core/dir/scan_spec.rb
new file mode 100644
index 0000000000..a34eedf13b
--- /dev/null
+++ b/spec/ruby/core/dir/scan_spec.rb
@@ -0,0 +1,224 @@
+# encoding: utf-8
+
+require_relative '../../spec_helper'
+require_relative 'fixtures/common'
+require_relative '../file/fixtures/file_types'
+
+ruby_version_is "4.1" do
+ describe "Dir.scan" do
+ before :all do
+ FileSpecs.configure_types
+ end
+
+ before :all do
+ DirSpecs.create_mock_dirs
+ end
+
+ after :all do
+ DirSpecs.delete_mock_dirs
+ end
+
+ before :each do
+ @internal = Encoding.default_internal
+ end
+
+ after :each do
+ Encoding.default_internal = @internal
+ end
+
+ it "returns an Array of filename and type pairs in an existing directory including dotfiles" do
+ a = Dir.scan(DirSpecs.mock_dir).sort
+
+ a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]]
+
+ a = Dir.scan("#{DirSpecs.mock_dir}/deeply/nested").sort
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]]
+ end
+
+ it "yields filename and type in an existing directory including dotfiles" do
+ a = []
+ Dir.scan(DirSpecs.mock_dir) do |n, t|
+ a << [n, t]
+ end
+ a.sort!
+ a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]]
+
+ a = []
+ Dir.scan("#{DirSpecs.mock_dir}/deeply/nested") do |n, t|
+ a << [n, t]
+ end
+ a.sort!
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]]
+ end
+
+ it "calls #to_path on non-String arguments" do
+ p = mock('path')
+ p.should_receive(:to_path).and_return(DirSpecs.mock_dir)
+ Dir.scan(p)
+ end
+
+ it "accepts an options Hash" do
+ a = Dir.scan("#{DirSpecs.mock_dir}/deeply/nested", encoding: "utf-8").sort
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]]
+ end
+
+ it "returns children names encoded with the filesystem encoding by default" do
+ # This spec depends on the locale not being US-ASCII because if it is, the
+ # children that are not ascii_only? will be BINARY encoded.
+ children = Dir.scan(File.join(DirSpecs.mock_dir, 'special')).sort
+ encoding = Encoding.find("filesystem")
+ encoding = Encoding::BINARY if encoding == Encoding::US_ASCII
+ platform_is_not :windows do
+ children.should include(["こんにちは.txt".dup.force_encoding(encoding), :file])
+ end
+ children.first.first.encoding.should equal(Encoding.find("filesystem"))
+ end
+
+ it "returns children names encoded with the specified encoding" do
+ dir = File.join(DirSpecs.mock_dir, 'special')
+ children = Dir.scan(dir, encoding: "euc-jp").sort
+ children.first.first.encoding.should equal(Encoding::EUC_JP)
+ end
+
+ it "returns children names transcoded to the default internal encoding" do
+ Encoding.default_internal = Encoding::EUC_KR
+ children = Dir.scan(File.join(DirSpecs.mock_dir, 'special')).sort
+ children.first.first.encoding.should equal(Encoding::EUC_KR)
+ end
+
+ it "raises a SystemCallError if called with a nonexistent directory" do
+ -> { Dir.scan DirSpecs.nonexistent }.should raise_error(SystemCallError)
+ end
+
+ it "handles symlink" do
+ FileSpecs.symlink do |path|
+ Dir.scan(File.dirname(path)).map(&:last).should include(:link)
+ end
+ end
+
+ platform_is_not :windows do
+ it "handles socket" do
+ FileSpecs.socket do |path|
+ Dir.scan(File.dirname(path)).map(&:last).should include(:socket)
+ end
+ end
+
+ it "handles FIFO" do
+ FileSpecs.fifo do |path|
+ Dir.scan(File.dirname(path)).map(&:last).should include(:fifo)
+ end
+ end
+
+ it "handles character devices" do
+ FileSpecs.character_device do |path|
+ Dir.scan(File.dirname(path)).map(&:last).should include(:characterSpecial)
+ end
+ end
+ end
+
+ platform_is_not :freebsd, :windows do
+ with_block_device do
+ it "handles block devices" do
+ FileSpecs.block_device do |path|
+ Dir.scan(File.dirname(path)).map(&:last).should include(:blockSpecial)
+ end
+ end
+ end
+ end
+ end
+
+ describe "Dir#scan" do
+ before :all do
+ DirSpecs.create_mock_dirs
+ end
+
+ after :all do
+ DirSpecs.delete_mock_dirs
+ end
+
+ before :each do
+ @internal = Encoding.default_internal
+ end
+
+ after :each do
+ Encoding.default_internal = @internal
+ @dir.close if @dir
+ end
+
+ it "returns an Array of filenames in an existing directory including dotfiles" do
+ @dir = Dir.new(DirSpecs.mock_dir)
+ a = @dir.scan.sort
+ @dir.close
+
+ a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]]
+
+ @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested")
+ a = @dir.scan.sort
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]]
+ end
+
+ it "yields filename and type in an existing directory including dotfiles" do
+ @dir = Dir.new(DirSpecs.mock_dir)
+ a = []
+ @dir.scan do |n, t|
+ a << [n, t]
+ end
+ a.sort!
+ a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]]
+
+ @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested")
+ a = []
+ @dir.scan do |n, t|
+ a << [n, t]
+ end
+ a.sort!
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]]
+ end
+
+ it "accepts an encoding keyword for the encoding of the entries" do
+ @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested", encoding: "utf-8")
+ dirs = @dir.to_a.sort
+ dirs.each { |d| d.encoding.should == Encoding::UTF_8 }
+ end
+
+ it "returns children names encoded with the filesystem encoding by default" do
+ # This spec depends on the locale not being US-ASCII because if it is, the
+ # children that are not ascii_only? will be BINARY encoded.
+ @dir = Dir.new(File.join(DirSpecs.mock_dir, 'special'))
+ children = @dir.scan.sort
+ encoding = Encoding.find("filesystem")
+ encoding = Encoding::BINARY if encoding == Encoding::US_ASCII
+ platform_is_not :windows do
+ children.should include(["こんにちは.txt".dup.force_encoding(encoding), :file])
+ end
+ children.first.first.encoding.should equal(Encoding.find("filesystem"))
+ end
+
+ it "returns children names encoded with the specified encoding" do
+ path = File.join(DirSpecs.mock_dir, 'special')
+ @dir = Dir.new(path, encoding: "euc-jp")
+ children = @dir.children.sort
+ children.first.encoding.should equal(Encoding::EUC_JP)
+ end
+
+ it "returns children names transcoded to the default internal encoding" do
+ Encoding.default_internal = Encoding::EUC_KR
+ @dir = Dir.new(File.join(DirSpecs.mock_dir, 'special'))
+ children = @dir.scan.sort
+ children.first.first.encoding.should equal(Encoding::EUC_KR)
+ end
+
+ it "returns the same result when called repeatedly" do
+ @dir = Dir.open DirSpecs.mock_dir
+
+ a = []
+ @dir.each {|dir| a << dir}
+
+ b = []
+ @dir.each {|dir| b << dir}
+
+ a.sort.should == b.sort
+ a.sort.should == DirSpecs.expected_paths
+ end
+ end
+end