diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2025-12-20 13:48:10 +0100 |
|---|---|---|
| committer | Jean Boussier <jean.boussier@gmail.com> | 2026-02-12 15:52:42 +0100 |
| commit | 3b5ee7488c8064cd3b1afc41bc43afdb907fdf16 (patch) | |
| tree | f89f2b23ef11412d6afb16b20b4057fd17c8c731 | |
| parent | e26bef571c3c916826fdd2f468dea7ca41369f8e (diff) | |
Dir.scan: return or yield children along with their type
[Feature #21800]
There are numerous ruby tools that need to recursively scan
the project directory, such as Zeitwerk, rubocop, etc.
All of them end up listing the children of a directory and then, for each child,
issue a `stat` call to check whether it's a directory or not.
This pattern is common enough that on most operating
systems, `struct dirent` includes a `d_type` member that allows
checking the file type without issuing any extra system calls.
By yielding that type, we can make these routines twice as fast.
```
$ hyperfine './miniruby --disable-all --yjit ../test.rb' 'OPT=1 ./miniruby --disable-all --yjit ../test.rb'
Benchmark 1: ./miniruby --disable-all --yjit ../test.rb
Time (mean ± σ): 1.428 s ± 0.062 s [User: 0.342 s, System: 1.070 s]
Range (min … max): 1.396 s … 1.601 s 10 runs
Benchmark 2: OPT=1 ./miniruby --disable-all --yjit ../test.rb
Time (mean ± σ): 673.8 ms ± 5.8 ms [User: 146.0 ms, System: 527.3 ms]
Range (min … max): 659.7 ms … 679.6 ms 10 runs
Summary
OPT=1 ./miniruby --disable-all --yjit ../test.rb ran
2.12 ± 0.09 times faster than ./miniruby --disable-all --yjit ../test.rb
```
```ruby
if ENV['OPT']
# Walks the tree rooted at this script's directory and returns how many
# regular files have a name ending in ".rb". Entries whose name starts with
# "." are ignored (and hidden directories are therefore never descended into).
# The entry type comes straight from Dir.scan, so no separate per-entry
# directory check is made here.
def count_ruby_files
  total = 0
  pending = [File.expand_path(__dir__)]
  until pending.empty?
    current = pending.pop
    Dir.scan(current) do |entry, kind|
      next if entry.start_with?(".")

      if kind == :directory
        # Visit this subdirectory on a later iteration.
        pending << File.join(current, entry)
      elsif kind == :file && entry.end_with?(".rb")
        total += 1
      end
    end
  end
  total
end
else
# Baseline variant: walks the tree rooted at this script's directory and
# returns how many non-directory entries have a name ending in ".rb".
# Entries whose name starts with "." are ignored. Every remaining entry is
# checked with File.directory? to decide whether to descend into it.
def count_ruby_files
  total = 0
  pending = [File.expand_path(__dir__)]
  until pending.empty?
    current = pending.pop
    Dir.each_child(current) do |entry|
      next if entry.start_with?(".")

      full_path = File.join(current, entry)
      if File.directory?(full_path)
        # Visit this subdirectory on a later iteration.
        pending << full_path
      elsif entry.end_with?(".rb")
        total += 1
      end
    end
  end
  total
end
end
# Run the scan ten times in a row.
10.times { count_ruby_files }
```
| -rw-r--r-- | dir.c | 158 | ||||
| -rw-r--r-- | spec/ruby/core/dir/fixtures/common.rb | 39 | ||||
| -rw-r--r-- | spec/ruby/core/dir/scan_spec.rb | 224 |
3 files changed, 398 insertions, 23 deletions
@@ -504,6 +504,20 @@ fnmatch( } VALUE rb_cDir; +static VALUE sym_directory, sym_link, sym_file, sym_unknown; + +#ifdef DT_BLK +static VALUE sym_block_device; +#endif +#ifdef DT_CHR +static VALUE sym_character_device; +#endif +#ifdef DT_FIFO +static VALUE sym_fifo; +#endif +#ifdef DT_SOCK +static VALUE sym_socket; +#endif struct dir_data { DIR *dir; @@ -905,14 +919,61 @@ dir_read(VALUE dir) } } -static VALUE dir_each_entry(VALUE, VALUE (*)(VALUE, VALUE), VALUE, int); +static VALUE dir_each_entry(VALUE, VALUE (*)(VALUE, VALUE, unsigned char), VALUE, int); static VALUE -dir_yield(VALUE arg, VALUE path) +dir_yield(VALUE arg, VALUE path, unsigned char dtype) { return rb_yield(path); } +static VALUE +dir_yield_with_type(VALUE arg, VALUE path, unsigned char dtype) +{ + VALUE type; + switch (dtype) { +#ifdef DT_BLK + case DT_BLK: + type = sym_block_device; + break; +#endif +#ifdef DT_CHR + case DT_CHR: + type = sym_character_device; + break; +#endif + case DT_DIR: + type = sym_directory; + break; +#ifdef DT_FIFO + case DT_FIFO: + type = sym_fifo; + break; +#endif + case DT_LNK: + type = sym_link; + break; + case DT_REG: + type = sym_file; + break; +#ifdef DT_SOCK + case DT_SOCK: + type = sym_socket; + break; +#endif + default: + type = sym_unknown; + break; + } + + if (NIL_P(arg)) { + return rb_yield_values(2, path, type); + } + else { + return rb_ary_push(arg, rb_assoc_new(path, type)); + } +} + /* * call-seq: * each {|entry_name| ... 
} -> self @@ -940,7 +1001,7 @@ dir_each(VALUE dir) } static VALUE -dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE), VALUE arg, int children_only) +dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE, unsigned char), VALUE arg, int children_only) { struct dir_data *dirp; struct dirent *dp; @@ -966,7 +1027,7 @@ dir_each_entry(VALUE dir, VALUE (*each)(VALUE, VALUE), VALUE arg, int children_o else #endif path = rb_external_str_new_with_enc(name, namlen, dirp->enc); - (*each)(arg, path); + (*each)(arg, path, dp->d_type); } return dir; } @@ -3471,10 +3532,16 @@ dir_foreach(int argc, VALUE *argv, VALUE io) } static VALUE +dir_entry_ary_push(VALUE ary, VALUE entry, unsigned char ftype) +{ + return rb_ary_push(ary, entry); +} + +static VALUE dir_collect(VALUE dir) { VALUE ary = rb_ary_new(); - dir_each_entry(dir, rb_ary_push, ary, FALSE); + dir_each_entry(dir, dir_entry_ary_push, ary, FALSE); return ary; } @@ -3569,12 +3636,37 @@ static VALUE dir_collect_children(VALUE dir) { VALUE ary = rb_ary_new(); - dir_each_entry(dir, rb_ary_push, ary, TRUE); + dir_each_entry(dir, dir_entry_ary_push, ary, TRUE); return ary; } /* * call-seq: + * children -> array + * + * Returns an array of the entry names in +self+ along with their type + * except for <tt>'.'</tt> and <tt>'..'</tt>: + * + * dir = Dir.new('/example') + * dir.scan # => [["config.h", :file], ["lib", :directory], ["main.rb", :file]] + * + */ +static VALUE +dir_scan_children(VALUE dir) +{ + if (rb_block_given_p()) { + dir_each_entry(dir, dir_yield_with_type, Qnil, TRUE); + return Qnil; + } + else { + VALUE ary = rb_ary_new(); + dir_each_entry(dir, dir_yield_with_type, ary, TRUE); + return ary; + } +} + +/* + * call-seq: * Dir.children(dirpath) -> array * Dir.children(dirpath, encoding: 'UTF-8') -> array * @@ -3601,6 +3693,40 @@ dir_s_children(int argc, VALUE *argv, VALUE io) return rb_ensure(dir_collect_children, dir, dir_close, dir); } +/* + * call-seq: + * Dir.scan(dirpath) {|entry_name, entry_type| ... 
} -> nil + * Dir.scan(dirpath, encoding: 'UTF-8') {|entry_name, entry_type| ... } -> nil + * Dir.scan(dirpath) -> [[entry_name, entry_type], ...] + * Dir.scan(dirpath, encoding: 'UTF-8') -> [[entry_name, entry_type], ...] + * + * Yields or returns an array of the entry names in the directory at +dirpath+ + * associated with their type, except for <tt>'.'</tt> and <tt>'..'</tt>; + * sets the given encoding onto each returned entry name. + * + * The type symbol is one of: + * ``<code>:file</code>'', ``<code>:directory</code>'', + * ``<code>:characterSpecial</code>'', ``<code>:blockSpecial</code>'', + * ``<code>:fifo</code>'', ``<code>:link</code>'', + * ``<code>:socket</code>'', or ``<code>:unknown</code>'': + * + * Dir.scan('/example') # => [["config.h", :file], ["lib", :directory], ["main.rb", :file]] + * Dir.scan('/example').first.first.encoding + * # => #<Encoding:UTF-8> + * Dir.scan('/example', encoding: 'US-ASCII').first.first.encoding + * # => #<Encoding:US-ASCII> + * + * See {String Encoding}[rdoc-ref:encodings.rdoc@String+Encoding]. + * + * Raises an exception if the directory does not exist. 
+ */ +static VALUE +dir_s_scan(int argc, VALUE *argv, VALUE klass) +{ + VALUE dir = dir_open_dir(argc, argv); + return rb_ensure(dir_scan_children, dir, dir_close, dir); +} + static int fnmatch_brace(const char *pattern, VALUE val, void *enc) { @@ -3804,6 +3930,24 @@ rb_dir_s_empty_p(VALUE obj, VALUE dirname) void Init_Dir(void) { + sym_directory = ID2SYM(rb_intern("directory")); + sym_link = ID2SYM(rb_intern("link")); + sym_file = ID2SYM(rb_intern("file")); + sym_unknown = ID2SYM(rb_intern("unknown")); + +#ifdef DT_BLK + sym_block_device = ID2SYM(rb_intern("blockSpecial")); +#endif +#ifdef DT_CHR + sym_character_device = ID2SYM(rb_intern("characterSpecial")); +#endif +#ifdef DT_FIFO + sym_fifo = ID2SYM(rb_intern("fifo")); +#endif +#ifdef DT_SOCK + sym_socket = ID2SYM(rb_intern("socket")); +#endif + rb_gc_register_address(&chdir_lock.path); rb_gc_register_address(&chdir_lock.thread); @@ -3817,6 +3961,7 @@ Init_Dir(void) rb_define_singleton_method(rb_cDir, "entries", dir_entries, -1); rb_define_singleton_method(rb_cDir, "each_child", dir_s_each_child, -1); rb_define_singleton_method(rb_cDir, "children", dir_s_children, -1); + rb_define_singleton_method(rb_cDir, "scan", dir_s_scan, -1); rb_define_method(rb_cDir,"fileno", dir_fileno, 0); rb_define_method(rb_cDir,"path", dir_path, 0); @@ -3826,6 +3971,7 @@ Init_Dir(void) rb_define_method(rb_cDir,"each", dir_each, 0); rb_define_method(rb_cDir,"each_child", dir_each_child_m, 0); rb_define_method(rb_cDir,"children", dir_collect_children, 0); + rb_define_method(rb_cDir,"scan", dir_scan_children, 0); rb_define_method(rb_cDir,"rewind", dir_rewind, 0); rb_define_method(rb_cDir,"tell", dir_tell, 0); rb_define_method(rb_cDir,"seek", dir_seek, 1); diff --git a/spec/ruby/core/dir/fixtures/common.rb b/spec/ruby/core/dir/fixtures/common.rb index 848656c9b9..cfec91f68f 100644 --- a/spec/ruby/core/dir/fixtures/common.rb +++ b/spec/ruby/core/dir/fixtures/common.rb @@ -115,6 +115,7 @@ module DirSpecs end def self.create_mock_dirs + 
delete_mock_dirs mock_dir_files.each do |name| file = File.join mock_dir, name mkdir_p File.dirname(file) @@ -172,24 +173,28 @@ module DirSpecs end end + def self.expected_paths_with_type + [ + [".", :directory], + ["..", :directory], + [".dotfile", :file], + [".dotsubdir", :directory], + ["brace", :directory], + ["deeply", :directory], + ["dir", :directory], + ["dir_filename_ordering", :file], + ["file_one.ext", :file], + ["file_two.ext", :file], + ["nested", :directory], + ["nondotfile", :file], + ["special", :directory], + ["subdir_one", :directory], + ["subdir_two", :directory], + ] + end + def self.expected_paths - %w[ - . - .. - .dotfile - .dotsubdir - brace - deeply - dir - dir_filename_ordering - file_one.ext - file_two.ext - nested - nondotfile - special - subdir_one - subdir_two - ] + expected_paths_with_type.map(&:first) end def self.expected_glob_paths diff --git a/spec/ruby/core/dir/scan_spec.rb b/spec/ruby/core/dir/scan_spec.rb new file mode 100644 index 0000000000..a34eedf13b --- /dev/null +++ b/spec/ruby/core/dir/scan_spec.rb @@ -0,0 +1,224 @@ +# encoding: utf-8 + +require_relative '../../spec_helper' +require_relative 'fixtures/common' +require_relative '../file/fixtures/file_types' + +ruby_version_is "4.1" do + describe "Dir.scan" do + before :all do + FileSpecs.configure_types + end + + before :all do + DirSpecs.create_mock_dirs + end + + after :all do + DirSpecs.delete_mock_dirs + end + + before :each do + @internal = Encoding.default_internal + end + + after :each do + Encoding.default_internal = @internal + end + + it "returns an Array of filename and type pairs in an existing directory including dotfiles" do + a = Dir.scan(DirSpecs.mock_dir).sort + + a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]] + + a = Dir.scan("#{DirSpecs.mock_dir}/deeply/nested").sort + a.should == [[".dotfile.ext", :file], ["directory", :directory]] + end + + it "yields filename and type in an existing directory including 
dotfiles" do + a = [] + Dir.scan(DirSpecs.mock_dir) do |n, t| + a << [n, t] + end + a.sort! + a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]] + + a = [] + Dir.scan("#{DirSpecs.mock_dir}/deeply/nested") do |n, t| + a << [n, t] + end + a.sort! + a.should == [[".dotfile.ext", :file], ["directory", :directory]] + end + + it "calls #to_path on non-String arguments" do + p = mock('path') + p.should_receive(:to_path).and_return(DirSpecs.mock_dir) + Dir.scan(p) + end + + it "accepts an options Hash" do + a = Dir.scan("#{DirSpecs.mock_dir}/deeply/nested", encoding: "utf-8").sort + a.should == [[".dotfile.ext", :file], ["directory", :directory]] + end + + it "returns children names encoded with the filesystem encoding by default" do + # This spec depends on the locale not being US-ASCII because if it is, the + # children that are not ascii_only? will be BINARY encoded. + children = Dir.scan(File.join(DirSpecs.mock_dir, 'special')).sort + encoding = Encoding.find("filesystem") + encoding = Encoding::BINARY if encoding == Encoding::US_ASCII + platform_is_not :windows do + children.should include(["こんにちは.txt".dup.force_encoding(encoding), :file]) + end + children.first.first.encoding.should equal(Encoding.find("filesystem")) + end + + it "returns children names encoded with the specified encoding" do + dir = File.join(DirSpecs.mock_dir, 'special') + children = Dir.scan(dir, encoding: "euc-jp").sort + children.first.first.encoding.should equal(Encoding::EUC_JP) + end + + it "returns children names transcoded to the default internal encoding" do + Encoding.default_internal = Encoding::EUC_KR + children = Dir.scan(File.join(DirSpecs.mock_dir, 'special')).sort + children.first.first.encoding.should equal(Encoding::EUC_KR) + end + + it "raises a SystemCallError if called with a nonexistent directory" do + -> { Dir.scan DirSpecs.nonexistent }.should raise_error(SystemCallError) + end + + it "handles symlink" do + FileSpecs.symlink do |path| + 
Dir.scan(File.dirname(path)).map(&:last).should include(:link) + end + end + + platform_is_not :windows do + it "handles socket" do + FileSpecs.socket do |path| + Dir.scan(File.dirname(path)).map(&:last).should include(:socket) + end + end + + it "handles FIFO" do + FileSpecs.fifo do |path| + Dir.scan(File.dirname(path)).map(&:last).should include(:fifo) + end + end + + it "handles character devices" do + FileSpecs.character_device do |path| + Dir.scan(File.dirname(path)).map(&:last).should include(:characterSpecial) + end + end + end + + platform_is_not :freebsd, :windows do + with_block_device do + it "handles block devices" do + FileSpecs.block_device do |path| + Dir.scan(File.dirname(path)).map(&:last).should include(:blockSpecial) + end + end + end + end + end + + describe "Dir#scan" do + before :all do + DirSpecs.create_mock_dirs + end + + after :all do + DirSpecs.delete_mock_dirs + end + + before :each do + @internal = Encoding.default_internal + end + + after :each do + Encoding.default_internal = @internal + @dir.close if @dir + end + + it "returns an Array of filenames in an existing directory including dotfiles" do + @dir = Dir.new(DirSpecs.mock_dir) + a = @dir.scan.sort + @dir.close + + a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]] + + @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested") + a = @dir.scan.sort + a.should == [[".dotfile.ext", :file], ["directory", :directory]] + end + + it "yields filename and type in an existing directory including dotfiles" do + @dir = Dir.new(DirSpecs.mock_dir) + a = [] + @dir.scan do |n, t| + a << [n, t] + end + a.sort! + a.should == DirSpecs.expected_paths_with_type - [[".", :directory], ["..", :directory]] + + @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested") + a = [] + @dir.scan do |n, t| + a << [n, t] + end + a.sort! 
+ a.should == [[".dotfile.ext", :file], ["directory", :directory]] + end + + it "accepts an encoding keyword for the encoding of the entries" do + @dir = Dir.new("#{DirSpecs.mock_dir}/deeply/nested", encoding: "utf-8") + dirs = @dir.to_a.sort + dirs.each { |d| d.encoding.should == Encoding::UTF_8 } + end + + it "returns children names encoded with the filesystem encoding by default" do + # This spec depends on the locale not being US-ASCII because if it is, the + # children that are not ascii_only? will be BINARY encoded. + @dir = Dir.new(File.join(DirSpecs.mock_dir, 'special')) + children = @dir.scan.sort + encoding = Encoding.find("filesystem") + encoding = Encoding::BINARY if encoding == Encoding::US_ASCII + platform_is_not :windows do + children.should include(["こんにちは.txt".dup.force_encoding(encoding), :file]) + end + children.first.first.encoding.should equal(Encoding.find("filesystem")) + end + + it "returns children names encoded with the specified encoding" do + path = File.join(DirSpecs.mock_dir, 'special') + @dir = Dir.new(path, encoding: "euc-jp") + children = @dir.children.sort + children.first.encoding.should equal(Encoding::EUC_JP) + end + + it "returns children names transcoded to the default internal encoding" do + Encoding.default_internal = Encoding::EUC_KR + @dir = Dir.new(File.join(DirSpecs.mock_dir, 'special')) + children = @dir.scan.sort + children.first.first.encoding.should equal(Encoding::EUC_KR) + end + + it "returns the same result when called repeatedly" do + @dir = Dir.open DirSpecs.mock_dir + + a = [] + @dir.each {|dir| a << dir} + + b = [] + @dir.each {|dir| b << dir} + + a.sort.should == b.sort + a.sort.should == DirSpecs.expected_paths + end + end +end |
