summaryrefslogtreecommitdiff
path: root/test/ruby/test_file.rb
diff options
context:
space:
mode:
authorKJ Tsanaktsidis <kj@kjtsanaktsidis.id.au>2023-12-28 16:08:54 +1100
committerKJ Tsanaktsidis <kj@kjtsanaktsidis.id.au>2024-01-10 21:02:23 +1100
commit31371b2e24b03ccb0a03b622faf8c65e6cf6a31a (patch)
treee419cf38ee171aaf893dd45dcc80ebff5e051cd5 /test/ruby/test_file.rb
parentc4051d5f4324536a932e99cbe43f5b7dbe34254a (diff)
Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open
When opening a file with `File.open`, and then setting the encoding with `IO#set_encoding`, it still correctly performs CRLF -> LF conversion on Windows when reading files with a CRLF line ending in them (in text mode). However, the file is opened instead with either the `rb_io_fdopen` or `rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up correctly; it works if the encoding is not specified, but if `IO#set_encoding` is called, the conversion stops happening. This seems to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR set in these codepaths. Concretely, this means that the conversion doesn't happen in the following circumstances: * When loading ruby files with require (that calls rb_io_fdopen) * When parsing ruuby files with RubyVM::AbstractSyntaxTree (that calls rb_file_open). This then causes the ErrorHighlight tests to fail on windows if git has checked them out with CRLF line endings - the error messages it's testing wind up with literal \r\n sequences in them because the iseq text from the parser contains un-newline-converted strings. This commit fixes the problem by copy-pasting the relevant snippet which sets this up in `rb_io_extract_modeenc` (for the File.open path) into the relevant codepaths for `rb_io_fdopen` and `rb_file_open`. [Bug #20101]
Diffstat (limited to 'test/ruby/test_file.rb')
-rw-r--r--test/ruby/test_file.rb246
1 files changed, 246 insertions, 0 deletions
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 409d21fc4e..aa10566bfa 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -554,4 +554,250 @@ class TestFile < Test::Unit::TestCase
assert_file.absolute_path?("/foo/bar\\baz")
end
end
+
+ class NewlineConvTests < Test::Unit::TestCase
+ TEST_STRING_WITH_CRLF = "line1\r\nline2\r\n".freeze
+ TEST_STRING_WITH_LF = "line1\nline2\n".freeze
+
+ def setup
+ @tmpdir = Dir.mktmpdir(self.class.name)
+ @read_path_with_crlf = File.join(@tmpdir, "read_path_with_crlf")
+ File.binwrite(@read_path_with_crlf, TEST_STRING_WITH_CRLF)
+ @read_path_with_lf = File.join(@tmpdir, "read_path_with_lf")
+ File.binwrite(@read_path_with_lf, TEST_STRING_WITH_LF)
+ @write_path = File.join(@tmpdir, "write_path")
+ File.binwrite(@write_path, '')
+ end
+
+ def teardown
+ FileUtils.rm_rf @tmpdir
+ end
+
+ def windows?
+ /cygwin|mswin|mingw/ =~ RUBY_PLATFORM
+ end
+
+ def open_file_with(method, filename, mode)
+ read_or_write = mode.include?('w') ? :write : :read
+ binary_or_text = mode.include?('b') ? :binary : :text
+
+ f = case method
+ when :ruby_file_open
+ File.open(filename, mode)
+ when :c_rb_file_open
+ Bug::File::NewlineConv.rb_file_open(filename, read_or_write, binary_or_text)
+ when :c_rb_io_fdopen
+ Bug::File::NewlineConv.rb_io_fdopen(filename, read_or_write, binary_or_text)
+ else
+ raise "Don't know how to open with #{method}"
+ end
+
+ begin
+ yield f
+ ensure
+ f.close
+ end
+ end
+
+ def assert_file_contents_has_lf(f)
+ assert_equal TEST_STRING_WITH_LF, f.read
+ end
+
+ def assert_file_contents_has_crlf(f)
+ assert_equal TEST_STRING_WITH_CRLF, f.read
+ end
+
+ def assert_file_contents_has_lf_on_windows(f)
+ if windows?
+ assert_file_contents_has_lf(f)
+ else
+ assert_file_contents_has_crlf(f)
+ end
+ end
+
+ def assert_file_contents_has_crlf_on_windows(f)
+ if windows?
+ assert_file_contents_has_crlf(f)
+ else
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_ruby_file_open_text_mode_read_crlf
+ open_file_with(:ruby_file_open, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) }
+ end
+
+ def test_ruby_file_open_bin_mode_read_crlf
+ open_file_with(:ruby_file_open, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_ruby_file_open_text_mode_read_lf
+ open_file_with(:ruby_file_open, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_ruby_file_open_bin_mode_read_lf
+ open_file_with(:ruby_file_open, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_ruby_file_open_text_mode_read_crlf_with_utf8_encoding
+ open_file_with(:ruby_file_open, @read_path_with_crlf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf_on_windows(f)
+ end
+ end
+
+ def test_ruby_file_open_bin_mode_read_crlf_with_utf8_encoding
+ open_file_with(:ruby_file_open, @read_path_with_crlf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_crlf(f)
+ end
+ end
+
+ def test_ruby_file_open_text_mode_read_lf_with_utf8_encoding
+ open_file_with(:ruby_file_open, @read_path_with_lf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_ruby_file_open_bin_mode_read_lf_with_utf8_encoding
+ open_file_with(:ruby_file_open, @read_path_with_lf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_ruby_file_open_text_mode_write_lf
+ open_file_with(:ruby_file_open, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) }
+ end
+
+ def test_ruby_file_open_bin_mode_write_lf
+ open_file_with(:ruby_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_ruby_file_open_bin_mode_write_crlf
+ open_file_with(:ruby_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_c_rb_file_open_text_mode_read_crlf
+ open_file_with(:c_rb_file_open, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) }
+ end
+
+ def test_c_rb_file_open_bin_mode_read_crlf
+ open_file_with(:c_rb_file_open, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_c_rb_file_open_text_mode_read_lf
+ open_file_with(:c_rb_file_open, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_file_open_bin_mode_read_lf
+ open_file_with(:c_rb_file_open, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_file_open_text_mode_write_lf
+ open_file_with(:c_rb_file_open, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) }
+ end
+
+ def test_c_rb_file_open_bin_mode_write_lf
+ open_file_with(:c_rb_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_file_open_bin_mode_write_crlf
+ open_file_with(:c_rb_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_c_rb_file_open_text_mode_read_crlf_with_utf8_encoding
+ open_file_with(:c_rb_file_open, @read_path_with_crlf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf_on_windows(f)
+ end
+ end
+
+ def test_c_rb_file_open_bin_mode_read_crlf_with_utf8_encoding
+ open_file_with(:c_rb_file_open, @read_path_with_crlf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_crlf(f)
+ end
+ end
+
+ def test_c_rb_file_open_text_mode_read_lf_with_utf8_encoding
+ open_file_with(:c_rb_file_open, @read_path_with_lf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_c_rb_file_open_bin_mode_read_lf_with_utf8_encoding
+ open_file_with(:c_rb_file_open, @read_path_with_lf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_c_rb_io_fdopen_text_mode_read_crlf
+ open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) }
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_read_crlf
+ open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_c_rb_io_fdopen_text_mode_read_lf
+ open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_read_lf
+ open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_io_fdopen_text_mode_write_lf
+ open_file_with(:c_rb_io_fdopen, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) }
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_write_lf
+ open_file_with(:c_rb_io_fdopen, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_lf(f) }
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_write_crlf
+ open_file_with(:c_rb_io_fdopen, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF }
+ File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) }
+ end
+
+ def test_c_rb_io_fdopen_text_mode_read_crlf_with_utf8_encoding
+ open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf_on_windows(f)
+ end
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_read_crlf_with_utf8_encoding
+ open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_crlf(f)
+ end
+ end
+
+ def test_c_rb_io_fdopen_text_mode_read_lf_with_utf8_encoding
+ open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'r') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+
+ def test_c_rb_io_fdopen_bin_mode_read_lf_with_utf8_encoding
+ open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'rb') do |f|
+ f.set_encoding Encoding::UTF_8, '-'
+ assert_file_contents_has_lf(f)
+ end
+ end
+ end
end