diff options
author | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2022-12-09 08:46:14 +0900 |
---|---|---|
committer | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2022-12-09 16:36:22 +0900 |
commit | 643918ecfe9c980f251247de6acd3be6280da24c (patch) | |
tree | a5b4011c13ee3af5b110e377a839e79045266dcd /test/csv | |
parent | 260a00d80e4dcc930b040313a99da29e4b1e6678 (diff) |
Merge csv-3.2.6
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6890
Diffstat (limited to 'test/csv')
-rw-r--r-- | test/csv/interface/test_read.rb | 18 | ||||
-rw-r--r-- | test/csv/interface/test_write.rb | 9 | ||||
-rw-r--r-- | test/csv/parse/test_convert.rb | 55 | ||||
-rw-r--r-- | test/csv/parse/test_general.rb | 88 | ||||
-rw-r--r-- | test/csv/parse/test_header.rb | 9 | ||||
-rw-r--r-- | test/csv/parse/test_inputs_scanner.rb | 63 | ||||
-rw-r--r-- | test/csv/parse/test_liberal_parsing.rb | 11 | ||||
-rw-r--r-- | test/csv/parse/test_read.rb | 27 | ||||
-rw-r--r-- | test/csv/test_data_converters.rb | 84 | ||||
-rw-r--r-- | test/csv/test_encodings.rb | 31 | ||||
-rw-r--r-- | test/csv/test_patterns.rb | 27 | ||||
-rw-r--r-- | test/csv/test_table.rb | 73 |
12 files changed, 481 insertions, 14 deletions
diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb index d73622d554..001177036a 100644 --- a/test/csv/interface/test_read.rb +++ b/test/csv/interface/test_read.rb @@ -26,7 +26,7 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach rows = [] - CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) @@ -37,7 +37,7 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach_in_ractor ractor = Ractor.new(@input.path) do |path| rows = [] - CSV.foreach(path, col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(path, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end rows @@ -52,13 +52,13 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach_mode rows = [] - CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) end - def test_foreach_enumurator + def test_foreach_enumerator rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a assert_equal(@rows, rows) end @@ -205,6 +205,16 @@ class TestCSVInterfaceRead < Test::Unit::TestCase end end + def test_open_with_newline + CSV.open(@input.path, col_sep: "\t", universal_newline: true) do |csv| + assert_equal(@rows, csv.to_a) + end + File.binwrite(@input.path, "1,2,3\r\n" "4,5\n") + CSV.open(@input.path, newline: :universal) do |csv| + assert_equal(@rows, csv.to_a) + end + end + def test_parse assert_equal(@rows, CSV.parse(@data, col_sep: "\t", row_sep: "\r\n")) diff --git a/test/csv/interface/test_write.rb b/test/csv/interface/test_write.rb index 02c2c5c5ce..0cd39a7663 100644 --- a/test/csv/interface/test_write.rb +++ b/test/csv/interface/test_write.rb @@ -85,6 +85,15 @@ testrow LINE end + def test_generate_lines + lines = CSV.generate_lines([["foo", "bar"], [1, 2], [3, 4]]) + assert_equal(<<-LINES, lines) +foo,bar +1,2 +3,4 + LINES + end + def test_headers_detection headers = ["a", "b", "c"] CSV.open(@output.path, "w", headers: true) do |csv| diff --git a/test/csv/parse/test_convert.rb b/test/csv/parse/test_convert.rb index 21d9f20b28..c9195c71d9 100644 --- a/test/csv/parse/test_convert.rb +++ b/test/csv/parse/test_convert.rb @@ -15,6 +15,22 @@ class TestCSVParseConvert < Test::Unit::TestCase @time = Time.utc(2018, 12, 30, 6, 41, 29) @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y") + + @preserving_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? + begin + Integer(f, 10) + rescue + f + end + end + + @quoted_header_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? + f.to_sym + end end def test_integer @@ -107,4 +123,43 @@ class TestCSVParseConvert < Test::Unit::TestCase assert_equal([nil, "empty", "a"], CSV.parse_line(',"",a', empty_value: "empty")) end + + def test_quoted_parse_line + row = CSV.parse_line('1,"2",3', converters: @preserving_converter) + assert_equal([1, "2", 3], row) + end + + def test_quoted_parse + expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]] + rows = CSV.parse(<<~CSV, converters: @preserving_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, rows) + end + + def test_quoted_alternating_quote + row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter) + assert_equal(['1', 2, '3'], row) + end + + def test_quoted_parse_headers + expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]] + table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end + + def test_quoted_parse_with_string_headers + expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]] + table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter) + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end end diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index c740462c01..902be2ce4a 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -199,6 +199,32 @@ line,5,jkl field_size_limit: 2048 ) end + def test_field_size_limit_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + field_size_limit: column.size + 1)) + end + + def test_field_size_limit_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + field_size_limit: column.size) + end + + def test_field_size_limit_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size) + end + + def test_field_size_limit_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size, + quote_char: nil) + end + def test_field_size_limit_in_extended_column_not_exceeding data = <<~DATA "a","b" @@ -221,6 +247,59 @@ line,5,jkl assert_parse_errors_out(data, field_size_limit: 5) end + def test_max_field_size_controls_lookahead + assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', + max_field_size: 2048 ) + end + + def test_max_field_size_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + max_field_size: column.size)) + end + + def test_max_field_size_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1, + quote_char: nil) + end + + def test_max_field_size_in_extended_column_not_exceeding + data = <<~DATA + "a","b" + " + 2 + ","" + DATA + assert_nothing_raised(CSV::MalformedCSVError) do + CSV.parse(data, max_field_size: 3) + end + end + + def test_max_field_size_in_extended_column_exceeding + data = <<~DATA + "a","b" + " + 2345 + ","" + DATA + assert_parse_errors_out(data, max_field_size: 4) + end + def test_row_sep_auto_cr assert_equal([["a"]], CSV.parse("a\r")) end @@ -246,14 +325,7 @@ line,5,jkl private def assert_parse_errors_out(data, **options) assert_raise(CSV::MalformedCSVError) do - timeout = 0.2 - if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled? - timeout = 1 # for --yjit-call-threshold=1 - end - if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled? - timeout = 5 # for --jit-wait - end - Timeout.timeout(timeout) do + Timeout.timeout(0.2) do CSV.parse(data, **options) fail("Parse didn't error out") end diff --git a/test/csv/parse/test_header.rb b/test/csv/parse/test_header.rb index 481c5107c6..e8c3786d68 100644 --- a/test/csv/parse/test_header.rb +++ b/test/csv/parse/test_header.rb @@ -218,6 +218,13 @@ A,B,C assert_equal([:one, :two_three], csv.headers) end + def test_builtin_symbol_raw_converter + csv = CSV.parse( "a b,c d", headers: true, + return_headers: true, + header_converters: :symbol_raw ) + assert_equal([:"a b", :"c d"], csv.headers) + end + def test_builtin_symbol_converter_with_punctuation csv = CSV.parse( "One, Two & Three ($)", headers: true, return_headers: true, @@ -228,7 +235,7 @@ A,B,C def test_builtin_converters_with_blank_header csv = CSV.parse( "one,,three", headers: true, return_headers: true, - header_converters: [:downcase, :symbol] ) + header_converters: [:downcase, :symbol, :symbol_raw] ) assert_equal([:one, nil, :three], csv.headers) end diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb new file mode 100644 index 0000000000..06e1c845d5 --- /dev/null +++ b/test/csv/parse/test_inputs_scanner.rb @@ -0,0 +1,63 @@ +require_relative "../helper" + +class TestCSVParseInputsScanner < Test::Unit::TestCase + include Helper + + def test_scan_keep_over_chunks_nested_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 2) + scanner.keep_start + assert_equal("abc", scanner.scan_all(/[a-c]+/)) + scanner.keep_start + assert_equal("def", scanner.scan_all(/[d-f]+/)) + scanner.keep_back + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_scan_keep_over_chunks_nested_drop_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 3) + scanner.keep_start + assert_equal("ab", scanner.scan(/../)) + scanner.keep_start + assert_equal("c", scanner.scan(/./)) + assert_equal("d", scanner.scan(/./)) + scanner.keep_drop + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_each_line_keep_over_chunks_multibyte + input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("ab\n", each_line.next) + scanner.keep_start + assert_equal("\u{3000}a\n", each_line.next) + scanner.keep_back + assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/)) + end + + def test_each_line_keep_over_chunks_fit_chunk_size + input = CSV::Parser::UnoptimizedStringIO.new("\na") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("\n", each_line.next) + scanner.keep_start + assert_equal("a", each_line.next) + scanner.keep_back + end +end diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb index 2f7b34689f..5796d10828 100644 --- a/test/csv/parse/test_liberal_parsing.rb +++ b/test/csv/parse/test_liberal_parsing.rb @@ -28,6 +28,17 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase CSV.parse_line(input, liberal_parsing: true)) end + def test_endline_after_quoted_field_end + csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true) + assert_equal(["A"], csv.gets) + error = assert_raise(CSV::MalformedCSVError) do + csv.gets + end + assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.', + error.message) + assert_equal(["C"], csv.gets) + end + def test_quote_after_column_separator error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) diff --git a/test/csv/parse/test_read.rb b/test/csv/parse/test_read.rb new file mode 100644 index 0000000000..ba6fe985a9 --- /dev/null +++ b/test/csv/parse/test_read.rb @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# frozen_string_literal: false + +require_relative "../helper" + +class TestCSVParseRead < Test::Unit::TestCase + extend DifferentOFS + + def test_shift + data = <<-CSV +1 +2 +3 + CSV + csv = CSV.new(data) + assert_equal([ + ["1"], + [["2"], ["3"]], + nil, + ], + [ + csv.shift, + csv.read, + csv.shift, + ]) + end +end diff --git a/test/csv/test_data_converters.rb b/test/csv/test_data_converters.rb index 1620e077be..c20a5d1f4b 100644 --- a/test/csv/test_data_converters.rb +++ b/test/csv/test_data_converters.rb @@ -103,4 +103,88 @@ class TestCSVDataConverters < Test::Unit::TestCase assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end + + def test_builtin_date_time_converter_rfc3339_minute + rfc3339_string = "2018-01-14 22:25" + datetime = DateTime.new(2018, 1, 14, 22, 25) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_second + rfc3339_string = "2018-01-14 22:25:19" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_under_second + rfc3339_string = "2018-01-14 22:25:19.1" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_under_second_offset + rfc3339_string = "2018-01-14 22:25:19.1+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_offset + rfc3339_string = "2018-01-14 22:25:19+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_utc + rfc3339_string = "2018-01-14 22:25:19Z" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_minute + rfc3339_string = "2018-01-14\t22:25" + datetime = DateTime.new(2018, 1, 14, 22, 25) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_second + rfc3339_string = "2018-01-14\t22:25:19" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_under_second + rfc3339_string = "2018-01-14\t22:25:19.1" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_under_second_offset + rfc3339_string = "2018-01-14\t22:25:19.1+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_offset + rfc3339_string = "2018-01-14\t22:25:19+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_utc + rfc3339_string = "2018-01-14\t22:25:19Z" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end end diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb index 8d228c05f3..f08d551f69 100644 --- a/test/csv/test_encodings.rb +++ b/test/csv/test_encodings.rb @@ -288,6 +288,37 @@ class TestCSVEncodings < Test::Unit::TestCase error.message) end + def test_string_input_transcode + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046" + csv = CSV.new(value, encoding: "UTF-8:EUC-JP") + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + + def test_string_input_set_encoding_string + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046".encode("EUC-JP") + csv = CSV.new(value.dup.force_encoding("UTF-8"), encoding: "EUC-JP") + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + + def test_string_input_set_encoding_encoding + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046".encode("EUC-JP") + csv = CSV.new(value.dup.force_encoding("UTF-8"), + encoding: Encoding.find("EUC-JP")) + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + private def assert_parses(fields, encoding, **options) diff --git a/test/csv/test_patterns.rb b/test/csv/test_patterns.rb new file mode 100644 index 0000000000..881f03a3a4 --- /dev/null +++ b/test/csv/test_patterns.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative "helper" + +class TestCSVPatternMatching < Test::Unit::TestCase + + def test_hash + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in B: b, C: c + assert_equal([2, 3], [b, c]) + end + end + + def test_hash_rest + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in B: b, **rest + assert_equal([2, { A: 1, C: 3 }], [b, rest]) + end + end + + def test_array + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in *, matched + assert_equal(3, matched) + end + end +end diff --git a/test/csv/test_table.rb b/test/csv/test_table.rb index 968e64eae7..e8ab74044e 100644 --- a/test/csv/test_table.rb +++ b/test/csv/test_table.rb @@ -274,6 +274,22 @@ A,B,C,Type,Index @table.each { |row| assert_instance_of(CSV::Row, row) } end + def test_each_by_col_duplicated_headers + table = CSV.parse(<<-CSV, headers: true) +a,a,,,b +1,2,3,4,5 +11,12,13,14,15 + CSV + assert_equal([ + ["a", ["1", "11"]], + ["a", ["2", "12"]], + [nil, ["3", "13"]], + [nil, ["4", "14"]], + ["b", ["5", "15"]], + ], + table.by_col.each.to_a) + end + def test_each_split yielded_values = [] @table.each do |column1, column2, column3| @@ -320,6 +336,43 @@ A,B,C assert_equal(csv, @header_table.to_csv) end + def test_to_csv_limit_positive + assert_equal(<<-CSV, @table.to_csv(limit: 2)) +A,B,C +1,2,3 +4,5,6 + CSV + end + + def test_to_csv_limit_positive_over + assert_equal(<<-CSV, @table.to_csv(limit: 5)) +A,B,C +1,2,3 +4,5,6 +7,8,9 + CSV + end + + def test_to_csv_limit_zero + assert_equal(<<-CSV, @table.to_csv(limit: 0)) +A,B,C + CSV + end + + def test_to_csv_limit_negative + assert_equal(<<-CSV, @table.to_csv(limit: -2)) +A,B,C +1,2,3 +4,5,6 + CSV + end + + def test_to_csv_limit_negative_over + assert_equal(<<-CSV, @table.to_csv(limit: -5)) +A,B,C + CSV + end + def test_append # verify that we can chain the call assert_equal(@table, @table << [10, 11, 12]) @@ -549,7 +602,25 @@ A assert_send([Encoding, :compatible?, Encoding.find("US-ASCII"), @table.inspect.encoding], - "inspect() was not ASCII compatible." ) + "inspect() was not ASCII compatible." ) + end + + def test_inspect_with_rows + additional_rows = [ CSV::Row.new(%w{A B C}, [101, 102, 103]), + CSV::Row.new(%w{A B C}, [104, 105, 106]), + CSV::Row.new(%w{A B C}, [107, 108, 109]) ] + table = CSV::Table.new(@rows + additional_rows) + str_table = table.inspect + + assert_equal(<<-CSV, str_table) +#<CSV::Table mode:col_or_row row_count:7> +A,B,C +1,2,3 +4,5,6 +7,8,9 +101,102,103 +104,105,106 + CSV end def test_dig_mixed |