summaryrefslogtreecommitdiff
path: root/test/csv/parse
diff options
context:
space:
mode:
Diffstat (limited to 'test/csv/parse')
-rw-r--r--test/csv/parse/test_column_separator.rb40
-rw-r--r--test/csv/parse/test_convert.rb165
-rw-r--r--test/csv/parse/test_each.rb23
-rw-r--r--test/csv/parse/test_general.rb334
-rw-r--r--test/csv/parse/test_header.rb342
-rw-r--r--test/csv/parse/test_inputs_scanner.rb63
-rw-r--r--test/csv/parse/test_invalid.rb52
-rw-r--r--test/csv/parse/test_liberal_parsing.rb171
-rw-r--r--test/csv/parse/test_quote_char_nil.rb93
-rw-r--r--test/csv/parse/test_rewind.rb40
-rw-r--r--test/csv/parse/test_row_separator.rb16
-rw-r--r--test/csv/parse/test_skip_lines.rb118
-rw-r--r--test/csv/parse/test_strip.rb112
-rw-r--r--test/csv/parse/test_unconverted_fields.rb117
14 files changed, 1686 insertions, 0 deletions
diff --git a/test/csv/parse/test_column_separator.rb b/test/csv/parse/test_column_separator.rb
new file mode 100644
index 0000000000..d6eaa7b6de
--- /dev/null
+++ b/test/csv/parse/test_column_separator.rb
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseColumnSeparator < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_comma
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a,b,,d", col_sep: ","))
+ end
+
+ def test_space
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a b d", col_sep: " "))
+ end
+
+ def test_tab
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a\tb\t\td", col_sep: "\t"))
+ end
+
+ def test_multiple_characters_include_sub_separator
+ assert_equal([["a b", nil, "d"]],
+ CSV.parse("a b d", col_sep: " "))
+ end
+
+ def test_multiple_characters_leading_empty_fields
+ data = <<-CSV
+<=><=>A<=>B<=>C
+1<=>2<=>3
+ CSV
+ assert_equal([
+ [nil, nil, "A", "B", "C"],
+ ["1", "2", "3"],
+ ],
+ CSV.parse(data, col_sep: "<=>"))
+ end
+end
diff --git a/test/csv/parse/test_convert.rb b/test/csv/parse/test_convert.rb
new file mode 100644
index 0000000000..c9195c71d9
--- /dev/null
+++ b/test/csv/parse/test_convert.rb
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseConvert < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def setup
+ super
+ @data = "Numbers,:integer,1,:float,3.015"
+ @parser = CSV.new(@data)
+
+ @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field}
+
+ @time = Time.utc(2018, 12, 30, 6, 41, 29)
+ @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y")
+
+ @preserving_converter = lambda do |field, info|
+ f = field.encode(CSV::ConverterEncoding)
+ return f if info.quoted?
+ begin
+ Integer(f, 10)
+ rescue
+ f
+ end
+ end
+
+ @quoted_header_converter = lambda do |field, info|
+ f = field.encode(CSV::ConverterEncoding)
+ return f if info.quoted?
+ f.to_sym
+ end
+ end
+
+ def test_integer
+ @parser.convert(:integer)
+ assert_equal(["Numbers", ":integer", 1, ":float", "3.015"],
+ @parser.shift)
+ end
+
+ def test_float
+ @parser.convert(:float)
+ assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015],
+ @parser.shift)
+ end
+
+ def test_float_integer
+ @parser.convert(:float)
+ @parser.convert(:integer)
+ assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015],
+ @parser.shift)
+ end
+
+ def test_integer_float
+ @parser.convert(:integer)
+ @parser.convert(:float)
+ assert_equal(["Numbers", ":integer", 1, ":float", 3.015],
+ @parser.shift)
+ end
+
+ def test_numeric
+ @parser.convert(:numeric)
+ assert_equal(["Numbers", ":integer", 1, ":float", 3.015],
+ @parser.shift)
+ end
+
+ def test_all
+ @data << ",#{@windows_safe_time_data}"
+ @parser = CSV.new(@data)
+ @parser.convert(:all)
+ assert_equal(["Numbers", ":integer", 1, ":float", 3.015, @time.to_datetime],
+ @parser.shift)
+ end
+
+ def test_custom
+ @parser.convert do |field|
+ /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field
+ end
+ assert_equal(["Numbers", :integer, "1", :float, "3.015"],
+ @parser.shift)
+ end
+
+ def test_builtin_custom
+ @parser.convert(:numeric)
+ @parser.convert(&@custom)
+ assert_equal(["Numbers", :integer, 1, :float, 3.015],
+ @parser.shift)
+ end
+
+ def test_custom_field_info_line
+ @parser.convert do |field, info|
+ assert_equal(1, info.line)
+ info.index == 4 ? Float(field).floor : field
+ end
+ assert_equal(["Numbers", ":integer", "1", ":float", 3],
+ @parser.shift)
+ end
+
+ def test_custom_field_info_header
+ headers = ["one", "two", "three", "four", "five"]
+ @parser = CSV.new(@data, headers: headers)
+ @parser.convert do |field, info|
+ info.header == "three" ? Integer(field) * 100 : field
+ end
+ assert_equal(CSV::Row.new(headers,
+ ["Numbers", ":integer", 100, ":float", "3.015"]),
+ @parser.shift)
+ end
+
+ def test_custom_blank_field
+ converter = lambda {|field| field.nil?}
+ row = CSV.parse_line('nil,', converters: converter)
+ assert_equal([false, true], row)
+ end
+
+ def test_nil_value
+ assert_equal(["nil", "", "a"],
+ CSV.parse_line(',"",a', nil_value: "nil"))
+ end
+
+ def test_empty_value
+ assert_equal([nil, "empty", "a"],
+ CSV.parse_line(',"",a', empty_value: "empty"))
+ end
+
+ def test_quoted_parse_line
+ row = CSV.parse_line('1,"2",3', converters: @preserving_converter)
+ assert_equal([1, "2", 3], row)
+ end
+
+ def test_quoted_parse
+ expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]]
+ rows = CSV.parse(<<~CSV, converters: @preserving_converter)
+ "quoted",unquoted
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, rows)
+ end
+
+ def test_quoted_alternating_quote
+ row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter)
+ assert_equal(['1', 2, '3'], row)
+ end
+
+ def test_quoted_parse_headers
+ expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]]
+ table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter)
+ "quoted",unquoted
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, table.to_a)
+ end
+
+ def test_quoted_parse_with_string_headers
+ expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]]
+ table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter)
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, table.to_a)
+ end
+end
diff --git a/test/csv/parse/test_each.rb b/test/csv/parse/test_each.rb
new file mode 100644
index 0000000000..ce0b71d058
--- /dev/null
+++ b/test/csv/parse/test_each.rb
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseEach < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_twice
+ data = <<-CSV
+Ruby,2.6.0,script
+ CSV
+ csv = CSV.new(data)
+ assert_equal([
+ [["Ruby", "2.6.0", "script"]],
+ [],
+ ],
+ [
+ csv.to_a,
+ csv.to_a,
+ ])
+ end
+end
diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb
new file mode 100644
index 0000000000..902be2ce4a
--- /dev/null
+++ b/test/csv/parse/test_general.rb
@@ -0,0 +1,334 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require "timeout"
+
+require_relative "../helper"
+
+#
+# Following tests are my interpretation of the
+# {CSV RCF}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
+# document in one place (intentionally) and that is to make the default row
+# separator <tt>$/</tt>.
+#
+class TestCSVParseGeneral < Test::Unit::TestCase
+ extend DifferentOFS
+
+ BIG_DATA = "123456789\n" * 512
+
+ def test_mastering_regex_example
+ ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
+ assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
+ "It's \"10 Grand\", baby", "10K" ],
+ CSV.parse_line(ex) )
+ end
+
+ # Old Ruby 1.8 CSV library tests.
+ def test_std_lib_csv
+ [ ["\t", ["\t"]],
+ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
+ ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
+ ["\"\"", [""]],
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
+ ["foo,\"\",baz", ["foo", "", "baz"]],
+ ["\",\"", [","]],
+ ["foo", ["foo"]],
+ [",,", [nil, nil, nil]],
+ [",", [nil, nil]],
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
+ ["foo,,baz", ["foo", nil, "baz"]],
+ ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
+ ["\",\",\",\"", [",", ","]],
+ ["foo,bar,", ["foo", "bar", nil]],
+ [",foo,bar", [nil, "foo", "bar"]],
+ ["foo,bar", ["foo", "bar"]],
+ [";", [";"]],
+ ["\t,\t", ["\t", "\t"]],
+ ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
+ [";,;", [";", ";"]] ].each do |csv_test|
+ assert_equal(csv_test.last, CSV.parse_line(csv_test.first))
+ end
+
+ [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
+ ["\"\"", [""]],
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
+ ["foo,\"\",baz", ["foo", "", "baz"]],
+ ["foo", ["foo"]],
+ [",,", [nil, nil, nil]],
+ [",", [nil, nil]],
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
+ ["foo,,baz", ["foo", nil, "baz"]],
+ ["foo,bar", ["foo", "bar"]],
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
+ assert_equal(csv_test.last, CSV.parse_line(csv_test.first))
+ end
+ end
+
+ # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
+ def test_aras_edge_cases
+ [ [%Q{a,b}, ["a", "b"]],
+ [%Q{a,"""b"""}, ["a", "\"b\""]],
+ [%Q{a,"""b"}, ["a", "\"b"]],
+ [%Q{a,"b"""}, ["a", "b\""]],
+ [%Q{a,"\nb"""}, ["a", "\nb\""]],
+ [%Q{a,"""\nb"}, ["a", "\"\nb"]],
+ [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
+ [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
+ [%Q{a,,,}, ["a", nil, nil, nil]],
+ [%Q{,}, [nil, nil]],
+ [%Q{"",""}, ["", ""]],
+ [%Q{""""}, ["\""]],
+ [%Q{"""",""}, ["\"",""]],
+ [%Q{,""}, [nil,""]],
+ [%Q{,"\r"}, [nil,"\r"]],
+ [%Q{"\r\n,"}, ["\r\n,"]],
+ [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
+ assert_equal(edge_case.last, CSV.parse_line(edge_case.first))
+ end
+ end
+
+ def test_james_edge_cases
+ # A read at eof? should return nil.
+ assert_equal(nil, CSV.parse_line(""))
+ #
+ # With Ruby 1.8 CSV it's impossible to tell an empty line from a line
+ # containing a single +nil+ field. The old CSV library returns
+ # <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
+ # me.
+ #
+ assert_equal(Array.new, CSV.parse_line("\n1,2,3\n"))
+ end
+
+ def test_rob_edge_cases
+ [ [%Q{"a\nb"}, ["a\nb"]],
+ [%Q{"\n\n\n"}, ["\n\n\n"]],
+ [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
+ [%Q{,"\r\n"}, [nil,"\r\n"]],
+ [%Q{,"\r\n."}, [nil,"\r\n."]],
+ [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
+ [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
+ [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
+ [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
+ [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
+ ].each do |edge_case|
+ assert_equal(edge_case.last, CSV.parse_line(edge_case.first))
+ end
+ end
+
+ def test_non_regex_edge_cases
+ # An early version of the non-regex parser fails this test
+ [ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
+ ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case|
+ assert_equal(edge_case.last, CSV.parse_line(edge_case.first))
+ end
+
+ assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line("1,\"23\"4\"5\", 6")
+ end
+ end
+
+ def test_malformed_csv_cr_first_line
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line("1,2\r,3", row_sep: "\n")
+ end
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
+ error.message)
+ end
+
+ def test_malformed_csv_cr_middle_line
+ csv = <<-CSV
+line,1,abc
+line,2,"def\nghi"
+
+line,4,some\rjunk
+line,5,jkl
+ CSV
+
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse(csv)
+ end
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
+ error.message)
+ end
+
+ def test_malformed_csv_unclosed_quote
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line('1,2,"3...')
+ end
+ assert_equal("Unclosed quoted field in line 1.",
+ error.message)
+ end
+
+ def test_malformed_csv_illegal_quote_middle_line
+ csv = <<-CSV
+line,1,abc
+line,2,"def\nghi"
+
+line,4,8'10"
+line,5,jkl
+ CSV
+
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse(csv)
+ end
+ assert_equal("Illegal quoting in line 4.",
+ error.message)
+ end
+
+ def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
+ assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
+ end
+
+ def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
+ assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
+ end
+
+ def test_field_size_limit_controls_lookahead
+ assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
+ field_size_limit: 2048 )
+ end
+
+ def test_field_size_limit_max_allowed
+ column = "abcde"
+ assert_equal([[column]],
+ CSV.parse("\"#{column}\"",
+ field_size_limit: column.size + 1))
+ end
+
+ def test_field_size_limit_quote_simple
+ column = "abcde"
+ assert_parse_errors_out("\"#{column}\"",
+ field_size_limit: column.size)
+ end
+
+ def test_field_size_limit_no_quote_implicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ field_size_limit: column.size)
+ end
+
+ def test_field_size_limit_no_quote_explicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ field_size_limit: column.size,
+ quote_char: nil)
+ end
+
+ def test_field_size_limit_in_extended_column_not_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2
+ ",""
+ DATA
+ assert_nothing_raised(CSV::MalformedCSVError) do
+ CSV.parse(data, field_size_limit: 4)
+ end
+ end
+
+ def test_field_size_limit_in_extended_column_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2345
+ ",""
+ DATA
+ assert_parse_errors_out(data, field_size_limit: 5)
+ end
+
+ def test_max_field_size_controls_lookahead
+ assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
+ max_field_size: 2048 )
+ end
+
+ def test_max_field_size_max_allowed
+ column = "abcde"
+ assert_equal([[column]],
+ CSV.parse("\"#{column}\"",
+ max_field_size: column.size))
+ end
+
+ def test_max_field_size_quote_simple
+ column = "abcde"
+ assert_parse_errors_out("\"#{column}\"",
+ max_field_size: column.size - 1)
+ end
+
+ def test_max_field_size_no_quote_implicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ max_field_size: column.size - 1)
+ end
+
+ def test_max_field_size_no_quote_explicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ max_field_size: column.size - 1,
+ quote_char: nil)
+ end
+
+ def test_max_field_size_in_extended_column_not_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2
+ ",""
+ DATA
+ assert_nothing_raised(CSV::MalformedCSVError) do
+ CSV.parse(data, max_field_size: 3)
+ end
+ end
+
+ def test_max_field_size_in_extended_column_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2345
+ ",""
+ DATA
+ assert_parse_errors_out(data, max_field_size: 4)
+ end
+
+ def test_row_sep_auto_cr
+ assert_equal([["a"]], CSV.parse("a\r"))
+ end
+
+ def test_row_sep_auto_lf
+ assert_equal([["a"]], CSV.parse("a\n"))
+ end
+
+ def test_row_sep_auto_cr_lf
+ assert_equal([["a"]], CSV.parse("a\r\n"))
+ end
+
+ def test_seeked_string_io
+ input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n")
+ input_with_bom.read(3)
+ assert_equal([
+ ["あ", "い", "う"],
+ ["a", "b", "c"],
+ ],
+ CSV.new(input_with_bom).each.to_a)
+ end
+
+ private
+ def assert_parse_errors_out(data, **options)
+ assert_raise(CSV::MalformedCSVError) do
+ Timeout.timeout(0.2) do
+ CSV.parse(data, **options)
+ fail("Parse didn't error out")
+ end
+ end
+ end
+end
diff --git a/test/csv/parse/test_header.rb b/test/csv/parse/test_header.rb
new file mode 100644
index 0000000000..e8c3786d68
--- /dev/null
+++ b/test/csv/parse/test_header.rb
@@ -0,0 +1,342 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVHeaders < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def setup
+ super
+ @data = <<-CSV
+first,second,third
+A,B,C
+1,2,3
+ CSV
+ end
+
+ def test_first_row
+ [:first_row, true].each do |setting| # two names for the same setting
+ # activate headers
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse(@data, headers: setting)
+ end
+
+ # first data row - skipping headers
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a)
+
+ # second data row
+ row = csv[1]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a)
+
+ # empty
+ assert_nil(csv[2])
+ end
+ end
+
+ def test_array_of_headers
+ # activate headers
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse(@data, headers: [:my, :new, :headers])
+ end
+
+ # first data row - skipping headers
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal( [[:my, "first"], [:new, "second"], [:headers, "third"]],
+ row.to_a )
+
+ # second data row
+ row = csv[1]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([[:my, "A"], [:new, "B"], [:headers, "C"]], row.to_a)
+
+ # third data row
+ row = csv[2]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([[:my, "1"], [:new, "2"], [:headers, "3"]], row.to_a)
+
+ # empty
+ assert_nil(csv[3])
+
+ # with return and convert
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse( @data, headers: [:my, :new, :headers],
+ return_headers: true,
+ header_converters: lambda { |h| h.to_s } )
+ end
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([["my", :my], ["new", :new], ["headers", :headers]], row.to_a)
+ assert_predicate(row, :header_row?)
+ assert_not_predicate(row, :field_row?)
+ end
+
+ def test_csv_header_string
+ # activate headers
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse(@data, headers: "my,new,headers")
+ end
+
+ # first data row - skipping headers
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a)
+
+ # second data row
+ row = csv[1]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{my A}, %w{new B}, %w{headers C}], row.to_a)
+
+ # third data row
+ row = csv[2]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{my 1}, %w{new 2}, %w{headers 3}], row.to_a)
+
+ # empty
+ assert_nil(csv[3])
+
+ # with return and convert
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse( @data, headers: "my,new,headers",
+ return_headers: true,
+ header_converters: :symbol )
+ end
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([[:my, "my"], [:new, "new"], [:headers, "headers"]], row.to_a)
+ assert_predicate(row, :header_row?)
+ assert_not_predicate(row, :field_row?)
+ end
+
+ def test_csv_header_string_inherits_separators
+ # parse with custom col_sep
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse( @data.tr(",", "|"), col_sep: "|",
+ headers: "my|new|headers" )
+ end
+
+ # verify headers were recognized
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a)
+ end
+
+ def test_return_headers
+ # activate headers and request they are returned
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse(@data, headers: true, return_headers: true)
+ end
+
+ # header row
+ row = csv[0]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal( [%w{first first}, %w{second second}, %w{third third}],
+ row.to_a )
+ assert_predicate(row, :header_row?)
+ assert_not_predicate(row, :field_row?)
+
+ # first data row - skipping headers
+ row = csv[1]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a)
+ assert_not_predicate(row, :header_row?)
+ assert_predicate(row, :field_row?)
+
+ # second data row
+ row = csv[2]
+ assert_not_nil(row)
+ assert_instance_of(CSV::Row, row)
+ assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a)
+ assert_not_predicate(row, :header_row?)
+ assert_predicate(row, :field_row?)
+
+ # empty
+ assert_nil(csv[3])
+ end
+
+ def test_converters
+ # create test data where headers and fields look alike
+ data = <<-CSV
+1,2,3
+1,2,3
+ CSV
+
+ # normal converters do not affect headers
+ csv = CSV.parse( data, headers: true,
+ return_headers: true,
+ converters: :numeric )
+ assert_equal([%w{1 1}, %w{2 2}, %w{3 3}], csv[0].to_a)
+ assert_equal([["1", 1], ["2", 2], ["3", 3]], csv[1].to_a)
+ assert_nil(csv[2])
+
+ # header converters do affect headers (only)
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse( data, headers: true,
+ return_headers: true,
+ converters: :numeric,
+ header_converters: :symbol )
+ end
+ assert_equal([[:"1", "1"], [:"2", "2"], [:"3", "3"]], csv[0].to_a)
+ assert_equal([[:"1", 1], [:"2", 2], [:"3", 3]], csv[1].to_a)
+ assert_nil(csv[2])
+ end
+
+ def test_builtin_downcase_converter
+ csv = CSV.parse( "One,TWO Three", headers: true,
+ return_headers: true,
+ header_converters: :downcase )
+ assert_equal(%w{one two\ three}, csv.headers)
+ end
+
+ def test_builtin_symbol_converter
+ # Note that the trailing space is intentional
+ csv = CSV.parse( "One,TWO Three ", headers: true,
+ return_headers: true,
+ header_converters: :symbol )
+ assert_equal([:one, :two_three], csv.headers)
+ end
+
+ def test_builtin_symbol_raw_converter
+ csv = CSV.parse( "a b,c d", headers: true,
+ return_headers: true,
+ header_converters: :symbol_raw )
+ assert_equal([:"a b", :"c d"], csv.headers)
+ end
+
+ def test_builtin_symbol_converter_with_punctuation
+ csv = CSV.parse( "One, Two & Three ($)", headers: true,
+ return_headers: true,
+ header_converters: :symbol )
+ assert_equal([:one, :two_three], csv.headers)
+ end
+
+ def test_builtin_converters_with_blank_header
+ csv = CSV.parse( "one,,three", headers: true,
+ return_headers: true,
+ header_converters: [:downcase, :symbol, :symbol_raw] )
+ assert_equal([:one, nil, :three], csv.headers)
+ end
+
+ def test_custom_converter
+ converter = lambda { |header| header.tr(" ", "_") }
+ csv = CSV.parse( "One,TWO Three",
+ headers: true,
+ return_headers: true,
+ header_converters: converter )
+ assert_equal(%w{One TWO_Three}, csv.headers)
+ end
+
+ def test_table_support
+ csv = nil
+ assert_nothing_raised(Exception) do
+ csv = CSV.parse(@data, headers: true)
+ end
+
+ assert_instance_of(CSV::Table, csv)
+ end
+
+ def test_skip_blanks
+ @data = <<-CSV
+
+
+A,B,C
+
+1,2,3
+
+
+
+ CSV
+
+ expected = [%w[1 2 3]]
+ CSV.parse(@data, headers: true, skip_blanks: true) do |row|
+ assert_equal(expected.shift, row.fields)
+ end
+
+ expected = [%w[A B C], %w[1 2 3]]
+ CSV.parse( @data,
+ headers: true,
+ return_headers: true,
+ skip_blanks: true ) do |row|
+ assert_equal(expected.shift, row.fields)
+ end
+ end
+
+ def test_headers_reader
+ # no headers
+ assert_nil(CSV.new(@data).headers)
+
+ # headers
+ csv = CSV.new(@data, headers: true)
+ assert_equal(true, csv.headers) # before headers are read
+ csv.shift # set headers
+ assert_equal(%w[first second third], csv.headers) # after headers are read
+ end
+
+ def test_blank_row
+ @data += "\n#{@data}" # add a blank row
+
+ # ensure that everything returned is a Row object
+ CSV.parse(@data, headers: true) do |row|
+ assert_instance_of(CSV::Row, row)
+ end
+ end
+
+ def test_nil_row_header
+ @data = <<-CSV
+A
+
+1
+ CSV
+
+ csv = CSV.parse(@data, headers: true)
+
+ # ensure nil row creates Row object with headers
+ row = csv[0]
+ assert_equal([["A"], [nil]],
+ [row.headers, row.fields])
+ end
+
+ def test_parse_empty
+ assert_equal(CSV::Table.new([]),
+ CSV.parse("", headers: true))
+ end
+
+ def test_parse_empty_line
+ assert_equal(CSV::Table.new([]),
+ CSV.parse("\n", headers: true))
+ end
+
+ def test_specified_empty
+ assert_equal(CSV::Table.new([],
+ headers: ["header1"]),
+ CSV.parse("", headers: ["header1"]))
+ end
+
+ def test_specified_empty_line
+ assert_equal(CSV::Table.new([CSV::Row.new(["header1"], [])],
+ headers: ["header1"]),
+ CSV.parse("\n", headers: ["header1"]))
+ end
+end
diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb
new file mode 100644
index 0000000000..06e1c845d5
--- /dev/null
+++ b/test/csv/parse/test_inputs_scanner.rb
@@ -0,0 +1,63 @@
+require_relative "../helper"
+
+class TestCSVParseInputsScanner < Test::Unit::TestCase
+ include Helper
+
+ def test_scan_keep_over_chunks_nested_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 2)
+ scanner.keep_start
+ assert_equal("abc", scanner.scan_all(/[a-c]+/))
+ scanner.keep_start
+ assert_equal("def", scanner.scan_all(/[d-f]+/))
+ scanner.keep_back
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+
+ def test_scan_keep_over_chunks_nested_drop_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 3)
+ scanner.keep_start
+ assert_equal("ab", scanner.scan(/../))
+ scanner.keep_start
+ assert_equal("c", scanner.scan(/./))
+ assert_equal("d", scanner.scan(/./))
+ scanner.keep_drop
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+
+ def test_each_line_keep_over_chunks_multibyte
+ input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 1)
+ each_line = scanner.each_line("\n")
+ assert_equal("ab\n", each_line.next)
+ scanner.keep_start
+ assert_equal("\u{3000}a\n", each_line.next)
+ scanner.keep_back
+ assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/))
+ end
+
+ def test_each_line_keep_over_chunks_fit_chunk_size
+ input = CSV::Parser::UnoptimizedStringIO.new("\na")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 1)
+ each_line = scanner.each_line("\n")
+ assert_equal("\n", each_line.next)
+ scanner.keep_start
+ assert_equal("a", each_line.next)
+ scanner.keep_back
+ end
+end
diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb
new file mode 100644
index 0000000000..ddb59e2b9a
--- /dev/null
+++ b/test/csv/parse/test_invalid.rb
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseInvalid < Test::Unit::TestCase
+ def test_no_column_mixed_new_lines
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse("\n" +
+ "\r")
+ end
+ assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
+ error.message)
+ end
+
+ def test_ignore_invalid_line
+ csv = CSV.new(<<-CSV, headers: true, return_headers: true)
+head1,head2,head3
+aaa,bbb,ccc
+ddd,ee"e.fff
+ggg,hhh,iii
+ CSV
+ headers = ["head1", "head2", "head3"]
+ assert_equal(CSV::Row.new(headers, headers),
+ csv.shift)
+ assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]),
+ csv.shift)
+ assert_equal(false, csv.eof?)
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.shift
+ end
+ assert_equal("Illegal quoting in line 3.",
+ error.message)
+ assert_equal(false, csv.eof?)
+ assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]),
+ csv.shift)
+ assert_equal(true, csv.eof?)
+ end
+
+ def test_ignore_invalid_line_cr_lf
+ data = <<-CSV
+"1","OK"\r
+"2",""NOT" OK"\r
+"3","OK"\r
+CSV
+ csv = CSV.new(data)
+
+ assert_equal(['1', 'OK'], csv.shift)
+ assert_raise(CSV::MalformedCSVError) { csv.shift }
+ assert_equal(['3', 'OK'], csv.shift)
+ end
+end
diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb
new file mode 100644
index 0000000000..5796d10828
--- /dev/null
+++ b/test/csv/parse/test_liberal_parsing.rb
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseLiberalParsing < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_middle_quote_start
+ input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line(input)
+ end
+ assert_equal("Illegal quoting in line 1.",
+ error.message)
+ assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
+ CSV.parse_line(input, liberal_parsing: true))
+ end
+
+ def test_middle_quote_end
+ input = '"quoted" field'
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line(input)
+ end
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
+ error.message)
+ assert_equal(['"quoted" field'],
+ CSV.parse_line(input, liberal_parsing: true))
+ end
+
+ def test_endline_after_quoted_field_end
+ csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true)
+ assert_equal(["A"], csv.gets)
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.gets
+ end
+ assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.',
+ error.message)
+ assert_equal(["C"], csv.gets)
+ end
+
+ def test_quote_after_column_separator
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
+ end
+ assert_equal("Unclosed quoted field in line 1.",
+ error.message)
+ end
+
+ def test_quote_before_column_separator
+ assert_equal(["is", 'this "three', ' or four"', "fields"],
+ CSV.parse_line('is,this "three, or four",fields',
+ liberal_parsing: true))
+ end
+
+ def test_backslash_quote
+ assert_equal([
+ "1",
+ "\"Hamlet says, \\\"Seems",
+ "\\\" madam! Nay it is; I know not \\\"seems.\\\"\"",
+ ],
+ CSV.parse_line('1,' +
+ '"Hamlet says, \"Seems,' +
+ '\" madam! Nay it is; I know not \"seems.\""',
+ liberal_parsing: true))
+ end
+
+ def test_space_quote
+ input = <<~CSV
+ Los Angeles, 34°03'N, 118°15'W
+ New York City, 40°42'46"N, 74°00'21"W
+ Paris, 48°51'24"N, 2°21'03"E
+ CSV
+ assert_equal(
+ [
+ ["Los Angeles", " 34°03'N", " 118°15'W"],
+ ["New York City", " 40°42'46\"N", " 74°00'21\"W"],
+ ["Paris", " 48°51'24\"N", " 2°21'03\"E"],
+ ],
+ CSV.parse(input, liberal_parsing: true))
+ end
+
+ def test_double_quote_outside_quote
+ data = %Q{a,""b""}
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse(data)
+ end
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
+ error.message)
+ assert_equal([
+ [["a", %Q{""b""}]],
+ [["a", %Q{"b"}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ double_quote_outside_quote: true,
+ }),
+ ])
+ end
+
+ class TestBackslashQuote < Test::Unit::TestCase
+ extend ::DifferentOFS
+
+ def test_double_quote_outside_quote
+ data = %Q{a,""b""}
+ assert_equal([
+ [["a", %Q{""b""}]],
+ [["a", %Q{"b"}]],
+ ],
+ [
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value
+ data = %q{\"\"a\"\"}
+ assert_equal([
+ [[%q{\"\"a\"\"}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<\\"b<=>x}
+ assert_equal([[%Q{a<"b}, "x"]],
+ CSV.parse(data,
+ col_sep: "<=>",
+ liberal_parsing: {
+ backslash_quote: true
+ }))
+ end
+
+ def test_quoted_value
+ data = %q{"\"\"a\"\""}
+ assert_equal([
+ [[%q{"\"\"a\"\""}]],
+ [[%q{""a""}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+ end
+end
diff --git a/test/csv/parse/test_quote_char_nil.rb b/test/csv/parse/test_quote_char_nil.rb
new file mode 100644
index 0000000000..fc3b646759
--- /dev/null
+++ b/test/csv/parse/test_quote_char_nil.rb
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseQuoteCharNil < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_full
+ assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil))
+ end
+
+ def test_end_with_nil
+ assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil))
+ end
+
+ def test_nil_nil
+ assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil))
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<b<=>x}
+ assert_equal([[%Q{a<b}, "x"]], CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_csv_header_string
+ data = <<~DATA
+ first,second,third
+ A,B,C
+ 1,2,3
+ DATA
+ assert_equal(
+ CSV::Table.new([
+ CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]),
+ CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]),
+ CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"])
+ ]),
+ CSV.parse(data, headers: "my,new,headers", quote_char: nil)
+ )
+ end
+
+ def test_comma
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a,b,,d", col_sep: ",", quote_char: nil))
+ end
+
+ def test_space
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a b d", col_sep: " ", quote_char: nil))
+ end
+
+ def encode_array(array, encoding)
+ array.collect do |element|
+ element ? element.encode(encoding) : element
+ end
+ end
+
+ def test_space_no_ascii
+ encoding = Encoding::UTF_16LE
+ assert_equal([encode_array(["a", "b", nil, "d"], encoding)],
+ CSV.parse("a b d".encode(encoding),
+ col_sep: " ".encode(encoding),
+ quote_char: nil))
+ end
+
+ def test_multiple_space
+ assert_equal([["a b", nil, "d"]],
+ CSV.parse("a b d", col_sep: " ", quote_char: nil))
+ end
+
+ def test_multiple_characters_leading_empty_fields
+ data = <<-CSV
+<=><=>A<=>B<=>C
+1<=>2<=>3
+ CSV
+ assert_equal([
+ [nil, nil, "A", "B", "C"],
+ ["1", "2", "3"],
+ ],
+ CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_line
+ lines = [
+ "abc,def\n",
+ ]
+ csv = CSV.new(lines.join(""), quote_char: nil)
+ lines.each do |line|
+ csv.shift
+ assert_equal(line, csv.line)
+ end
+ end
+end
diff --git a/test/csv/parse/test_rewind.rb b/test/csv/parse/test_rewind.rb
new file mode 100644
index 0000000000..0aa403b756
--- /dev/null
+++ b/test/csv/parse/test_rewind.rb
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseRewind < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def parse(data, **options)
+ csv = CSV.new(data, **options)
+ records = csv.to_a
+ csv.rewind
+ [records, csv.to_a]
+ end
+
+ def test_default
+ data = <<-CSV
+Ruby,2.6.0,script
+ CSV
+ assert_equal([
+ [["Ruby", "2.6.0", "script"]],
+ [["Ruby", "2.6.0", "script"]],
+ ],
+ parse(data))
+ end
+
+ def test_have_headers
+ data = <<-CSV
+Language,Version,Type
+Ruby,2.6.0,script
+ CSV
+ assert_equal([
+ [CSV::Row.new(["Language", "Version", "Type"],
+ ["Ruby", "2.6.0", "script"])],
+ [CSV::Row.new(["Language", "Version", "Type"],
+ ["Ruby", "2.6.0", "script"])],
+ ],
+ parse(data, headers: true))
+ end
+end
diff --git a/test/csv/parse/test_row_separator.rb b/test/csv/parse/test_row_separator.rb
new file mode 100644
index 0000000000..eaf6adc910
--- /dev/null
+++ b/test/csv/parse/test_row_separator.rb
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseRowSeparator < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_multiple_characters
+ with_chunk_size("1") do
+ assert_equal([["a"], ["b"]],
+ CSV.parse("a\r\nb\r\n", row_sep: "\r\n"))
+ end
+ end
+end
diff --git a/test/csv/parse/test_skip_lines.rb b/test/csv/parse/test_skip_lines.rb
new file mode 100644
index 0000000000..98d67ae51c
--- /dev/null
+++ b/test/csv/parse/test_skip_lines.rb
@@ -0,0 +1,118 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseSkipLines < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_default
+ csv = CSV.new("a,b,c\n")
+ assert_nil(csv.skip_lines)
+ end
+
+ def test_regexp
+ csv = <<-CSV
+1
+#2
+ #3
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_regexp_quoted
+ csv = <<-CSV
+1
+#2
+"#3"
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["#3"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_string
+ csv = <<-CSV
+1
+.2
+3.
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => "."))
+ end
+
+ class RegexStub
+ end
+
+ def test_not_matchable
+ regex_stub = RegexStub.new
+ csv = CSV.new("1\n", :skip_lines => regex_stub)
+ error = assert_raise(ArgumentError) do
+ csv.shift
+ end
+ assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}",
+ error.message)
+ end
+
+ class Matchable
+ def initialize(pattern)
+ @pattern = pattern
+ end
+
+ def match(line)
+ @pattern.match(line)
+ end
+ end
+
+ def test_matchable
+ csv = <<-CSV
+1
+# 2
+3
+# 4
+ CSV
+ assert_equal([
+ ["1"],
+ ["3"],
+ ],
+ CSV.parse(csv, :skip_lines => Matchable.new(/\A#/)))
+ end
+
+ def test_multibyte_data
+ # U+3042 HIRAGANA LETTER A
+ # U+3044 HIRAGANA LETTER I
+ # U+3046 HIRAGANA LETTER U
+ value = "\u3042\u3044\u3046"
+ with_chunk_size("5") do
+ assert_equal([[value], [value]],
+ CSV.parse("#{value}\n#{value}\n",
+ :skip_lines => /\A#/))
+ end
+ end
+
+ def test_empty_line_and_liberal_parsing
+ assert_equal([["a", "b"]],
+ CSV.parse("a,b\n",
+ :liberal_parsing => true,
+ :skip_lines => /^$/))
+ end
+
+ def test_crlf
+ assert_equal([["a", "b"]],
+ CSV.parse("a,b\r\n,\r\n",
+ :skip_lines => /^,+$/))
+ end
+end
diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb
new file mode 100644
index 0000000000..c5e35209cc
--- /dev/null
+++ b/test/csv/parse/test_strip.rb
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseStrip < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_both
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a , b }, strip: true))
+ end
+
+ def test_left
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a, b}, strip: true))
+ end
+
+ def test_right
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{a ,b }, strip: true))
+ end
+
+ def test_middle
+ assert_equal(["a b"],
+ CSV.parse_line(%Q{a b}, strip: true))
+ end
+
+ def test_quoted
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a "," b "}, strip: true))
+ end
+
+ def test_liberal_parsing
+ assert_equal([" a ", "b", " c ", " d "],
+ CSV.parse_line(%Q{" a ", b , " c "," d " },
+ strip: true,
+ liberal_parsing: true))
+ end
+
+ def test_string
+ assert_equal(["a", " b"],
+ CSV.parse_line(%Q{ a , " b" },
+ strip: " "))
+ end
+
+ def test_no_quote
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a ", b },
+ strip: %Q{"},
+ quote_char: nil))
+ end
+
+ def test_do_not_strip_cr
+ assert_equal([
+ ["a", "b "],
+ ["a", "b "],
+ ],
+ CSV.parse(%Q{"a" ,"b " \r} +
+ %Q{"a" ,"b " \r},
+ strip: true))
+ end
+
+ def test_do_not_strip_lf
+ assert_equal([
+ ["a", "b "],
+ ["a", "b "],
+ ],
+ CSV.parse(%Q{"a" ,"b " \n} +
+ %Q{"a" ,"b " \n},
+ strip: true))
+ end
+
+ def test_do_not_strip_crlf
+ assert_equal([
+ ["a", "b "],
+ ["a", "b "],
+ ],
+ CSV.parse(%Q{"a" ,"b " \r\n} +
+ %Q{"a" ,"b " \r\n},
+ strip: true))
+ end
+
+ def test_col_sep_incompatible_true
+ message = "The provided strip (true) and " \
+ "col_sep (\\t) options are incompatible."
+ assert_raise_with_message(ArgumentError, message) do
+ CSV.parse_line(%Q{"a"\t"b"\n},
+ col_sep: "\t",
+ strip: true)
+ end
+ end
+
+ def test_col_sep_incompatible_string
+ message = "The provided strip (\\t) and " \
+ "col_sep (\\t) options are incompatible."
+ assert_raise_with_message(ArgumentError, message) do
+ CSV.parse_line(%Q{"a"\t"b"\n},
+ col_sep: "\t",
+ strip: "\t")
+ end
+ end
+
+ def test_col_sep_compatible_string
+ assert_equal(
+ ["a", "b"],
+ CSV.parse_line(%Q{\va\tb\v\n},
+ col_sep: "\t",
+ strip: "\v")
+ )
+ end
+end
diff --git a/test/csv/parse/test_unconverted_fields.rb b/test/csv/parse/test_unconverted_fields.rb
new file mode 100644
index 0000000000..437124ebd3
--- /dev/null
+++ b/test/csv/parse/test_unconverted_fields.rb
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseUnconvertedFields < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def setup
+ super
+ @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field}
+
+ @headers = ["first", "second", "third"]
+ @data = <<-CSV
+first,second,third
+1,2,3
+ CSV
+ end
+
+
+ def test_custom
+ row = CSV.parse_line("Numbers,:integer,1,:float,3.015",
+ converters: [:numeric, @custom],
+ unconverted_fields: true)
+ assert_equal([
+ ["Numbers", :integer, 1, :float, 3.015],
+ ["Numbers", ":integer", "1", ":float", "3.015"],
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+
+ def test_no_fields
+ row = CSV.parse_line("\n",
+ converters: [:numeric, @custom],
+ unconverted_fields: true)
+ assert_equal([
+ [],
+ [],
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+
+ def test_parsed_header
+ row = CSV.parse_line(@data,
+ converters: :numeric,
+ unconverted_fields: true,
+ headers: :first_row)
+ assert_equal([
+ CSV::Row.new(@headers,
+ [1, 2, 3]),
+ ["1", "2", "3"],
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+
+ def test_return_headers
+ row = CSV.parse_line(@data,
+ converters: :numeric,
+ unconverted_fields: true,
+ headers: :first_row,
+ return_headers: true)
+ assert_equal([
+ CSV::Row.new(@headers,
+ @headers),
+ @headers,
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+
+ def test_header_converters
+ row = CSV.parse_line(@data,
+ converters: :numeric,
+ unconverted_fields: true,
+ headers: :first_row,
+ return_headers: true,
+ header_converters: :symbol)
+ assert_equal([
+ CSV::Row.new(@headers.collect(&:to_sym),
+ @headers),
+ @headers,
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+
+ def test_specified_headers
+ row = CSV.parse_line("\n",
+ converters: :numeric,
+ unconverted_fields: true,
+ headers: %w{my new headers},
+ return_headers: true,
+ header_converters: :symbol)
+ assert_equal([
+ CSV::Row.new([:my, :new, :headers],
+ ["my", "new", "headers"]),
+ [],
+ ],
+ [
+ row,
+ row.unconverted_fields,
+ ])
+ end
+end