diff options
Diffstat (limited to 'test/csv/parse/test_general.rb')
-rw-r--r-- | test/csv/parse/test_general.rb | 259 |
1 files changed, 0 insertions, 259 deletions
diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb deleted file mode 100644 index 14903d462c..0000000000 --- a/test/csv/parse/test_general.rb +++ /dev/null @@ -1,259 +0,0 @@ -# -*- coding: utf-8 -*- -# frozen_string_literal: false - -require "timeout" - -require_relative "../helper" - -# -# Following tests are my interpretation of the -# {CSV RCF}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that -# document in one place (intentionally) and that is to make the default row -# separator <tt>$/</tt>. -# -class TestCSVParseGeneral < Test::Unit::TestCase - extend DifferentOFS - - BIG_DATA = "123456789\n" * 1024 - - def test_mastering_regex_example - ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K} - assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000", - "It's \"10 Grand\", baby", "10K" ], - CSV.parse_line(ex) ) - end - - # Old Ruby 1.8 CSV library tests. - def test_std_lib_csv - [ ["\t", ["\t"]], - ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]], - ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]], - ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]], - ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]], - ["\"\"", [""]], - ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]], - ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]], - ["foo,\"\r\",baz", ["foo", "\r", "baz"]], - ["foo,\"\",baz", ["foo", "", "baz"]], - ["\",\"", [","]], - ["foo", ["foo"]], - [",,", [nil, nil, nil]], - [",", [nil, nil]], - ["foo,\"\n\",baz", ["foo", "\n", "baz"]], - ["foo,,baz", ["foo", nil, "baz"]], - ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]], - ["\",\",\",\"", [",", ","]], - ["foo,bar,", ["foo", "bar", nil]], - [",foo,bar", [nil, "foo", "bar"]], - ["foo,bar", ["foo", "bar"]], - [";", [";"]], - ["\t,\t", ["\t", "\t"]], - ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]], - ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]], - ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]], - [";,;", [";", ";"]] ].each do |csv_test| - assert_equal(csv_test.last, CSV.parse_line(csv_test.first)) - end - - [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]], - ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]], - ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]], - ["\"\"", [""]], - ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]], - ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]], - ["foo,\"\r\",baz", ["foo", "\r", "baz"]], - ["foo,\"\",baz", ["foo", "", "baz"]], - ["foo", ["foo"]], - [",,", [nil, nil, nil]], - [",", [nil, nil]], - ["foo,\"\n\",baz", ["foo", "\n", "baz"]], - ["foo,,baz", ["foo", nil, "baz"]], - ["foo,bar", ["foo", "bar"]], - ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]], - ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test| - assert_equal(csv_test.last, CSV.parse_line(csv_test.first)) - end - end - - # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496 - def test_aras_edge_cases - [ [%Q{a,b}, ["a", "b"]], - [%Q{a,"""b"""}, ["a", "\"b\""]], - [%Q{a,"""b"}, ["a", "\"b"]], - [%Q{a,"b"""}, ["a", "b\""]], - [%Q{a,"\nb"""}, ["a", "\nb\""]], - [%Q{a,"""\nb"}, ["a", "\"\nb"]], - [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]], - [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]], - [%Q{a,,,}, ["a", nil, nil, nil]], - [%Q{,}, [nil, nil]], - [%Q{"",""}, ["", ""]], - [%Q{""""}, ["\""]], - [%Q{"""",""}, ["\"",""]], - [%Q{,""}, [nil,""]], - [%Q{,"\r"}, [nil,"\r"]], - [%Q{"\r\n,"}, ["\r\n,"]], - [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case| - assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) - end - end - - def test_james_edge_cases - # A read at eof? should return nil. - assert_equal(nil, CSV.parse_line("")) - # - # With Ruby 1.8 CSV it's impossible to tell an empty line from a line - # containing a single +nil+ field. The old CSV library returns - # <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to - # me. - # - assert_equal(Array.new, CSV.parse_line("\n1,2,3\n")) - end - - def test_rob_edge_cases - [ [%Q{"a\nb"}, ["a\nb"]], - [%Q{"\n\n\n"}, ["\n\n\n"]], - [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]], - [%Q{,"\r\n"}, [nil,"\r\n"]], - [%Q{,"\r\n."}, [nil,"\r\n."]], - [%Q{"a\na","one newline"}, ["a\na", 'one newline']], - [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']], - [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']], - [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']], - [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]], - ].each do |edge_case| - assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) - end - end - - def test_non_regex_edge_cases - # An early version of the non-regex parser fails this test - [ [ "foo,\"foo,bar,baz,foo\",\"foo\"", - ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case| - assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) - end - - assert_raise(CSV::MalformedCSVError) do - CSV.parse_line("1,\"23\"4\"5\", 6") - end - end - - def test_malformed_csv_cr_first_line - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse_line("1,2\r,3", row_sep: "\n") - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", - error.message) - end - - def test_malformed_csv_cr_middle_line - csv = <<-CSV -line,1,abc -line,2,"def\nghi" - -line,4,some\rjunk -line,5,jkl - CSV - - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse(csv) - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.", - error.message) - end - - def test_malformed_csv_unclosed_quote - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse_line('1,2,"3...') - end - assert_equal("Unclosed quoted field in line 1.", - error.message) - end - - def test_malformed_csv_illegal_quote_middle_line - csv = <<-CSV -line,1,abc -line,2,"def\nghi" - -line,4,8'10" -line,5,jkl - CSV - - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse(csv) - end - assert_equal("Illegal quoting in line 4.", - error.message) - end - - def test_the_parse_fails_fast_when_it_can_for_unquoted_fields - assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA) - end - - def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes - assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA) - end - - def test_field_size_limit_controls_lookahead - assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', - field_size_limit: 2048 ) - end - - def test_field_size_limit_in_extended_column_not_exceeding - data = <<~DATA - "a","b" - " - 2 - ","" - DATA - assert_nothing_raised(CSV::MalformedCSVError) do - CSV.parse(data, field_size_limit: 4) - end - end - - def test_field_size_limit_in_extended_column_exceeding - data = <<~DATA - "a","b" - " - 2345 - ","" - DATA - assert_parse_errors_out(data, field_size_limit: 5) - end - - def test_row_sep_auto_cr - assert_equal([["a"]], CSV.parse("a\r")) - end - - def test_row_sep_auto_lf - assert_equal([["a"]], CSV.parse("a\n")) - end - - def test_row_sep_auto_cr_lf - assert_equal([["a"]], CSV.parse("a\r\n")) - end - - def test_seeked_string_io - input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n") - input_with_bom.read(3) - assert_equal([ - ["あ", "い", "う"], - ["a", "b", "c"], - ], - CSV.new(input_with_bom).each.to_a) - end - - private - def assert_parse_errors_out(data, **options) - assert_raise(CSV::MalformedCSVError) do - timeout = 0.2 - if defined?(RubyVM::JIT.enabled?) and RubyVM::JIT.enabled? - timeout = 5 # for --jit-wait - end - Timeout.timeout(timeout) do - CSV.parse(data, **options) - fail("Parse didn't error out") - end - end - end -end |