summary | refs | log | tree | commit | diff
path: root/test/csv/parse
diff options
context:
space:
mode:
Diffstat (limited to 'test/csv/parse')
-rw-r--r-- test/csv/parse/test_general.rb 4
-rw-r--r-- test/csv/parse/test_invalid.rb 36
-rw-r--r-- test/csv/parse/test_liberal_parsing.rb 75
-rw-r--r-- test/csv/parse/test_quote_char_nil.rb 93
-rw-r--r-- test/csv/parse/test_row_separator.rb 16
-rw-r--r-- test/csv/parse/test_skip_lines.rb 105
-rw-r--r-- test/csv/parse/test_strip.rb 48
7 files changed, 371 insertions, 6 deletions
diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb
index 2f235f16f6..49222c7159 100644
--- a/test/csv/parse/test_general.rb
+++ b/test/csv/parse/test_general.rb
@@ -142,7 +142,7 @@ class TestCSVParseGeneral < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2\r,3", row_sep: "\n")
end
- assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
error.message)
end
@@ -158,7 +158,7 @@ line,5,jkl
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(csv)
end
- assert_equal("Unquoted fields do not allow \\r or \\n in line 4.",
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
error.message)
end
diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb
new file mode 100644
index 0000000000..b84707c2cc
--- /dev/null
+++ b/test/csv/parse/test_invalid.rb
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseInvalid < Test::Unit::TestCase
+ def test_no_column_mixed_new_lines
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse("\n" +
+ "\r")
+ end
+ assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
+ error.message)
+ end
+
+ def test_ignore_invalid_line
+ csv = CSV.new(<<-CSV, headers: true, return_headers: true)
+head1,head2,head3
+aaa,bbb,ccc
+ddd,ee"e.fff
+ggg,hhh,iii
+ CSV
+ headers = ["head1", "head2", "head3"]
+ assert_equal(CSV::Row.new(headers, headers),
+ csv.shift)
+ assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]),
+ csv.shift)
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.shift
+ end
+ assert_equal("Illegal quoting in line 3.",
+ error.message)
+ assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]),
+ csv.shift)
+ end
+end
diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb
index 22b1689a37..2f7b34689f 100644
--- a/test/csv/parse/test_liberal_parsing.rb
+++ b/test/csv/parse/test_liberal_parsing.rb
@@ -22,8 +22,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line(input)
end
- assert_equal("Do not allow except col_sep_split_separator " +
- "after quoted fields in line 1.",
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal(['"quoted" field'],
CSV.parse_line(input, liberal_parsing: true))
@@ -75,8 +74,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(data)
end
- assert_equal("Do not allow except col_sep_split_separator " +
- "after quoted fields in line 1.",
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal([
[["a", %Q{""b""}]],
@@ -90,4 +88,73 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
}),
])
end
+
+ class TestBackslashQuote < Test::Unit::TestCase
+ extend ::DifferentOFS
+
+ def test_double_quote_outside_quote
+ data = %Q{a,""b""}
+ assert_equal([
+ [["a", %Q{""b""}]],
+ [["a", %Q{"b"}]],
+ ],
+ [
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value
+ data = %q{\"\"a\"\"}
+ assert_equal([
+ [[%q{\"\"a\"\"}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<\\"b<=>x}
+ assert_equal([[%Q{a<"b}, "x"]],
+ CSV.parse(data,
+ col_sep: "<=>",
+ liberal_parsing: {
+ backslash_quote: true
+ }))
+ end
+
+ def test_quoted_value
+ data = %q{"\"\"a\"\""}
+ assert_equal([
+ [[%q{"\"\"a\"\""}]],
+ [[%q{""a""}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+ end
end
diff --git a/test/csv/parse/test_quote_char_nil.rb b/test/csv/parse/test_quote_char_nil.rb
new file mode 100644
index 0000000000..fc3b646759
--- /dev/null
+++ b/test/csv/parse/test_quote_char_nil.rb
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseQuoteCharNil < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_full
+ assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil))
+ end
+
+ def test_end_with_nil
+ assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil))
+ end
+
+ def test_nil_nil
+ assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil))
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<b<=>x}
+ assert_equal([[%Q{a<b}, "x"]], CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_csv_header_string
+ data = <<~DATA
+ first,second,third
+ A,B,C
+ 1,2,3
+ DATA
+ assert_equal(
+ CSV::Table.new([
+ CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]),
+ CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]),
+ CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"])
+ ]),
+ CSV.parse(data, headers: "my,new,headers", quote_char: nil)
+ )
+ end
+
+ def test_comma
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a,b,,d", col_sep: ",", quote_char: nil))
+ end
+
+ def test_space
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a b  d", col_sep: " ", quote_char: nil))
+ end
+
+ def encode_array(array, encoding)
+ array.collect do |element|
+ element ? element.encode(encoding) : element
+ end
+ end
+
+ def test_space_no_ascii
+ encoding = Encoding::UTF_16LE
+ assert_equal([encode_array(["a", "b", nil, "d"], encoding)],
+ CSV.parse("a b  d".encode(encoding),
+ col_sep: " ".encode(encoding),
+ quote_char: nil))
+ end
+
+ def test_multiple_space
+ assert_equal([["a b", nil, "d"]],
+ CSV.parse("a b    d", col_sep: "  ", quote_char: nil))
+ end
+
+ def test_multiple_characters_leading_empty_fields
+ data = <<-CSV
+<=><=>A<=>B<=>C
+1<=>2<=>3
+ CSV
+ assert_equal([
+ [nil, nil, "A", "B", "C"],
+ ["1", "2", "3"],
+ ],
+ CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_line
+ lines = [
+ "abc,def\n",
+ ]
+ csv = CSV.new(lines.join(""), quote_char: nil)
+ lines.each do |line|
+ csv.shift
+ assert_equal(line, csv.line)
+ end
+ end
+end
diff --git a/test/csv/parse/test_row_separator.rb b/test/csv/parse/test_row_separator.rb
new file mode 100644
index 0000000000..eaf6adc910
--- /dev/null
+++ b/test/csv/parse/test_row_separator.rb
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseRowSeparator < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_multiple_characters
+ with_chunk_size("1") do
+ assert_equal([["a"], ["b"]],
+ CSV.parse("a\r\nb\r\n", row_sep: "\r\n"))
+ end
+ end
+end
diff --git a/test/csv/parse/test_skip_lines.rb b/test/csv/parse/test_skip_lines.rb
new file mode 100644
index 0000000000..196858f1b0
--- /dev/null
+++ b/test/csv/parse/test_skip_lines.rb
@@ -0,0 +1,105 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseSkipLines < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_default
+ csv = CSV.new("a,b,c\n")
+ assert_nil(csv.skip_lines)
+ end
+
+ def test_regexp
+ csv = <<-CSV
+1
+#2
+ #3
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_regexp_quoted
+ csv = <<-CSV
+1
+#2
+"#3"
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["#3"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_string
+ csv = <<-CSV
+1
+.2
+3.
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => "."))
+ end
+
+ class RegexStub
+ end
+
+ def test_not_matchable
+ regex_stub = RegexStub.new
+ csv = CSV.new("1\n", :skip_lines => regex_stub)
+ error = assert_raise(ArgumentError) do
+ csv.shift
+ end
+ assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}",
+ error.message)
+ end
+
+ class Matchable
+ def initialize(pattern)
+ @pattern = pattern
+ end
+
+ def match(line)
+ @pattern.match(line)
+ end
+ end
+
+ def test_matchable
+ csv = <<-CSV
+1
+# 2
+3
+# 4
+ CSV
+ assert_equal([
+ ["1"],
+ ["3"],
+ ],
+ CSV.parse(csv, :skip_lines => Matchable.new(/\A#/)))
+ end
+
+ def test_multibyte_data
+ # U+3042 HIRAGANA LETTER A
+ # U+3044 HIRAGANA LETTER I
+ # U+3046 HIRAGANA LETTER U
+ value = "\u3042\u3044\u3046"
+ with_chunk_size("5") do
+ assert_equal([[value], [value]],
+ CSV.parse("#{value}\n#{value}\n",
+ :skip_lines => /\A#/))
+ end
+ end
+end
diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb
new file mode 100644
index 0000000000..160407bd94
--- /dev/null
+++ b/test/csv/parse/test_strip.rb
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseStrip < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_both
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a , b }, strip: true))
+ end
+
+ def test_left
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a, b}, strip: true))
+ end
+
+ def test_right
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{a ,b }, strip: true))
+ end
+
+ def test_quoted
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a "," b "}, strip: true))
+ end
+
+ def test_liberal_parsing
+ assert_equal([" a ", "b", " c ", " d "],
+ CSV.parse_line(%Q{" a ", b , " c "," d " },
+ strip: true,
+ liberal_parsing: true))
+ end
+
+ def test_string
+ assert_equal(["a", " b"],
+ CSV.parse_line(%Q{ a , " b" },
+ strip: " "))
+ end
+
+ def test_no_quote
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a ", b },
+ strip: %Q{"},
+ quote_char: nil))
+ end
+end