summaryrefslogtreecommitdiff
path: root/test/csv/parse/test_liberal_parsing.rb
blob: 2f7b34689f1575e15a0b6936dfdb249c7aa38ba0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# -*- coding: utf-8 -*-
# frozen_string_literal: false

require_relative "../helper"

# Exercises the +liberal_parsing+ option of CSV.parse / CSV.parse_line.
# Each case feeds the parser input with unusual quoting and pins either the
# fields returned in liberal mode or the CSV::MalformedCSVError message raised.
class TestCSVParseLiberalParsing < Test::Unit::TestCase
  extend DifferentOFS

  # A quote opening in the middle of an unquoted field: rejected by the
  # default parser, kept verbatim with liberal_parsing.
  def test_middle_quote_start
    line = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'

    strict_failure = assert_raise(CSV::MalformedCSVError) { CSV.parse_line(line) }
    assert_equal("Illegal quoting in line 1.", strict_failure.message)

    liberal_fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
                 liberal_fields)
  end

  # Text following a closed quoted section: rejected by the default parser,
  # while liberal_parsing returns the whole input as one field, quotes included.
  def test_middle_quote_end
    line = '"quoted" field'

    strict_failure = assert_raise(CSV::MalformedCSVError) { CSV.parse_line(line) }
    assert_equal("Any value after quoted field isn't allowed in line 1.",
                 strict_failure.message)

    liberal_fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal(['"quoted" field'], liberal_fields)
  end

  # Even with liberal_parsing, a quote that appears right after a column
  # separator and is never closed is reported as an unclosed quoted field.
  def test_quote_after_column_separator
    line = 'is,this "three," or four,fields'

    failure = assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line(line, liberal_parsing: true)
    end
    assert_equal("Unclosed quoted field in line 1.", failure.message)
  end

  # A mid-field quote does not protect a following column separator: the
  # line is split into four fields and the quote characters are kept.
  def test_quote_before_column_separator
    fields = CSV.parse_line('is,this "three, or four",fields',
                            liberal_parsing: true)
    assert_equal(["is", 'this "three', ' or four"', "fields"], fields)
  end

  # With plain liberal_parsing (no backslash_quote), backslash-quote pairs are
  # returned untouched and the comma after "Seems" still splits the line.
  def test_backslash_quote
    # Built with + on purpose; the pieces are single-quoted so every backslash
    # below is a literal character in the input.
    line = '1,' +
           '"Hamlet says, \"Seems,' +
           '\" madam! Nay it is; I know not \"seems.\""'

    fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal([
                   "1",
                   "\"Hamlet says, \\\"Seems",
                   "\\\" madam! Nay it is; I know not \\\"seems.\\\"\"",
                 ],
                 fields)
  end

  # Quote characters inside unquoted, space-padded values (geographic
  # coordinates here) are kept as-is, and so is the leading padding.
  def test_space_quote
    coordinates = <<~CSV
      Los Angeles,   34°03'N,    118°15'W
      New York City, 40°42'46"N, 74°00'21"W
      Paris,         48°51'24"N, 2°21'03"E
    CSV

    rows = CSV.parse(coordinates, liberal_parsing: true)
    assert_equal([
                   ["Los Angeles", "   34°03'N", "    118°15'W"],
                   ["New York City", " 40°42'46\"N", " 74°00'21\"W"],
                   ["Paris", "         48°51'24\"N", " 2°21'03\"E"],
                 ],
                 rows)
  end

  # Doubled quotes around a value: an error by default, preserved verbatim by
  # plain liberal_parsing, and collapsed to single quotes when the
  # double_quote_outside_quote sub-option is enabled.
  def test_double_quote_outside_quote
    data = %Q{a,""b""}

    strict_failure = assert_raise(CSV::MalformedCSVError) { CSV.parse(data) }
    assert_equal("Any value after quoted field isn't allowed in line 1.",
                 strict_failure.message)

    verbatim = CSV.parse(data, liberal_parsing: true)
    collapsed = CSV.parse(data,
                          liberal_parsing: {double_quote_outside_quote: true})
    assert_equal([
                   [["a", %Q{""b""}]],
                   [["a", %Q{"b"}]],
                 ],
                 [verbatim, collapsed])
  end

  # Cases for the backslash_quote sub-option of liberal_parsing.
  class TestBackslashQuote < Test::Unit::TestCase
    # NOTE(review): the leading :: presumably forces lookup of the top-level
    # DifferentOFS rather than a same-named constant in the enclosing class;
    # confirm against the test helper.
    extend ::DifferentOFS

    # backslash_quote alone leaves doubled quotes untouched; adding
    # double_quote_outside_quote collapses them to single quotes.
    def test_double_quote_outside_quote
      data = %Q{a,""b""}

      backslash_only = CSV.parse(data, liberal_parsing: {backslash_quote: true})
      with_collapse = CSV.parse(data,
                                liberal_parsing: {
                                  backslash_quote: true,
                                  double_quote_outside_quote: true
                                })
      assert_equal([
                     [["a", %Q{""b""}]],
                     [["a", %Q{"b"}]],
                   ],
                   [backslash_only, with_collapse])
    end

    # In an unquoted value, \" is kept literally by plain liberal_parsing and
    # reduced to a bare quote when backslash_quote is on.
    def test_unquoted_value
      data = %q{\"\"a\"\"}

      verbatim = CSV.parse(data, liberal_parsing: true)
      unescaped = CSV.parse(data, liberal_parsing: {backslash_quote: true})
      assert_equal([
                     [[%q{\"\"a\"\"}]],
                     [[%q{""a""}]],
                   ],
                   [verbatim, unescaped])
    end

    # The value contains "<", the first character of the multi-character
    # column separator "<=>", followed by a backslash-escaped quote.
    def test_unquoted_value_multiple_characters_col_sep
      data = %q{a<\\"b<=>x}

      rows = CSV.parse(data,
                       col_sep: "<=>",
                       liberal_parsing: {backslash_quote: true})
      assert_equal([[%Q{a<"b}, "x"]], rows)
    end

    # Inside a quoted value, plain liberal_parsing returns the raw text with
    # its enclosing quotes, while backslash_quote (with or without
    # double_quote_outside_quote) yields the unescaped content.
    def test_quoted_value
      data = %q{"\"\"a\"\""}

      verbatim = CSV.parse(data, liberal_parsing: true)
      unescaped = CSV.parse(data, liberal_parsing: {backslash_quote: true})
      unescaped_with_collapse = CSV.parse(data,
                                          liberal_parsing: {
                                            backslash_quote: true,
                                            double_quote_outside_quote: true
                                          })
      assert_equal([
                     [[%q{"\"\"a\"\""}]],
                     [[%q{""a""}]],
                     [[%q{""a""}]],
                   ],
                   [verbatim, unescaped, unescaped_with_collapse])
    end
  end
end