summaryrefslogtreecommitdiff
path: root/test/csv/test_features.rb
blob: 53b513d0fa27bca79fcd033637e3e8632a10d22a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
#!/usr/bin/env ruby -w
# encoding: UTF-8
# frozen_string_literal: false

# test_features.rb
#
# Created by James Edward Gray II on 2005-10-31.

begin
  require "zlib"
rescue LoadError
end

require_relative "base"
require "tempfile"

class TestCSV::Features < TestCSV
  extend DifferentOFS

  # Shared parsing fixtures: each entry pairs a raw CSV line with the fields
  # the tests in this class expect CSV.parse_line to return for it (nil marks
  # an empty unquoted field). Several tests reuse the table after swapping the
  # column separator or the quote character with String#tr.
  TEST_CASES = [ [%Q{a,b},               ["a", "b"]],
                 [%Q{a,"""b"""},         ["a", "\"b\""]],
                 [%Q{a,"""b"},           ["a", "\"b"]],
                 [%Q{a,"b"""},           ["a", "b\""]],
                 [%Q{a,"\nb"""},         ["a", "\nb\""]],
                 [%Q{a,"""\nb"},         ["a", "\"\nb"]],
                 [%Q{a,"""\nb\n"""},     ["a", "\"\nb\n\""]],
                 [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
                 [%Q{a,,,},              ["a", nil, nil, nil]],
                 [%Q{,},                 [nil, nil]],
                 [%Q{"",""},             ["", ""]],
                 [%Q{""""},              ["\""]],
                 [%Q{"""",""},           ["\"",""]],
                 [%Q{,""},               [nil,""]],
                 [%Q{,"\r"},             [nil,"\r"]],
                 [%Q{"\r\n,"},           ["\r\n,"]],
                 [%Q{"\r\n,",},          ["\r\n,", nil]] ]

  # Builds the sample document shared by the tests (four rows: one with a
  # newline inside a quoted field, one blank) and a parser over it.
  def setup
    super
    sample_rows = [
      "line,1,abc",
      %Q{line,2,"def\nghi"},
      "",
      "line,4,jkl",
    ]
    # Explicit separator: never rely on the default, which follows $,.
    @sample_data = sample_rows.map { |row| "#{row}\n" }.join("")
    @csv = CSV.new(@sample_data)
  end

  # Every shared fixture must parse to the same fields once its commas are
  # swapped for an alternative column separator; a comma is then plain data.
  def test_col_sep
    [";", "\t"].product(TEST_CASES) do |separator, (line, fields)|
      expected = fields.map { |field| field&.tr(",", separator) }
      actual   = CSV.parse_line(line.tr(",", separator), col_sep: separator)
      assert_equal(expected, actual)
    end

    assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";"))
  end

  # With row_sep "\r\n", a bare "\n" in an unquoted field is malformed input,
  # while the same newline inside a quoted field is kept as field data.
  def test_row_sep
    failure = assert_raise(CSV::MalformedCSVError) {
      CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
    }
    assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
                 failure.message)

    quoted_fields = CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n")
    assert_equal(["1", "2", "3\n", "4", "5"], quoted_fields)
  end

  # The shared fixtures must parse identically when a single quote replaces
  # the double quote as the quoting character.
  def test_quote_char
    TEST_CASES.each do |line, fields|
      expected = fields.map { |field| field&.tr('"', "'") }
      actual   = CSV.parse_line(line.tr('"', "'"), quote_char: "'")
      assert_equal(expected, actual)
    end
  end

  # Same as test_quote_char, but with "|" (a regexp metacharacter) as the
  # quoting character.
  def test_quote_char_special_regexp_char
    TEST_CASES.each do |line, fields|
      expected = fields.map { |field| field&.tr('"', "|") }
      actual   = CSV.parse_line(line.tr('"', "|"), quote_char: "|")
      assert_equal(expected, actual)
    end
  end

  # "|" as the quoting character must also work when liberal parsing is on.
  def test_quote_char_special_regexp_char_liberal_parsing
    TEST_CASES.each do |line, fields|
      expected = fields.map { |field| field&.tr('"', "|") }
      actual   = CSV.parse_line(line.tr('"', "|"),
                                quote_char: "|",
                                liberal_parsing: true)
      assert_equal(expected, actual)
    end
  end

  # Each character option passed to CSV.new is exposed through the reader of
  # the same name.
  def test_csv_char_readers
    %i[col_sep row_sep quote_char].each do |option|
      parser = CSV.new("abc,def", option => "|")
      assert_equal("|", parser.send(option))
    end
  end

  # row_sep is discovered from the data when not given explicitly.
  def test_row_sep_auto_discovery
    ["\r\n", "\n", "\r"].each do |terminator|
      document = ["1,2,3", "4,5"].map { |row| row + terminator }.join("")
      assert_equal(terminator, CSV.new(document).row_sep)
    end

    # With mixed terminators the first one encountered is reported.
    assert_equal("\n", CSV.new("\n\r\n\r").row_sep)

    # Empty input and STDERR both fall back to $/.
    ["", STDERR].each do |source|
      assert_equal($/, CSV.new(source).row_sep)
    end
  end

  # After each shift, CSV#line returns the raw text of the row just read,
  # including line breaks embedded in quoted fields.
  def test_line
    raw_rows = [
      %Q{abc,def\n},
      %Q{abc,"d\nef"\n},
      %Q{abc,"d\r\nef"\n},
      %Q{abc,"d\ref"},
    ]
    parser = CSV.new(raw_rows.join(""))

    raw_rows.each do |raw_row|
      parser.shift
      assert_equal(raw_row, parser.line)
    end
  end

  # The sample spans five physical lines but yields four rows; lineno goes up
  # by one per row read, and a fifth shift returns nil.
  def test_lineno
    assert_equal(5, @sample_data.lines.size)

    1.upto(4) do |rows_read|
      assert_equal(rows_read - 1, @csv.lineno)
      assert_not_nil(@csv.shift)
      assert_equal(rows_read, @csv.lineno)
    end

    assert_nil(@csv.shift)
  end

  # Runs the lineno checks twice, rewinding the parser between passes, so the
  # second pass only succeeds if rewind fully resets the reader.
  def test_readline
    2.times do |pass|
      @csv.rewind unless pass.zero?
      test_lineno
    end
  end

  # CSV.new rejects keywords it does not know with an ArgumentError.
  def test_unknown_options
    { unknown: :error, universal_newline: true }.each do |option, value|
      assert_raise_with_message(ArgumentError, /unknown keyword/) do
        CSV.new(@sample_data, option => value)
      end
    end
  end

  # The blank row is returned by default (four rows) and dropped when
  # skip_blanks is enabled (three rows, each starting with "line").
  def test_skip_blanks
    assert_equal(4, @csv.to_a.size)

    @csv = CSV.new(@sample_data, skip_blanks: true)

    rows_seen = 0
    @csv.each_with_index do |row, index|
      rows_seen = index + 1
      assert_equal("line", row.first)
    end
    assert_equal(3, rows_seen)
  end

  # A quote opening in the middle of an unquoted field is an error in strict
  # mode but is kept verbatim under liberal parsing.
  def test_liberal_parsing_middle_quote_start
    line = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'

    strict_failure = assert_raise(CSV::MalformedCSVError) { CSV.parse_line(line) }
    assert_equal("Illegal quoting in line 1.", strict_failure.message)

    lenient_fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
                 lenient_fields)
  end

  # Text following a closing quote is an error in strict mode; liberal
  # parsing returns the whole input as a single field, quotes included.
  def test_liberal_parsing_middle_quote_end
    line = '"quoted" field'

    strict_failure = assert_raise(CSV::MalformedCSVError) { CSV.parse_line(line) }
    assert_equal(
      "Do not allow except col_sep_split_separator after quoted fields in line 1.",
      strict_failure.message
    )

    lenient_fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal(['"quoted" field'], lenient_fields)
  end

  # Even under liberal parsing, this input (closing quote placed after the
  # separator) is rejected as an unclosed quoted field.
  def test_liberal_parsing_quote_after_column_separator
    line = 'is,this "three," or four,fields'
    failure = assert_raise(CSV::MalformedCSVError) {
      CSV.parse_line(line, liberal_parsing: true)
    }
    assert_equal("Unclosed quoted field in line 1.", failure.message)
  end

  # Quotes that open mid-field do not protect the separator: the line is
  # still split at every comma and the quotes stay in the field text.
  def test_liberal_parsing_quote_before_column_separator
    line   = 'is,this "three, or four",fields'
    fields = CSV.parse_line(line, liberal_parsing: true)
    assert_equal(["is", 'this "three', ' or four"', "fields"], fields)
  end

  # Backslash-escaped quotes are not treated as escapes: under liberal
  # parsing the backslashes and quotes are returned verbatim and the line is
  # split at the comma that follows "Seems".
  def test_liberal_parsing_backslash_quote
    # Single-quoted literals keep each backslash as a literal character.
    line = '1,"Hamlet says, \"Seems,\" madam! Nay it is; I know not \"seems.\""'
    expected = [
      "1",
      '"Hamlet says, \"Seems',
      '\" madam! Nay it is; I know not \"seems.\""',
    ]
    assert_equal(expected, CSV.parse_line(line, liberal_parsing: true))
  end

  # Under liberal parsing, leading spaces are preserved and a quote character
  # inside such a field is returned as ordinary data.
  def test_liberal_parsing_space_quote
    coordinates = <<~CSV
      Los Angeles,   34°03'N,    118°15'W
      New York City, 40°42'46"N, 74°00'21"W
      Paris,         48°51'24"N, 2°21'03"E
    CSV
    expected_rows = [
      ["Los Angeles", "   34°03'N", "    118°15'W"],
      ["New York City", %q( 40°42'46"N), %q( 74°00'21"W)],
      ["Paris", %q(         48°51'24"N), %q( 2°21'03"E)],
    ]
    assert_equal(expected_rows, CSV.parse(coordinates, liberal_parsing: true))
  end

  # Each boolean behavior is off by default and its predicate reports true
  # once the matching option is passed as true.
  def test_csv_behavior_readers
    %i[unconverted_fields return_headers write_headers
       skip_blanks force_quotes].each do |flag|
      predicate = "#{flag}?"
      assert_not_predicate(CSV.new("abc,def"), predicate,
                           "Behavior defaulted to on.")
      enabled = CSV.new("abc,def", flag => true)
      assert_predicate(enabled, predicate, "Behavior change now registered.")
    end
  end

  # CSV#converters reports the converters in effect for each way of
  # requesting them.
  def test_converters_reader
    [
      [[:integer], [:integer]],          # a list is reported unchanged
      [:integer,   [:integer]],          # a single name is reported as a list
      [:numeric,   [:integer, :float]],  # a group name is expanded
    ].each do |requested, reported|
      assert_equal(reported,
                   CSV.new("abc,def", converters: requested).converters)
    end

    # A custom callable is kept next to the named converter.
    parser = CSV.new("abc,def", converters: [:integer, -> {}])
    assert_equal(2, parser.converters.size)
    assert_equal(:integer, parser.converters.first)
    assert_instance_of(Proc, parser.converters.last)
  end

  # CSV#header_converters reports the header converters in effect, whether
  # they were given as a list, a single name, or mixed with a callable.
  def test_header_converters_reader
    # A list and a bare name both report as a one-element list.
    [[:downcase], :downcase].each do |requested|
      parser = CSV.new("abc,def", header_converters: requested)
      assert_equal([:downcase], parser.header_converters)
    end

    # A custom callable is kept next to the named converter.
    parser = CSV.new("abc,def", header_converters: [:symbol, -> {}])
    assert_equal(2, parser.header_converters.size)
    assert_equal(:symbol, parser.header_converters.first)
    assert_instance_of(Proc, parser.header_converters.last)
  end

  # reported by Kev Jackson
  # Constructing a parser with "|" (a regexp metacharacter) as col_sep must
  # not raise.
  def test_failing_to_escape_col_sep
    assert_nothing_raised(Exception) do
      CSV.new(String.new, col_sep: "|")
    end
  end

  # reported by Chris Roos
  # The header row must be flagged as such on the first pass and again after
  # rewind, i.e. rewind has to reset the header state.
  def test_failing_to_reset_headers_in_rewind
    parser = CSV.new("forename,surname", headers: true, return_headers: true)
    2.times do |pass|
      parser.rewind unless pass.zero?
      parser.each { |row| assert_predicate(row, :header_row?) }
    end
  end

  # reported by Dave Burt
  # Leading empty fields must come back as nil when the column separator is
  # longer than one character.
  def test_leading_empty_fields_with_multibyte_col_sep
    document = "<=><=>A<=>B<=>C\n" \
               "1<=>2<=>3\n"
    rows = CSV.parse(document, col_sep: "<=>")
    assert_equal([[nil, nil, "A", "B", "C"], ["1", "2", "3"]], rows)
  end

  # CSV must accept a Zlib::GzipReader as its input and detect the row
  # separator of the gzipped fixture. Only defined when zlib could be loaded.
  def test_gzip_reader
    zipped = nil
    assert_nothing_raised(NoMethodError) do
      zipped = CSV.new(
                 Zlib::GzipReader.open(
                   File.join(File.dirname(__FILE__), "line_endings.gz")
                 )
               )
    end
    assert_equal("\r\n", zipped.row_sep)
  ensure
    # zipped is still nil when opening the fixture or building the parser
    # failed; an unconditional close would then raise NoMethodError and hide
    # the real error.
    zipped&.close
  end if defined?(Zlib::GzipReader)

  # CSV must accept a Zlib::GzipWriter as its output, and the rows written
  # through it must be terminated with the default record separator.
  # Only defined when zlib could be loaded.
  def test_gzip_writer
    Tempfile.create(%w"temp .gz") {|tempfile|
      tempfile.close
      file = tempfile.path
      zipped = nil
      begin
        assert_nothing_raised(NoMethodError) do
          zipped = CSV.new(Zlib::GzipWriter.open(file))
        end
        zipped << %w[one two three]
        zipped << [1, 2, 3]
      ensure
        # Close the writer even when an append fails, so the handle is
        # released before Tempfile.create removes the file. The close also
        # has to happen before the file is read back below.
        zipped&.close
      end

      assert_include(Zlib::GzipReader.open(file) {|f| f.read},
                     $INPUT_RECORD_SEPARATOR, "@row_sep did not default")
    }
  end if defined?(Zlib::GzipWriter)

  # CSV#inspect names the kind of IO it wraps: StringIO for string input,
  # $stderr for the standard error stream, File for an opened path.
  def test_inspect_is_smart_about_io_types
    failure_note = "IO type not detected."

    assert_include(CSV.new("string,data").inspect,
                   "io_type:StringIO", failure_note)
    assert_include(CSV.new($stderr).inspect,
                   "io_type:$stderr", failure_note)

    Tempfile.create(%w"temp .csv") do |scratch|
      scratch.close
      csv_path = scratch.path
      File.open(csv_path, "w") { |out| out << "one,two,three\n1,2,3\n" }
      described = CSV.open(csv_path, &:inspect)
      assert_include(described, "io_type:File", failure_note)
    end
  end

  # CSV#inspect must include a "name:value" entry for each core attribute.
  def test_inspect_shows_key_attributes
    description = @csv.inspect
    %w[lineno col_sep row_sep quote_char].each do |name|
      assert_match(/\b#{name}:[^\s>]+/, description)
    end
  end

  # Before any row is read inspect only shows "headers:true"; once the first
  # shift has loaded the headers it lists them.
  def test_inspect_shows_headers_when_available
    parser = CSV.new("one,two,three\n1,2,3\n", headers: true)

    before_read = parser.inspect
    assert_include(before_read, "headers:true", "Header hint not shown.")

    parser.shift  # reading the first row loads the headers
    after_read = parser.inspect
    assert_match(/headers:\[[^\]]+\]/, after_read)
  end

  # Even for UTF-16BE input, the string returned by inspect must be in an
  # encoding compatible with US-ASCII.
  def test_inspect_encoding_is_ascii_compatible
    utf16_document = "one,two,three\n1,2,3\n".encode("UTF-16BE")
    parser = CSV.new(utf16_document)
    ascii  = Encoding.find("US-ASCII")

    assert_send([Encoding, :compatible?, ascii, parser.inspect.encoding],
                "inspect() was not ASCII compatible.")
  end

  # CSV::VERSION must be a frozen String of the form MAJOR.MINOR.PATCH.
  def test_version
    assert_not_nil(CSV::VERSION)
    assert_instance_of(String, CSV::VERSION)
    assert_predicate(CSV::VERSION, :frozen?)
    # Allow several digits per component so a release such as "3.10.2" does
    # not fail the test; \d alone only accepted single-digit components.
    assert_match(/\A\d+\.\d+\.\d+\z/, CSV::VERSION)
  end

  # CSV.new must accept a Regexp for the :skip_lines option.
  def test_accepts_comment_skip_lines_option
    assert_nothing_raised(ArgumentError) {
      CSV.new(@sample_data, skip_lines: /\A\s*#/)
    }
  end

  # Without a :skip_lines option the reader returns nil.
  def test_accepts_comment_defaults_to_nil
    assert_nil(CSV.new(@sample_data).skip_lines)
  end

  # Deliberately empty: it defines no #match, so an instance serves as an
  # invalid :skip_lines value in test_requires_skip_lines_to_call_match.
  class RegexStub
  end

  # A :skip_lines object without #match is rejected with an ArgumentError
  # that mentions skip_lines; the error surfaces when the first row is read.
  def test_requires_skip_lines_to_call_match
    parser = CSV.new(@sample_data, skip_lines: RegexStub.new)
    assert_raise_with_message(ArgumentError, /skip_lines/) { parser.shift }
  end

  # Minimal non-Regexp object that responds to #match, used as a custom
  # :skip_lines value.
  class Matchable
    # pattern: any object responding to #match (the tests pass a Regexp).
    def initialize(pattern)
      @matcher = pattern
    end

    # Forwards to the wrapped pattern and returns whatever its #match returns.
    def match(line)
      @matcher.match(line)
    end
  end

  # Lines for which a custom #match object returns a match are skipped.
  def test_skip_lines_match
    # No trailing newline after the last line.
    document = ["1", "# 2", "3", "# 4"].join("\n")
    comment_matcher = Matchable.new(/\A#/)
    assert_equal([["1"], ["3"]],
                 CSV.parse(document, skip_lines: comment_matcher))
  end

  # Lines matching the :skip_lines Regexp (a "#" after optional leading
  # whitespace) are left out of the parsed rows.
  def test_comment_rows_are_ignored
    document = [
      "line,1,a",
      "#not,a,line",
      "line,2,b",
      "   #also,no,line",
    ].join("\n")
    parser = CSV.new(document, skip_lines: /\A\s*#/)
    assert_equal([%w[line 1 a], %w[line 2 b]], parser.each.to_a)
  end

  # With skip_lines "." only the row containing a literal dot is skipped, so
  # the string is evidently not applied as the regexp wildcard.
  def test_comment_rows_are_ignored_with_heredoc
    document = <<~EOL
      1,foo
      .2,bar
      3,baz
    EOL

    parser = CSV.new(document, skip_lines: ".")
    assert_equal([%w[1 foo], %w[3 baz]], parser.each.to_a)
  end

  # A row whose first field is a quoted "#not" does not match the skip
  # pattern (the line starts with a quote) and is therefore kept.
  def test_quoted_skip_line_markers_are_ignored
    document = ['line,1,a', '"#not",a,line', 'line,2,b'].join("\n")
    parser = CSV.new(document, skip_lines: /\A\s*#/)
    expected = [
      ["line", "1", "a"],
      ["#not", "a", "line"],
      ["line", "2", "b"],
    ]
    assert_equal(expected, parser.each.to_a)
  end

  # With a String as :skip_lines, every line containing that string is
  # skipped, wherever in the line it appears.
  def test_string_works_like_a_regexp
    document = [
      "line,1,a",
      "#(not,a,line",
      "line,2,b",
      "   also,#no,line",
    ].join("\n")
    parser = CSV.new(document, skip_lines: "#")
    assert_equal([%w[line 1 a], %w[line 2 b]], parser.each.to_a)
  end

  # Comparing a parsed table with nil must not raise NoMethodError.
  def test_table_nil_equality
    assert_nothing_raised(NoMethodError) do
      table = CSV.parse("test", headers: true)
      # The explicit == is the operation under test; do not replace it
      # with nil?.
      table == nil
    end
  end

  # non-seekable input stream for testing https://github.com/ruby/csv/issues/44
  # Read-only wrapper around a StringIO that exposes only a few reading
  # methods, so it behaves like an input stream that cannot seek or rewind.
  class DummyIO
    extend Forwardable

    # Only these readers are forwarded -- deliberately no seek or rewind!
    %i[gets read pos eof?].each do |reader|
      def_delegator :@io, reader
    end

    def initialize(data)
      @io = StringIO.new(data)
    end
  end

  # LF-terminated rows are split correctly when read from a stream that
  # cannot seek or rewind.
  def test_line_separator_autodetection_for_non_seekable_input_lf
    stream = DummyIO.new("one,two,three\nfoo,bar,baz\n")
    assert_equal([%w[one two three], %w[foo bar baz]],
                 CSV.new(stream).each.to_a)
  end

  # CR-terminated rows are split correctly when read from a stream that
  # cannot seek or rewind.
  def test_line_separator_autodetection_for_non_seekable_input_cr
    stream = DummyIO.new("one,two,three\rfoo,bar,baz\r")
    assert_equal([%w[one two three], %w[foo bar baz]],
                 CSV.new(stream).each.to_a)
  end

  # CRLF-terminated rows are split correctly when read from a stream that
  # cannot seek or rewind.
  def test_line_separator_autodetection_for_non_seekable_input_cr_lf
    stream = DummyIO.new("one,two,three\r\nfoo,bar,baz\r\n")
    assert_equal([%w[one two three], %w[foo bar baz]],
                 CSV.new(stream).each.to_a)
  end

  # Ten LF-separated rows of 200 fields each (every row is well over 1024
  # characters) must round-trip through a non-seekable stream.
  # NOTE(review): 1024 presumably matches the parser's look-ahead size --
  # confirm against the CSV implementation.
  def test_line_separator_autodetection_for_non_seekable_input_1024_over_lf
    grid = Array.new(10) do |r|
      Array.new(200) { |c| "row#{r + 1}col#{c + 1}" }
    end
    payload = grid.map { |cells| cells.join(",") }.join("\n")

    assert_equal(grid, CSV.new(DummyIO.new(payload)).each.to_a)
  end

  # Ten CRLF-separated rows of 200 fields each (every row is well over 1024
  # characters) must round-trip through a non-seekable stream.
  # NOTE(review): 1024 presumably matches the parser's look-ahead size --
  # confirm against the CSV implementation.
  def test_line_separator_autodetection_for_non_seekable_input_1024_over_cr_lf
    grid = Array.new(10) do |r|
      Array.new(200) { |c| "row#{r + 1}col#{c + 1}" }
    end
    payload = grid.map { |cells| cells.join(",") }.join("\r\n")

    assert_equal(grid, CSV.new(DummyIO.new(payload)).each.to_a)
  end

  # A long run of bare CRs between two rows must yield one empty row per CR,
  # so no bytes are lost to look-ahead on a non-seekable stream.
  def test_line_separator_autodetection_for_non_seekable_input_many_cr_only
    blank_rows = 9999
    payload  = "foo\r#{"\r" * blank_rows}bar\r"
    expected = [["foo"], *Array.new(blank_rows) { [] }, ["bar"]]

    assert_equal(expected, CSV.new(DummyIO.new(payload)).each.to_a)
  end
end